From 981b20e431643fc9a2c602055ee8029a25f48a0d Mon Sep 17 00:00:00 2001
From: Alexander Miroshnichenko
Date: Mon, 3 Feb 2025 07:25:35 +0300
Subject: sys-kernel/hardened-kernel: update v6.12.10

bcachefs cherry-pick updates from bcachefs-for-upstream 5d9ccda

Signed-off-by: Alexander Miroshnichenko
---
 sys-kernel/hardened-kernel/Manifest           |     9 -
 .../files/linux-6.11.amd64.config             |  7068 -----
 ...rry-pick-updates-from-master-1410769.patch | 24672 ----------------
 .../hardened-kernel-6.11.8.ebuild             |   138 -
 4 files changed, 31887 deletions(-)
 delete mode 100644 sys-kernel/hardened-kernel/files/linux-6.11.amd64.config
 delete mode 100644 sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-1410769.patch
 delete mode 100644 sys-kernel/hardened-kernel/hardened-kernel-6.11.8.ebuild

diff --git a/sys-kernel/hardened-kernel/Manifest b/sys-kernel/hardened-kernel/Manifest
index 7a4c5c9..9b9fd17 100644
--- a/sys-kernel/hardened-kernel/Manifest
+++ b/sys-kernel/hardened-kernel/Manifest
@@ -1,19 +1,10 @@
-DIST genpatches-6.11-10.base.tar.xz 757872 BLAKE2B 72566af9a781288f516dcd30881851fe371a0f3d072aeabbd9d3e57ea96896cb9d8f0d594f8729215baa83d9546c675789b596dac5781b3640e963059d23223b SHA512 ae04d309e3b97cfd7f09993cf297fa5825c53e83acc54805f1f6f2d09cd07aa1715866be3d59874d0131d1746a398b9449fda1987ea6bdbd66402e411569d874
-DIST genpatches-6.11-10.experimental.tar.xz 77928 BLAKE2B a0928f0ff7eb6b9a5659d0ab41dafcf3b474cd7aa357b65a7a147972132c08703a88467e51b7dbd8004781cb0cb8a9620190737963f1fcc1e9e5d98f68ba72d6 SHA512 2be91396f9ec97b2e051db72742e3db1edaa56255c7a2cde2ce2ecc1de4771e92ba6d55e863380fe4dc6ef8d8778bec1a9926a9ffe2dd5d1036b9c36a9afae13
-DIST genpatches-6.11-10.extras.tar.xz 4060 BLAKE2B a94b8799f6c1d338a17e25b1dde6aa616754bfde092eb5ad1da11a6ec8b1107dce827d05ecc756a4918339329190e6572bb089de89d9a11c8c08f067eb7b269d SHA512 1a166a0054827ac9bef700d075cc2a1e3934dbe7b7aa64b34109b521f5bb21e231d59be4643f6faf702e5d0b3cb7d82e8cc1ba1f77e3bf88c38f9b6ffc61e35f
 DIST genpatches-6.12-13.base.tar.xz 806528 BLAKE2B 0eec1195d5a497e64556d992893f251a54c558151aef1fc1d03de0b95bdbe03588ef486875e7676f9635da62e975dd8509279ed2d96b4dcb3796e1d8a4bb3768 SHA512 3cf8549679aae42fe56a25c0f9d5eb5c59fb867c77dc22c21446064e55d91276ec75f60f5737ab9a39c82dd0e74ab1ee3bd62b5a78f7448827b5577257976aed
 DIST genpatches-6.12-13.experimental.tar.xz 78444 BLAKE2B 721334f36ae20a598295ce87c31acb12eadee9080bed53ba3a4fbd03180abfa4771c39d0b2d79ccdd28b04766ddd6eb80b1c8f3dc9e5fd2b8c17fe921125d1ba SHA512 834f04aed25aa6f9e660e64b02e488f2c99cf8cb4e20a11c813c8e21b7d91a650e0c795c1369147206d707c75f2a1d25144b08ca9b1b02162443d4ea1bd36d26
 DIST genpatches-6.12-13.extras.tar.xz 4056 BLAKE2B af48fba5d81bf8e13658555d94fa131e3f8fa06144148dd11b58823f802c5c4aea823fc4cd8f308ba6518638b65637d0b8a030d513d4b2a05384fc8f40550d94 SHA512 d7b1290d44696284fde4980ad3b90a190659056739a084ae704d1edf1a06ee166fa78960a42999b9e6e2d8aad4fd330621c8865f5e2517f1453c916d6375a365
 DIST gentoo-kernel-config-g14.tar.gz 5686 BLAKE2B e5147ff4ffab92428b3e7f1b0897b997f8a298805f4f43b0a4d3047607bbb1a5ebfc268e0bb9688372a5eda561df9f256c78e00cdd7e3caf7868724214722f56 SHA512 f79638f9ff9dd0154512baf0234024216b6708d35a3c03a580ca5913286ad1ea13bdde5ea9b4722c6a7cd8d591c11ec52c1e225111a260343cd56aa1f1a88502
-DIST kernel-aarch64-fedora.config.6.11.5-gentoo 285046 BLAKE2B e8ae27d70fa023976e950d4edcb38963e2fff39efa5cd1ff5922278e871efe6e6cda11c609e721eb2a3f7b030ea75447be384065d3b177000c301fc287a34d7f SHA512 121bbeebace3b760ff6ef36cf9970def3073966ea2fc1089c19c08d27a0524502dedc8c988c5239e78ce04caea6feb5ba7b5d53e0319b22ba63ce6cbc2a07e75
 DIST kernel-aarch64-fedora.config.6.12.1-gentoo 287989 BLAKE2B fbf6183487ffc6d30543c6b9caedbca224cc9ce4ec917e35ab351030212b721af8cc33aafa1feb229a1d6b45c9f45329f8e4957bdb3d43bee7ac223eeb90a994 SHA512 fad6121dfe4a3c82039cfe77614e90b4a954fe12d156f29ef9a596745327a3d30c7a40fc4002405a692685c7deaf9a7d3d6f944d505bc51ed5c387f9c9fd6311
-DIST kernel-i686-fedora.config.6.11.5-gentoo 252533 BLAKE2B 2578babdc1916dded2da15e51c497219c551b83bf7ca32c1be8fd79dc7d7c57afc797c6cb736c0b8ba0e71a6a8db73be2db48c594355737a6ff24fb9bbe9c632 SHA512 6ae96a64e6a3b0ee2fd1bd7e9a0cdbfa1ea5a22a1066811d87dbeb6335d625b7d336366d73ff9936714103a9c1ccb4d01863092d4d252791366a34758916851f
 DIST kernel-i686-fedora.config.6.12.1-gentoo 255123 BLAKE2B 186e22a426f2485ba46429852d94e139b91cc3508eb405028b3fd779a224a2c8eaf3e6a15bbee7ebc54a295bf8c3d434d972cd33b42b87951cf826435a3c3d65 SHA512 90f505be0faab191dc06dba834e4d6726f88d9868b6610ab5b89c496a4b4c37e9ce414e6e1447136edb028420a243b1ce620de5e97aa2b4b784e20a6eeacd015
-DIST kernel-ppc64le-fedora.config.6.11.5-gentoo 239546 BLAKE2B 1ea0246c6416a81ebe30dd534ba60ebbef9f8aca4bb36b4f961d9f781a88c8c5ea67fe7cabd959332b02096ce8152567045dd85be79a25cb0b80aa5adae5008f SHA512 9d0eceea756171a6971faa15a04d9cd0c17e5c92db39c2546ab238bda6ef4a88532fe6079f20d3b225c6eb5d26c63e5134e78c617d02b30e1e90e2c7ae18ecc9
 DIST kernel-ppc64le-fedora.config.6.12.1-gentoo 241811 BLAKE2B 59fcfd508a208e7a6a393d1b35811e553098669e9db7302fe929d998fb705330f104b671ba34d6e5992f9c4ddb2e7f30509a261f324400cb3cd5216405ef4614 SHA512 ab0bf86e20bf3348b8703fbf7aeb29105a02ca768f9a4cf47fc59c77e452b4659367bd8c10ceb67295d869158905e2c76c2a55745e01db3fab420ce257c11cb6
-DIST kernel-x86_64-fedora.config.6.11.5-gentoo 253618 BLAKE2B 1c366b738886b8ab0614c38a8f4e71ab4edc6e33083ff129a9235ee68faf9280c124e8c28280dd9f041532a69dc64242fa3d49f1b11a155945227fe3363d0793 SHA512 e25c967e0f18f8de3a4a3e7fa4af06c08f507ca1c98c5dcb7311b776f77877231fef23c989c7e4321c51d4b2d68783244bb9162f27a4db08daa8e6c799e79b52
 DIST kernel-x86_64-fedora.config.6.12.1-gentoo 256170 BLAKE2B 39e03735453c66f4262005517fe00e66da0b8c813f7d4a0247083a69eac998c81d75ccdd6c56a389f287f2b9dd5d20e3703b9cfb4d3f89b4672897d026a6a00b SHA512 57f584d82145c5d35bd71f7b3fa5f45adb12cc46915d6f88c7c6f291b6dfde73a4a7f4b796cecf46b145fde5090efc26fdc4fe3945b37d5878efc94ce39b4b7a
-DIST linux-6.11.tar.xz 146900704 BLAKE2B e7750c0878d71a56a0ce52d4c4c912199dad5bf5e2e8f872585a6494afbb37cbd852e612a6858936d2dc9b7776a3933818f540db408d57e90d18ea5249bba7ab SHA512 329c1f94008742e3f0c2ce7e591a16316d1b2cb9ea4596d4f45604097e07b7aa2f64afa40630a07f321a858455c77aa32ba57b271932ddcf4dc27863f9081cea
 DIST linux-6.12.tar.xz 147906904 BLAKE2B b2ec2fc69218cacabbbe49f78384a5d259ca581b717617c12b000b16f4a4c59ee348ea886b37147f5f70fb9a7a01c1e2c8f19021078f6b23f5bc62d1c48d5e5e SHA512 a37b1823df7b4f72542f689b65882634740ba0401a42fdcf6601d9efd2e132e5a7650e70450ba76f6cd1f13ca31180f2ccee9d54fe4df89bc0000ade4380a548
-DIST linux-hardened-v6.11.8-hardened1.patch 95386 BLAKE2B c8afa1a25191e73d0a1208ce3bc7dea7d856d2697adcd3f5a9d1ec9695f393aa42099353699c1f58dd056c6fb4215860661a6a17358c887877612ac58a4cf3f6 SHA512 d5baa895f069af8e8f3e6d605e86e10137de6a3d956d8dc092e6c3ed4c52ae6faa9dc10dce2bee6696a75e0d7e595f912e06f64a36965ef282918145567597b3
 DIST linux-hardened-v6.12.10-hardened1.patch 89621 BLAKE2B f33abaf900d6401b58bdd712f0ab3069aa9156d2b68666248e53dc7c93a9817d6ee220cb70b47f3b225cfb39d779094c1021f20a93c060933bff94ba0f51a3d1 SHA512
66c70fd5d98a5a603b5661f6a0915fc34544180cededfd02c8f5b374da5af2b1d5a5b2e6dd52aa8aaa8b59f07ae0a4f2adafc8c970a6c08e7cc56289ef0e96bf diff --git a/sys-kernel/hardened-kernel/files/linux-6.11.amd64.config b/sys-kernel/hardened-kernel/files/linux-6.11.amd64.config deleted file mode 100644 index 1138d10..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.11.amd64.config +++ /dev/null @@ -1,7068 +0,0 @@ -# -# Automatically generated file; DO NOT EDIT. -# Linux/x86 6.11.8 Kernel Configuration -# -CONFIG_CC_VERSION_TEXT="gcc (Gentoo Hardened 13.2.1_p20230826 p7) 13.2.1 20230826" -CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=130201 -CONFIG_CLANG_VERSION=0 -CONFIG_AS_IS_GNU=y -CONFIG_AS_VERSION=24000 -CONFIG_LD_IS_BFD=y -CONFIG_LD_VERSION=24000 -CONFIG_LLD_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y -CONFIG_GCC_ASM_GOTO_OUTPUT_BROKEN=y -CONFIG_TOOLS_SUPPORT_RELR=y -CONFIG_CC_HAS_ASM_INLINE=y -CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y -CONFIG_PAHOLE_VERSION=127 -CONFIG_HAVE_CTF_TOOLCHAIN=y -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_TABLE_SORT=y -CONFIG_THREAD_INFO_IN_TASK=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -# CONFIG_COMPILE_TEST is not set -# CONFIG_WERROR is not set -CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_BUILD_SALT="" -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -CONFIG_HAVE_KERNEL_ZSTD=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -# CONFIG_KERNEL_XZ is not set -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -CONFIG_KERNEL_ZSTD=y -CONFIG_DEFAULT_INIT="" -CONFIG_DEFAULT_HOSTNAME="gentoo" -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_SYSVIPC_COMPAT=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_WATCH_QUEUE=y -CONFIG_CROSS_MEMORY_ATTACH=y -# CONFIG_USELIB is not set -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_GENERIC_IRQ_MIGRATION=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_DOMAIN_HIERARCHY=y -CONFIG_GENERIC_MSI_IRQ=y -CONFIG_IRQ_MSI_IOMMU=y -CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y -CONFIG_GENERIC_IRQ_RESERVATION_MODE=y -CONFIG_GENERIC_IRQ_STAT_SNAPSHOT=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -# CONFIG_GENERIC_IRQ_DEBUGFS is not set -# end of IRQ subsystem - -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_INIT=y -CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y -CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y -CONFIG_CONTEXT_TRACKING=y -CONFIG_CONTEXT_TRACKING_IDLE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_CONTEXT_TRACKING_USER=y -CONFIG_CONTEXT_TRACKING_USER_FORCE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US=100 -# end of Timers subsystem - -CONFIG_BPF=y -CONFIG_HAVE_EBPF_JIT=y -CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y - -# -# BPF subsystem -# -CONFIG_BPF_SYSCALL=y -CONFIG_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y 
-CONFIG_BPF_JIT_DEFAULT_ON=y -CONFIG_BPF_UNPRIV_DEFAULT_OFF=y -# CONFIG_BPF_PRELOAD is not set -CONFIG_BPF_LSM=y -# end of BPF subsystem - -CONFIG_PREEMPT_BUILD=y -# CONFIG_PREEMPT_NONE is not set -CONFIG_PREEMPT_VOLUNTARY=y -# CONFIG_PREEMPT is not set -CONFIG_PREEMPT_COUNT=y -CONFIG_PREEMPTION=y -CONFIG_PREEMPT_DYNAMIC=y -CONFIG_SCHED_CORE=y - -# -# CPU/Task time and stats accounting -# -CONFIG_VIRT_CPU_ACCOUNTING=y -# CONFIG_TICK_CPU_ACCOUNTING is not set -CONFIG_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_SCHED_AVG_IRQ=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_PSI=y -# CONFIG_PSI_DEFAULT_DISABLED is not set -# end of CPU/Task time and stats accounting - -CONFIG_CPU_ISOLATION=y - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -CONFIG_PREEMPT_RCU=y -# CONFIG_RCU_EXPERT is not set -CONFIG_TREE_SRCU=y -CONFIG_TASKS_RCU_GENERIC=y -CONFIG_NEED_TASKS_RCU=y -CONFIG_TASKS_RCU=y -CONFIG_TASKS_RUDE_RCU=y -CONFIG_TASKS_TRACE_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_NEED_SEGCBLIST=y -# end of RCU Subsystem - -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y -CONFIG_IKHEADERS=m -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 -CONFIG_PRINTK_INDEX=y -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y - -# -# Scheduler features -# -# CONFIG_UCLAMP_TASK is not set -# CONFIG_SCHED_ALT is not set -# end of Scheduler features - -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y -CONFIG_CC_HAS_INT128=y -CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" -CONFIG_GCC10_NO_ARRAY_BOUNDS=y -CONFIG_CC_NO_ARRAY_BOUNDS=y -CONFIG_GCC_NO_STRINGOP_OVERFLOW=y -CONFIG_CC_NO_STRINGOP_OVERFLOW=y -CONFIG_ARCH_SUPPORTS_INT128=y -# CONFIG_NUMA_BALANCING is not set -CONFIG_SLAB_OBJ_EXT=y -CONFIG_CGROUPS=y -CONFIG_PAGE_COUNTER=y -# CONFIG_CGROUP_FAVOR_DYNMODS is not set -CONFIG_MEMCG=y -# CONFIG_MEMCG_V1 is not set -CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_WRITEBACK=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_SCHED_MM_CID=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_RDMA=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_PROC_PID_CPUSET=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_BPF=y -CONFIG_CGROUP_MISC=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_SOCK_CGROUP_DATA=y -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_TIME_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_USER_NS_UNPRIVILEGED=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_SCHED_AUTOGROUP=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_RD_GZIP is not set -# CONFIG_RD_BZIP2 is not set -# CONFIG_RD_LZMA is not set -CONFIG_RD_XZ=y -# CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set -CONFIG_RD_ZSTD=y -CONFIG_BOOT_CONFIG=y -# CONFIG_BOOT_CONFIG_FORCE is not set -# CONFIG_BOOT_CONFIG_EMBED is not set -CONFIG_INITRAMFS_PRESERVE_MTIME=y -CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_LD_ORPHAN_WARN=y -CONFIG_LD_ORPHAN_WARN_LEVEL="warn" -CONFIG_SYSCTL=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -# CONFIG_EXPERT is not set -CONFIG_UID16=y -CONFIG_MULTIUSER=y -CONFIG_SGETMASK_SYSCALL=y -CONFIG_SYSFS_SYSCALL=y -CONFIG_FHANDLE=y -CONFIG_POSIX_TIMERS=y -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y 
-CONFIG_FUTEX=y -CONFIG_FUTEX_PI=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_IO_URING=y -CONFIG_ADVISE_SYSCALLS=y -CONFIG_MEMBARRIER=y -CONFIG_KCMP=y -CONFIG_RSEQ=y -CONFIG_CACHESTAT_SYSCALL=y -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_SELFTEST is not set -# CONFIG_KALLSYMS_ALL is not set -CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y -CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y -CONFIG_HAVE_PERF_EVENTS=y -CONFIG_GUEST_PERF_EVENTS=y - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -# end of Kernel Performance Events And Counters - -CONFIG_SYSTEM_DATA_VERIFICATION=y -CONFIG_PROFILING=y -CONFIG_TRACEPOINTS=y - -# -# Kexec and crash features -# -# CONFIG_KEXEC is not set -# CONFIG_KEXEC_FILE is not set -# end of Kexec and crash features -# end of General setup - -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_MMU=y -CONFIG_ARCH_MMAP_RND_BITS_MIN=28 -CONFIG_ARCH_MMAP_RND_BITS_MAX=32 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_AUDIT_ARCH=y -CONFIG_HAVE_INTEL_TXT=y -CONFIG_X86_64_SMP=y -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_PGTABLE_LEVELS=4 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y - -# -# Processor type and features -# -CONFIG_SMP=y -CONFIG_X86_X2APIC=y -CONFIG_X86_POSTED_MSI=y -CONFIG_X86_MPPARSE=y -CONFIG_X86_CPU_RESCTRL=y -CONFIG_X86_FRED=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -CONFIG_X86_INTEL_LPSS=y -CONFIG_X86_AMD_PLATFORM_DEVICE=y -CONFIG_IOSF_MBI=y -# CONFIG_IOSF_MBI_DEBUG is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_HYPERVISOR_GUEST=y -CONFIG_PARAVIRT=y -# CONFIG_PARAVIRT_DEBUG is not set -CONFIG_PARAVIRT_SPINLOCKS=y -CONFIG_X86_HV_CALLBACK_VECTOR=y -# CONFIG_XEN is not set -CONFIG_KVM_GUEST=y -CONFIG_ARCH_CPUIDLE_HALTPOLL=y -CONFIG_PVH=y -CONFIG_PARAVIRT_TIME_ACCOUNTING=y -CONFIG_PARAVIRT_CLOCK=y -# CONFIG_JAILHOUSE_GUEST is not set -# CONFIG_ACRN_GUEST is not set -# CONFIG_INTEL_TDX_GUEST is not set -CONFIG_MK8=y -# CONFIG_MK8SSE3 is not set -# CONFIG_MK10 is not set -# CONFIG_MBARCELONA is not set -# CONFIG_MBOBCAT is not set -# CONFIG_MJAGUAR is not set -# CONFIG_MBULLDOZER is not set -# CONFIG_MPILEDRIVER is not set -# CONFIG_MSTEAMROLLER is not set -# CONFIG_MEXCAVATOR is not set -# CONFIG_MZEN is not set -# CONFIG_MZEN2 is not set -# CONFIG_MZEN3 is not set -# CONFIG_MZEN4 is not set -# CONFIG_MPSC is not set -# CONFIG_MATOM is not set -# CONFIG_MCORE2 is not set -# CONFIG_MNEHALEM is not set -# CONFIG_MWESTMERE is not set -# CONFIG_MSILVERMONT is not set -# CONFIG_MGOLDMONT is not set -# CONFIG_MGOLDMONTPLUS is not set -# CONFIG_MSANDYBRIDGE is not set -# CONFIG_MIVYBRIDGE is not set -# CONFIG_MHASWELL is not set -# CONFIG_MBROADWELL is not set -# CONFIG_MSKYLAKE is not set -# CONFIG_MSKYLAKEX is not set -# CONFIG_MCANNONLAKE is not set -# CONFIG_MICELAKE is not set -# CONFIG_MCASCADELAKE is not set -# CONFIG_MCOOPERLAKE is not set -# CONFIG_MTIGERLAKE is not set -# CONFIG_MSAPPHIRERAPIDS is not set -# CONFIG_MROCKETLAKE is not set -# CONFIG_MALDERLAKE is not set -# 
CONFIG_MRAPTORLAKE is not set -# CONFIG_MMETEORLAKE is not set -# CONFIG_MEMERALDRAPIDS is not set -# CONFIG_GENERIC_CPU is not set -# CONFIG_MNATIVE_INTEL is not set -# CONFIG_MNATIVE_AMD is not set -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_INTEL_USERCOPY=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_HAVE_PAE=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_IA32_FEAT_CTL=y -CONFIG_X86_VMX_FEATURE_NAMES=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_HYGON=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_ZHAOXIN=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -# CONFIG_GART_IOMMU is not set -CONFIG_BOOT_VESA_SUPPORT=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS_RANGE_BEGIN=2 -CONFIG_NR_CPUS_RANGE_END=512 -CONFIG_NR_CPUS_DEFAULT=64 -CONFIG_NR_CPUS=8 -CONFIG_SCHED_CLUSTER=y -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_SCHED_MC_PRIO=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_ACPI_MADT_WAKEUP=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCELOG_LEGACY is not set -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -# CONFIG_X86_MCE_INJECT is not set - -# -# Performance monitoring -# -CONFIG_PERF_EVENTS_INTEL_UNCORE=y -CONFIG_PERF_EVENTS_INTEL_RAPL=y -CONFIG_PERF_EVENTS_INTEL_CSTATE=y -# CONFIG_PERF_EVENTS_AMD_POWER is not set -CONFIG_PERF_EVENTS_AMD_UNCORE=m -# CONFIG_PERF_EVENTS_AMD_BRS is not set -# end of Performance monitoring - -CONFIG_X86_VSYSCALL_EMULATION=y -# CONFIG_X86_IOPL_IOPERM is not set -CONFIG_MICROCODE=y -# CONFIG_MICROCODE_LATE_LOADING is not set -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_X86_5LEVEL is not set -CONFIG_X86_DIRECT_GBPAGES=y -# CONFIG_X86_CPA_STATISTICS is not set -# CONFIG_AMD_MEM_ENCRYPT is not set -CONFIG_NUMA=y -# CONFIG_AMD_NUMA is not set -CONFIG_X86_64_ACPI_NUMA=y -# CONFIG_NUMA_EMU is not set -CONFIG_NODES_SHIFT=2 -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -# CONFIG_ARCH_MEMORY_PROBE is not set -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_X86_PMEM_LEGACY_DEVICE=y -CONFIG_X86_PMEM_LEGACY=m -CONFIG_X86_CHECK_BIOS_CORRUPTION=y -CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=0 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_X86_UMIP=y -CONFIG_CC_HAS_IBT=y -CONFIG_X86_CET=y -CONFIG_X86_KERNEL_IBT=y -CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -# CONFIG_X86_INTEL_TSX_MODE_OFF is not set -# CONFIG_X86_INTEL_TSX_MODE_ON is not set -CONFIG_X86_INTEL_TSX_MODE_AUTO=y -# CONFIG_X86_SGX is not set -# CONFIG_X86_USER_SHADOW_STACK is not set -# CONFIG_INTEL_TDX_HOST is not set -CONFIG_EFI=y -CONFIG_EFI_STUB=y -# CONFIG_EFI_HANDOVER_PROTOCOL is not set -# CONFIG_EFI_MIXED is not set -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -CONFIG_ARCH_SUPPORTS_KEXEC=y -CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y -CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY=y -CONFIG_ARCH_SUPPORTS_KEXEC_SIG=y -CONFIG_ARCH_SUPPORTS_KEXEC_SIG_FORCE=y -CONFIG_ARCH_SUPPORTS_KEXEC_BZIMAGE_VERIFY_SIG=y -CONFIG_ARCH_SUPPORTS_KEXEC_JUMP=y -CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y -CONFIG_ARCH_SUPPORTS_CRASH_HOTPLUG=y -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_RANDOMIZE_BASE=y -CONFIG_X86_NEED_RELOCS=y -CONFIG_PHYSICAL_ALIGN=0x1000000 
-CONFIG_DYNAMIC_MEMORY_LAYOUT=y -CONFIG_RANDOMIZE_MEMORY=y -CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa -CONFIG_HOTPLUG_CPU=y -# CONFIG_COMPAT_VDSO is not set -# CONFIG_LEGACY_VSYSCALL_XONLY is not set -CONFIG_LEGACY_VSYSCALL_NONE=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="vdso32=0 page_poison=1 page_alloc.shuffle=1 slab_nomerge pti=on" -# CONFIG_CMDLINE_OVERRIDE is not set -# CONFIG_MODIFY_LDT_SYSCALL is not set -# CONFIG_STRICT_SIGALTSTACK_SIZE is not set -CONFIG_HAVE_LIVEPATCH=y -# end of Processor type and features - -CONFIG_CC_HAS_NAMED_AS=y -CONFIG_USE_X86_SEG_SUPPORT=y -CONFIG_CC_HAS_SLS=y -CONFIG_CC_HAS_RETURN_THUNK=y -CONFIG_CC_HAS_ENTRY_PADDING=y -CONFIG_FUNCTION_PADDING_CFI=11 -CONFIG_FUNCTION_PADDING_BYTES=16 -CONFIG_CALL_PADDING=y -CONFIG_HAVE_CALL_THUNKS=y -CONFIG_CALL_THUNKS=y -CONFIG_PREFIX_SYMBOLS=y -CONFIG_CPU_MITIGATIONS=y -CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y -CONFIG_MITIGATION_RETPOLINE=y -CONFIG_MITIGATION_RETHUNK=y -CONFIG_MITIGATION_UNRET_ENTRY=y -CONFIG_MITIGATION_CALL_DEPTH_TRACKING=y -# CONFIG_CALL_THUNKS_DEBUG is not set -CONFIG_MITIGATION_IBPB_ENTRY=y -CONFIG_MITIGATION_IBRS_ENTRY=y -CONFIG_MITIGATION_SRSO=y -CONFIG_MITIGATION_SLS=y -CONFIG_MITIGATION_GDS_FORCE=y -CONFIG_MITIGATION_RFDS=y -CONFIG_MITIGATION_SPECTRE_BHI=y -CONFIG_ARCH_HAS_ADD_PAGES=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_HIBERNATION_SNAPSHOT_DEV=y -CONFIG_HIBERNATION_COMP_LZO=y -# CONFIG_HIBERNATION_COMP_LZ4 is not set -CONFIG_HIBERNATION_DEF_COMP="lzo" -CONFIG_PM_STD_PARTITION="" -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_AUTOSLEEP=y -# CONFIG_PM_USERSPACE_AUTOSLEEP is not set -CONFIG_PM_WAKELOCKS=y -CONFIG_PM_WAKELOCKS_LIMIT=100 -CONFIG_PM_WAKELOCKS_GC=y -CONFIG_PM=y -# CONFIG_PM_DEBUG is not set -CONFIG_PM_CLK=y -CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y -CONFIG_ENERGY_MODEL=y -CONFIG_ARCH_SUPPORTS_ACPI=y -CONFIG_ACPI=y -CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y -CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y -CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y -CONFIG_ACPI_THERMAL_LIB=y -# CONFIG_ACPI_DEBUGGER is not set -CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_FPDT=y -CONFIG_ACPI_LPIT=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y -CONFIG_ACPI_EC_DEBUGFS=m -CONFIG_ACPI_AC=y -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_VIDEO=m -CONFIG_ACPI_FAN=y -CONFIG_ACPI_TAD=m -# CONFIG_ACPI_DOCK is not set -CONFIG_ACPI_CPU_FREQ_PSS=y -CONFIG_ACPI_PROCESSOR_CSTATE=y -CONFIG_ACPI_PROCESSOR_IDLE=y -CONFIG_ACPI_CPPC_LIB=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ACPI_PLATFORM_PROFILE=m -CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y -# CONFIG_ACPI_TABLE_UPGRADE is not set -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_PCI_SLOT=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_HOTPLUG_MEMORY=y -CONFIG_ACPI_HOTPLUG_IOAPIC=y -CONFIG_ACPI_SBS=m -CONFIG_ACPI_HED=y -CONFIG_ACPI_BGRT=y -CONFIG_ACPI_NHLT=y -# CONFIG_ACPI_NFIT is not set -CONFIG_ACPI_NUMA=y -# CONFIG_ACPI_HMAT is not set -CONFIG_HAVE_ACPI_APEI=y -CONFIG_HAVE_ACPI_APEI_NMI=y -CONFIG_ACPI_APEI=y -CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y -CONFIG_ACPI_APEI_MEMORY_FAILURE=y -# CONFIG_ACPI_APEI_EINJ is not set -# CONFIG_ACPI_APEI_ERST_DEBUG is not set -# CONFIG_ACPI_DPTF is not set -CONFIG_ACPI_EXTLOG=m -CONFIG_ACPI_CONFIGFS=m -# CONFIG_ACPI_PFRUT is not set -CONFIG_ACPI_PCC=y -# CONFIG_ACPI_FFH is not set -CONFIG_PMIC_OPREGION=y 
-CONFIG_ACPI_VIOT=y -CONFIG_ACPI_PRMT=y -CONFIG_X86_PM_TIMER=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y - -# -# CPU frequency scaling drivers -# -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=y -CONFIG_X86_AMD_PSTATE=y -CONFIG_X86_AMD_PSTATE_DEFAULT_MODE=3 -CONFIG_X86_AMD_PSTATE_UT=m -CONFIG_X86_ACPI_CPUFREQ=y -# CONFIG_X86_ACPI_CPUFREQ_CPB is not set -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -# CONFIG_X86_SPEEDSTEP_CENTRINO is not set -# CONFIG_X86_P4_CLOCKMOD is not set - -# -# shared options -# -# end of CPU Frequency scaling - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -CONFIG_CPU_IDLE_GOV_TEO=y -CONFIG_CPU_IDLE_GOV_HALTPOLL=y -CONFIG_HALTPOLL_CPUIDLE=y -# end of CPU Idle - -CONFIG_INTEL_IDLE=y -# end of Power management and ACPI options - -# -# Bus options (PCI etc.) -# -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_MMCONF_FAM10H=y -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -# end of Bus options (PCI etc.) - -# -# Binary Emulations -# -CONFIG_IA32_EMULATION=y -# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set -# CONFIG_X86_X32_ABI is not set -CONFIG_COMPAT_32=y -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -# end of Binary Emulations - -CONFIG_KVM_COMMON=y -CONFIG_HAVE_KVM_PFNCACHE=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_DIRTY_RING=y -CONFIG_HAVE_KVM_DIRTY_RING_TSO=y -CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_READONLY_MEM=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY=y -CONFIG_KVM_COMPAT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_HAVE_KVM_NO_POLL=y -CONFIG_KVM_XFER_TO_GUEST_WORK=y -CONFIG_HAVE_KVM_PM_NOTIFIER=y -CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y -CONFIG_KVM_GENERIC_MMU_NOTIFIER=y -CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES=y -CONFIG_KVM_PRIVATE_MEM=y -CONFIG_KVM_GENERIC_PRIVATE_MEM=y -CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE=y -CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_KVM_AMD_SEV=y -CONFIG_KVM_SMM=y -# CONFIG_KVM_HYPERV is not set -# CONFIG_KVM_XEN is not set -CONFIG_KVM_EXTERNAL_WRITE_TRACKING=y -CONFIG_KVM_MAX_NR_VCPUS=1024 -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y -CONFIG_AS_TPAUSE=y -CONFIG_AS_GFNI=y -CONFIG_AS_VAES=y -CONFIG_AS_VPCLMULQDQ=y -CONFIG_AS_WRUSS=y -CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y - -# -# General architecture-dependent options -# -CONFIG_HOTPLUG_SMT=y -CONFIG_HOTPLUG_CORE_SYNC=y -CONFIG_HOTPLUG_CORE_SYNC_DEAD=y -CONFIG_HOTPLUG_CORE_SYNC_FULL=y -CONFIG_HOTPLUG_SPLIT_STARTUP=y -CONFIG_HOTPLUG_PARALLEL=y -CONFIG_GENERIC_ENTRY=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -# CONFIG_STATIC_KEYS_SELFTEST is not set -# CONFIG_STATIC_CALL_SELFTEST is not set -CONFIG_OPTPROBES=y -CONFIG_KPROBES_ON_FTRACE=y -CONFIG_UPROBES=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y 
-CONFIG_KRETPROBES=y -CONFIG_KRETPROBE_ON_RETHOOK=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y -CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y -CONFIG_HAVE_NMI=y -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_ARCH_HAS_FORTIFY_SOURCE=y -CONFIG_ARCH_HAS_SET_MEMORY=y -CONFIG_ARCH_HAS_SET_DIRECT_MAP=y -CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y -CONFIG_ARCH_HAS_CPU_PASID=y -CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y -CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y -CONFIG_ARCH_WANTS_NO_INSTR=y -CONFIG_HAVE_ASM_MODVERSIONS=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_RUST=y -CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y -CONFIG_MMU_GATHER_TABLE_FREE=y -CONFIG_MMU_GATHER_RCU_TABLE_FREE=y -CONFIG_MMU_GATHER_MERGE_VMAS=y -CONFIG_MMU_LAZY_TLB_REFCOUNT=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP=y -CONFIG_SECCOMP_FILTER=y -# CONFIG_SECCOMP_CACHE_DEBUG is not set -CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR_STRONG=y -CONFIG_ARCH_SUPPORTS_LTO_CLANG=y -CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y -CONFIG_LTO_NONE=y -CONFIG_ARCH_SUPPORTS_CFI_CLANG=y -CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y -CONFIG_HAVE_CONTEXT_TRACKING_USER=y -CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_MOVE_PUD=y -CONFIG_HAVE_MOVE_PMD=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y -CONFIG_HAVE_ARCH_HUGE_VMAP=y -CONFIG_HAVE_ARCH_HUGE_VMALLOC=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_ARCH_WANT_PMD_MKWRITE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_HAVE_MOD_ARCH_SPECIFIC=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y -CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y -CONFIG_SOFTIRQ_ON_OWN_STACK=y -CONFIG_ARCH_HAS_ELF_RANDOMIZE=y -CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_HAVE_EXIT_THREAD=y -CONFIG_ARCH_MMAP_RND_BITS=32 -CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 -CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y -CONFIG_HAVE_PAGE_SIZE_4KB=y -CONFIG_PAGE_SIZE_4KB=y -CONFIG_PAGE_SIZE_LESS_THAN_64KB=y -CONFIG_PAGE_SIZE_LESS_THAN_256KB=y -CONFIG_PAGE_SHIFT=12 -CONFIG_HAVE_OBJTOOL=y -CONFIG_HAVE_JUMP_LABEL_HACK=y -CONFIG_HAVE_NOINSTR_HACK=y -CONFIG_HAVE_NOINSTR_VALIDATION=y -CONFIG_HAVE_UACCESS_VALIDATION=y -CONFIG_HAVE_STACK_VALIDATION=y -CONFIG_HAVE_RELIABLE_STACKTRACE=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y -CONFIG_COMPAT_32BIT_TIME=y -CONFIG_HAVE_ARCH_VMAP_STACK=y -CONFIG_VMAP_STACK=y -CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y -CONFIG_RANDOMIZE_KSTACK_OFFSET=y -CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y -CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y 
-CONFIG_STRICT_KERNEL_RWX=y -CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y -CONFIG_STRICT_MODULE_RWX=y -CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y -CONFIG_ARCH_USE_MEMREMAP_PROT=y -CONFIG_LOCK_EVENT_COUNTS=y -CONFIG_ARCH_HAS_MEM_ENCRYPT=y -CONFIG_ARCH_HAS_CC_PLATFORM=y -CONFIG_HAVE_STATIC_CALL=y -CONFIG_HAVE_STATIC_CALL_INLINE=y -CONFIG_HAVE_PREEMPT_DYNAMIC=y -CONFIG_HAVE_PREEMPT_DYNAMIC_CALL=y -CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y -CONFIG_ARCH_HAS_ELFCORE_COMPAT=y -CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y -CONFIG_DYNAMIC_SIGFRAME=y -CONFIG_ARCH_HAS_HW_PTE_YOUNG=y -CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y -CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y -# end of GCOV-based kernel profiling - -CONFIG_HAVE_GCC_PLUGINS=y -CONFIG_GCC_PLUGINS=y -CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y -CONFIG_FUNCTION_ALIGNMENT_4B=y -CONFIG_FUNCTION_ALIGNMENT_16B=y -CONFIG_FUNCTION_ALIGNMENT=16 -# end of General architecture-dependent options - -CONFIG_RT_MUTEXES=y -CONFIG_MODULE_SIG_FORMAT=y -CONFIG_MODULES=y -CONFIG_MODULE_DEBUGFS=y -# CONFIG_MODULE_DEBUG is not set -# CONFIG_MODULE_FORCE_LOAD is not set -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y -CONFIG_MODVERSIONS=y -CONFIG_ASM_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -# CONFIG_MODULE_SIG_FORCE is not set -CONFIG_MODULE_SIG_ALL=y -CONFIG_MODULE_SIG_SHA1=y -# CONFIG_MODULE_SIG_SHA256 is not set -# CONFIG_MODULE_SIG_SHA384 is not set -# CONFIG_MODULE_SIG_SHA512 is not set -# CONFIG_MODULE_SIG_SHA3_256 is not set -# CONFIG_MODULE_SIG_SHA3_384 is not set -# CONFIG_MODULE_SIG_SHA3_512 is not set -CONFIG_MODULE_SIG_HASH="sha1" -# CONFIG_MODULE_COMPRESS_NONE is not set -# CONFIG_MODULE_COMPRESS_GZIP is not set -# CONFIG_MODULE_COMPRESS_XZ is not set -CONFIG_MODULE_COMPRESS_ZSTD=y -CONFIG_MODULE_DECOMPRESS=y -# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set -CONFIG_MODPROBE_PATH="/sbin/modprobe" -# CONFIG_TRIM_UNUSED_KSYMS is not set -CONFIG_MODULES_TREE_LOOKUP=y -CONFIG_BLOCK=y -CONFIG_BLOCK_LEGACY_AUTOLOAD=y -CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_CGROUP_RWSTAT=y -CONFIG_BLK_CGROUP_PUNT_BIO=y -CONFIG_BLK_DEV_BSG_COMMON=y -CONFIG_BLK_ICQ=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_WRITE_MOUNTED=y -CONFIG_BLK_DEV_ZONED=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_MQ=y -CONFIG_BLK_CGROUP_IOLATENCY=y -CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_CGROUP_IOPRIO=y -CONFIG_BLK_DEBUG_FS=y -# CONFIG_BLK_SED_OPAL is not set -CONFIG_BLK_INLINE_ENCRYPTION=y -CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_AIX_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -# end of Partition Types - -CONFIG_BLK_MQ_PCI=y -CONFIG_BLK_MQ_VIRTIO=y 
-CONFIG_BLK_PM=y -CONFIG_BLOCK_HOLDER_DEPRECATED=y -CONFIG_BLK_MQ_STACKING=y - -# -# IO Schedulers -# -CONFIG_MQ_IOSCHED_DEADLINE=y -CONFIG_MQ_IOSCHED_KYBER=y -CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y -# CONFIG_BFQ_CGROUP_DEBUG is not set -# end of IO Schedulers - -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_PADATA=y -CONFIG_ASN1=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_RWSEM_SPIN_ON_OWNER=y -CONFIG_LOCK_SPIN_ON_OWNER=y -CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y -CONFIG_QUEUED_SPINLOCKS=y -CONFIG_ARCH_USE_QUEUED_RWLOCKS=y -CONFIG_QUEUED_RWLOCKS=y -CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y -CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y -CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y -CONFIG_FREEZER=y - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ELFCORE=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_SCRIPT=y -# CONFIG_BINFMT_MISC is not set -CONFIG_COREDUMP=y -# end of Executable file formats - -# -# Memory Management options -# -CONFIG_ZPOOL=y -CONFIG_SWAP=y -CONFIG_ZSWAP=y -CONFIG_ZSWAP_DEFAULT_ON=y -CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set -# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set -CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y -CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd" -# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD is not set -# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y -CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc" -CONFIG_ZBUD=y -# CONFIG_Z3FOLD_DEPRECATED is not set -CONFIG_HAVE_ZSMALLOC=y -CONFIG_ZSMALLOC=y -# CONFIG_ZSMALLOC_STAT is not set -CONFIG_ZSMALLOC_CHAIN_SIZE=8 - -# -# Slab allocator options -# -CONFIG_SLUB=y -# CONFIG_SLAB_MERGE_DEFAULT is not set -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SLAB_BUCKETS=y -CONFIG_SLAB_CANARY=y -# CONFIG_SLUB_STATS is not set -CONFIG_SLUB_CPU_PARTIAL=y -CONFIG_RANDOM_KMALLOC_CACHES=y -# end of Slab allocator options - -CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -# CONFIG_COMPAT_BRK is not set -CONFIG_SPARSEMEM=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y -CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y -CONFIG_HAVE_GUP_FAST=y -CONFIG_NUMA_KEEP_MEMINFO=y -CONFIG_MEMORY_ISOLATION=y -CONFIG_EXCLUSIVE_SYSTEM_RAM=y -CONFIG_HAVE_BOOTMEM_INFO_NODE=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MHP_MEMMAP_ON_MEMORY=y -CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -CONFIG_MEMORY_BALLOON=y -CONFIG_BALLOON_COMPACTION=y -CONFIG_COMPACTION=y -CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1 -CONFIG_PAGE_REPORTING=y -CONFIG_MIGRATION=y -CONFIG_DEVICE_MIGRATION=y -CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y -CONFIG_CONTIG_ALLOC=y -CONFIG_PCP_BATCH_SCALE_MAX=5 -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -CONFIG_MEMORY_FAILURE=y -# CONFIG_HWPOISON_INJECT is not set -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ARCH_WANTS_THP_SWAP=y -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set 
-CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set -CONFIG_THP_SWAP=y -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_PGTABLE_HAS_HUGE_LEAVES=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_USE_PERCPU_NUMA_NODE_ID=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -# CONFIG_CMA is not set -# CONFIG_MEM_SOFT_DIRTY is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set -CONFIG_PAGE_IDLE_FLAG=y -CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y -CONFIG_ARCH_HAS_PTE_DEVMAP=y -CONFIG_ZONE_DMA=y -CONFIG_ZONE_DMA32=y -CONFIG_ZONE_DEVICE=y -CONFIG_HMM_MIRROR=y -CONFIG_GET_FREE_REGION=y -CONFIG_DEVICE_PRIVATE=y -CONFIG_VMAP_PFN=y -CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y -CONFIG_ARCH_HAS_PKEYS=y -CONFIG_VM_EVENT_COUNTERS=y -# CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_TEST is not set -# CONFIG_DMAPOOL_TEST is not set -CONFIG_ARCH_HAS_PTE_SPECIAL=y -CONFIG_MEMFD_CREATE=y -CONFIG_SECRETMEM=y -CONFIG_ANON_VMA_NAME=y -# CONFIG_USERFAULTFD is not set -CONFIG_LRU_GEN=y -CONFIG_LRU_GEN_ENABLED=y -# CONFIG_LRU_GEN_STATS is not set -CONFIG_LRU_GEN_WALKS_MMU=y -CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y -CONFIG_PER_VMA_LOCK=y -CONFIG_LOCK_MM_AND_FIND_VMA=y -CONFIG_IOMMU_MM_DATA=y -CONFIG_EXECMEM=y - -# -# Data Access Monitoring -# -# CONFIG_DAMON is not set -# end of Data Access Monitoring -# end of Memory Management options - -CONFIG_NET=y -CONFIG_NET_INGRESS=y -CONFIG_NET_EGRESS=y -CONFIG_NET_XGRESS=y -CONFIG_SKB_DECRYPTED=y -CONFIG_SKB_EXTENSIONS=y - -# -# Networking options -# -CONFIG_PACKET=m -CONFIG_PACKET_DIAG=m -CONFIG_UNIX=y -CONFIG_AF_UNIX_OOB=y -CONFIG_UNIX_DIAG=y -CONFIG_TLS=m -CONFIG_TLS_DEVICE=y -# CONFIG_TLS_TOE is not set -CONFIG_XFRM=y -CONFIG_XFRM_OFFLOAD=y -CONFIG_XFRM_ALGO=m -CONFIG_XFRM_USER=m -# CONFIG_XFRM_USER_COMPAT is not set -CONFIG_XFRM_INTERFACE=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -CONFIG_XFRM_STATISTICS=y -CONFIG_XFRM_AH=m -CONFIG_XFRM_ESP=m -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -# CONFIG_NET_KEY_MIGRATE is not set -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_NET_HANDSHAKE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -# CONFIG_IP_ROUTE_VERBOSE is not set -CONFIG_IP_ROUTE_CLASSID=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -CONFIG_NET_IPIP=m -# CONFIG_NET_IPGRE_DEMUX is not set -CONFIG_NET_IP_TUNNEL=m -CONFIG_IP_MROUTE_COMMON=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -# CONFIG_NET_IPVTI is not set -CONFIG_NET_UDP_TUNNEL=m -# CONFIG_NET_FOU is not set -# CONFIG_NET_FOU_IP_TUNNELS is not set -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_ESP_OFFLOAD=m -# CONFIG_INET_ESPINTCP is not set -CONFIG_INET_IPCOMP=m -CONFIG_INET_TABLE_PERTURB_ORDER=16 -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=y -# CONFIG_TCP_CONG_NV is not set -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=y -CONFIG_TCP_CONG_ILLINOIS=m -# CONFIG_TCP_CONG_DCTCP is not set -# CONFIG_TCP_CONG_CDG is not set 
-CONFIG_TCP_CONG_BBR=y -# CONFIG_DEFAULT_CUBIC is not set -# CONFIG_DEFAULT_VEGAS is not set -CONFIG_DEFAULT_BBR=y -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="bbr" -CONFIG_TCP_SIGPOOL=y -# CONFIG_TCP_AO is not set -CONFIG_TCP_MD5SIG=y -# CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON is not set -CONFIG_IPV6=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -# CONFIG_INET6_ESPINTCP is not set -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_IPV6_ILA=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -# CONFIG_IPV6_VTI is not set -CONFIG_IPV6_SIT=m -CONFIG_IPV6_SIT_6RD=y -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y -# CONFIG_IPV6_PIMSM_V2 is not set -CONFIG_IPV6_SEG6_LWTUNNEL=y -CONFIG_IPV6_SEG6_HMAC=y -CONFIG_IPV6_SEG6_BPF=y -# CONFIG_IPV6_RPL_LWTUNNEL is not set -CONFIG_IPV6_IOAM6_LWTUNNEL=y -# CONFIG_NETLABEL is not set -# CONFIG_MPTCP is not set -CONFIG_NETWORK_SECMARK=y -CONFIG_NET_PTP_CLASSIFY=y -# CONFIG_NETWORK_PHY_TIMESTAMPING is not set -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=m - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_INGRESS=y -CONFIG_NETFILTER_EGRESS=y -CONFIG_NETFILTER_SKIP_EGRESS=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_FAMILY_BRIDGE=y -CONFIG_NETFILTER_FAMILY_ARP=y -CONFIG_NETFILTER_BPF_LINK=y -CONFIG_NETFILTER_NETLINK_HOOK=m -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_SYSLOG=m -CONFIG_NETFILTER_CONNCOUNT=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CONNTRACK_OVS=y -CONFIG_NF_CT_PROTO_DCCP=y -CONFIG_NF_CT_PROTO_GRE=y -CONFIG_NF_CT_PROTO_SCTP=y -CONFIG_NF_CT_PROTO_UDPLITE=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_REDIRECT=y -CONFIG_NF_NAT_MASQUERADE=y -CONFIG_NF_NAT_OVS=y -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y -CONFIG_NF_TABLES_NETDEV=y -CONFIG_NFT_NUMGEN=m -CONFIG_NFT_CT=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_CONNLIMIT=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_NAT=m -CONFIG_NFT_TUNNEL=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_QUOTA=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NFT_FIB=m -CONFIG_NFT_FIB_INET=m -CONFIG_NFT_XFRM=m -CONFIG_NFT_SOCKET=m -CONFIG_NFT_OSF=m -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SYNPROXY=m -CONFIG_NF_DUP_NETDEV=m -CONFIG_NFT_DUP_NETDEV=m -CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m -CONFIG_NF_FLOW_TABLE_INET=m -CONFIG_NF_FLOW_TABLE=m 
-CONFIG_NF_FLOW_TABLE_PROCFS=y -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XTABLES_COMPAT=y - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_NAT=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -# end of Core Netfilter Configuration - -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_IPMAC=m -CONFIG_IP_SET_HASH_MAC=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -# CONFIG_IP_VS is not set - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_IP_NF_IPTABLES_LEGACY=m 
-CONFIG_NF_SOCKET_IPV4=m -CONFIG_NF_TPROXY_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NFT_DUP_IPV4=m -CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NF_DUP_IPV4=m -CONFIG_NF_LOG_ARP=m -CONFIG_NF_LOG_IPV4=m -CONFIG_NF_REJECT_IPV4=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_NFT_COMPAT_ARP=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# end of IP: Netfilter Configuration - -# -# IPv6: Netfilter Configuration -# -CONFIG_IP6_NF_IPTABLES_LEGACY=m -CONFIG_NF_SOCKET_IPV6=m -CONFIG_NF_TPROXY_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_REJECT_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_DUP_IPV6=m -CONFIG_NF_REJECT_IPV6=m -CONFIG_NF_LOG_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -# end of IPv6: Netfilter Configuration - -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_CONNTRACK_BRIDGE=m -CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -# CONFIG_IP_DCCP is not set -# CONFIG_IP_SCTP is not set -# CONFIG_RDS is not set -# CONFIG_TIPC is not set -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_L2TP=m -# CONFIG_L2TP_DEBUGFS is not set -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_STP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -# CONFIG_BRIDGE_VLAN_FILTERING is not set -# CONFIG_BRIDGE_MRP is not set -CONFIG_BRIDGE_CFM=y -# CONFIG_NET_DSA is not set -CONFIG_VLAN_8021Q=m -# CONFIG_VLAN_8021Q_GVRP is not set -# CONFIG_VLAN_8021Q_MVRP is not set -CONFIG_LLC=m -# CONFIG_LLC2 is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_PHONET is not set -# CONFIG_6LOWPAN is not set -# CONFIG_IEEE802154 is not set -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m 
-CONFIG_NET_SCH_PRIO=m -# CONFIG_NET_SCH_MULTIQ is not set -CONFIG_NET_SCH_RED=m -# CONFIG_NET_SCH_SFB is not set -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -# CONFIG_NET_SCH_CBS is not set -CONFIG_NET_SCH_ETF=m -CONFIG_NET_SCH_MQPRIO_LIB=m -CONFIG_NET_SCH_TAPRIO=m -CONFIG_NET_SCH_GRED=m -# CONFIG_NET_SCH_NETEM is not set -# CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_MQPRIO is not set -CONFIG_NET_SCH_SKBPRIO=m -# CONFIG_NET_SCH_CHOKE is not set -CONFIG_NET_SCH_QFQ=m -# CONFIG_NET_SCH_CODEL is not set -CONFIG_NET_SCH_FQ_CODEL=y -CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_FQ=m -# CONFIG_NET_SCH_HHF is not set -# CONFIG_NET_SCH_PIE is not set -CONFIG_NET_SCH_INGRESS=m -# CONFIG_NET_SCH_PLUG is not set -# CONFIG_NET_SCH_ETS is not set -CONFIG_NET_SCH_DEFAULT=y -# CONFIG_DEFAULT_FQ is not set -# CONFIG_DEFAULT_FQ_CODEL is not set -# CONFIG_DEFAULT_SFQ is not set -CONFIG_DEFAULT_PFIFO_FAST=y -CONFIG_DEFAULT_NET_SCH="pfifo_fast" - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=y -CONFIG_NET_CLS_BPF=m -# CONFIG_NET_CLS_FLOWER is not set -# CONFIG_NET_CLS_MATCHALL is not set -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -# CONFIG_NET_EMATCH_IPSET is not set -CONFIG_NET_EMATCH_IPT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -# CONFIG_NET_ACT_SAMPLE is not set -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -# CONFIG_NET_ACT_SIMP is not set -# CONFIG_NET_ACT_SKBEDIT is not set -# CONFIG_NET_ACT_CSUM is not set -# CONFIG_NET_ACT_MPLS is not set -# CONFIG_NET_ACT_VLAN is not set -CONFIG_NET_ACT_BPF=m -# CONFIG_NET_ACT_CONNMARK is not set -CONFIG_NET_ACT_CTINFO=m -# CONFIG_NET_ACT_SKBMOD is not set -# CONFIG_NET_ACT_IFE is not set -# CONFIG_NET_ACT_TUNNEL_KEY is not set -CONFIG_NET_ACT_CT=m -# CONFIG_NET_ACT_GATE is not set -# CONFIG_NET_TC_SKB_EXT is not set -CONFIG_NET_SCH_FIFO=y -# CONFIG_DCB is not set -CONFIG_DNS_RESOLVER=m -CONFIG_BATMAN_ADV=m -# CONFIG_BATMAN_ADV_BATMAN_V is not set -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -# CONFIG_BATMAN_ADV_DEBUG is not set -# CONFIG_BATMAN_ADV_TRACING is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_VXLAN=m -# CONFIG_VSOCKETS is not set -CONFIG_NETLINK_DIAG=y -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -# CONFIG_MPLS_ROUTING is not set -CONFIG_NET_NSH=m -# CONFIG_HSR is not set -# CONFIG_NET_SWITCHDEV is not set -# CONFIG_NET_L3_MASTER_DEV is not set -# CONFIG_QRTR is not set -# CONFIG_NET_NCSI is not set -CONFIG_PCPU_DEV_REFCNT=y -CONFIG_MAX_SKB_FRAGS=17 -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_SOCK_RX_QUEUE_MAPPING=y -CONFIG_XPS=y -CONFIG_CGROUP_NET_PRIO=y -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set -# end of Network testing -# end of Networking options - -# CONFIG_HAMRADIO is not set -# CONFIG_CAN is not set -CONFIG_BT=m -CONFIG_BT_BREDR=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -# CONFIG_BT_BNEP is not set -# CONFIG_BT_HIDP is not set -CONFIG_BT_LE=y -CONFIG_BT_LE_L2CAP_ECRED=y -# CONFIG_BT_LEDS is not set -CONFIG_BT_MSFTEXT=y 
-CONFIG_BT_AOSPEXT=y -CONFIG_BT_DEBUGFS=y -# CONFIG_BT_SELFTEST is not set -# CONFIG_BT_FEATURE_DEBUG is not set - -# -# Bluetooth device drivers -# -CONFIG_BT_INTEL=m -CONFIG_BT_BCM=m -CONFIG_BT_RTL=m -CONFIG_BT_MTK=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y -CONFIG_BT_HCIBTUSB_POLL_SYNC=y -CONFIG_BT_HCIBTUSB_BCM=y -CONFIG_BT_HCIBTUSB_MTK=y -CONFIG_BT_HCIBTUSB_RTL=y -# CONFIG_BT_HCIBTSDIO is not set -# CONFIG_BT_HCIUART is not set -# CONFIG_BT_HCIBCM203X is not set -# CONFIG_BT_HCIBCM4377 is not set -# CONFIG_BT_HCIBPA10X is not set -# CONFIG_BT_HCIBFUSB is not set -# CONFIG_BT_HCIVHCI is not set -# CONFIG_BT_MRVL is not set -CONFIG_BT_ATH3K=m -# CONFIG_BT_MTKSDIO is not set -CONFIG_BT_VIRTIO=m -# CONFIG_BT_INTEL_PCIE is not set -# end of Bluetooth device drivers - -CONFIG_AF_RXRPC=m -# CONFIG_AF_RXRPC_IPV6 is not set -# CONFIG_AF_RXRPC_INJECT_LOSS is not set -# CONFIG_AF_RXRPC_INJECT_RX_DELAY is not set -# CONFIG_AF_RXRPC_DEBUG is not set -# CONFIG_RXKAD is not set -# CONFIG_RXPERF is not set -# CONFIG_AF_KCM is not set -CONFIG_STREAM_PARSER=y -CONFIG_MCTP=y -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y -CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y -CONFIG_CFG80211_DEFAULT_PS=y -# CONFIG_CFG80211_DEBUGFS is not set -CONFIG_CFG80211_CRDA_SUPPORT=y -# CONFIG_CFG80211_WEXT is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -# CONFIG_NET_9P is not set -# CONFIG_CAIF is not set -CONFIG_CEPH_LIB=m -# CONFIG_CEPH_LIB_PRETTYDEBUG is not set -# CONFIG_CEPH_LIB_USE_DNS_RESOLVER is not set -# CONFIG_NFC is not set -# CONFIG_PSAMPLE is not set -# CONFIG_NET_IFE is not set -CONFIG_LWTUNNEL=y -CONFIG_LWTUNNEL_BPF=y -CONFIG_DST_CACHE=y -CONFIG_GRO_CELLS=y -CONFIG_SOCK_VALIDATE_XMIT=y -CONFIG_NET_SELFTESTS=m -CONFIG_NET_SOCK_MSG=y -CONFIG_PAGE_POOL=y -# CONFIG_PAGE_POOL_STATS is not set -CONFIG_FAILOVER=m -CONFIG_ETHTOOL_NETLINK=y - -# -# Device Drivers -# -CONFIG_HAVE_EISA=y -# CONFIG_EISA is not set -CONFIG_HAVE_PCI=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_PCIEAER=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIE_ECRC=y -CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=y -CONFIG_PCIE_PTM=y -# CONFIG_PCIE_EDR is not set -CONFIG_PCI_MSI=y -CONFIG_PCI_QUIRKS=y -# CONFIG_PCI_DEBUG is not set -CONFIG_PCI_REALLOC_ENABLE_AUTO=y -CONFIG_PCI_STUB=m -# CONFIG_PCI_PF_STUB is not set -CONFIG_PCI_ATS=y -CONFIG_PCI_LOCKLESS_CONFIG=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PRI=y -CONFIG_PCI_PASID=y -# CONFIG_PCI_P2PDMA is not set -CONFIG_PCI_LABEL=y -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=16 -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -# CONFIG_HOTPLUG_PCI_CPCI is not set -# CONFIG_HOTPLUG_PCI_SHPC is not set - -# -# PCI controller drivers -# -# CONFIG_VMD is not set - -# -# Cadence-based PCIe controllers -# -# end of Cadence-based PCIe controllers - -# -# 
DesignWare-based PCIe controllers -# -# CONFIG_PCI_MESON is not set -# CONFIG_PCIE_DW_PLAT_HOST is not set -# end of DesignWare-based PCIe controllers - -# -# Mobiveil-based PCIe controllers -# -# end of Mobiveil-based PCIe controllers - -# -# PLDA-based PCIe controllers -# -# end of PLDA-based PCIe controllers -# end of PCI controller drivers - -# -# PCI Endpoint -# -# CONFIG_PCI_ENDPOINT is not set -# end of PCI Endpoint - -# -# PCI switch controller drivers -# -# CONFIG_PCI_SW_SWITCHTEC is not set -# end of PCI switch controller drivers - -# CONFIG_CXL_BUS is not set -# CONFIG_PCCARD is not set -# CONFIG_RAPIDIO is not set - -# -# Generic Driver Options -# -CONFIG_AUXILIARY_BUS=y -CONFIG_UEVENT_HELPER=y -CONFIG_UEVENT_HELPER_PATH="" -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_DEVTMPFS_SAFE=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y - -# -# Firmware loader -# -CONFIG_FW_LOADER=y -CONFIG_FW_LOADER_PAGED_BUF=y -CONFIG_FW_LOADER_SYSFS=y -CONFIG_EXTRA_FIRMWARE="" -# CONFIG_FW_LOADER_USER_HELPER is not set -CONFIG_FW_LOADER_COMPRESS=y -CONFIG_FW_LOADER_COMPRESS_XZ=y -CONFIG_FW_LOADER_COMPRESS_ZSTD=y -CONFIG_FW_CACHE=y -CONFIG_FW_UPLOAD=y -# end of Firmware loader - -CONFIG_WANT_DEV_COREDUMP=y -CONFIG_ALLOW_DEV_COREDUMP=y -CONFIG_DEV_COREDUMP=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set -# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set -CONFIG_GENERIC_CPU_DEVICES=y -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_GENERIC_CPU_VULNERABILITIES=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=y -CONFIG_REGMAP_SPI=y -CONFIG_DMA_SHARED_BUFFER=y -# CONFIG_DMA_FENCE_TRACE is not set -# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set -# end of Generic Driver Options - -# -# Bus devices -# -# CONFIG_MHI_BUS is not set -# CONFIG_MHI_BUS_EP is not set -# end of Bus devices - -# -# Cache Drivers -# -# end of Cache Drivers - -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y - -# -# Firmware Drivers -# - -# -# ARM System Control and Management Interface Protocol -# -# end of ARM System Control and Management Interface Protocol - -# CONFIG_EDD is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=m -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -# CONFIG_ISCSI_IBFT is not set -# CONFIG_FW_CFG_SYSFS is not set -CONFIG_SYSFB=y -CONFIG_SYSFB_SIMPLEFB=y -# CONFIG_GOOGLE_FIRMWARE is not set - -# -# EFI (Extensible Firmware Interface) Support -# -CONFIG_EFI_ESRT=y -CONFIG_EFI_VARS_PSTORE=y -# CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set -CONFIG_EFI_DXE_MEM_ATTRIBUTES=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_BOOTLOADER_CONTROL=m -CONFIG_EFI_CAPSULE_LOADER=m -# CONFIG_EFI_TEST is not set -CONFIG_EFI_DEV_PATH_PARSER=y -CONFIG_APPLE_PROPERTIES=y -CONFIG_RESET_ATTACK_MITIGATION=y -CONFIG_EFI_RCI2_TABLE=y -# CONFIG_EFI_DISABLE_PCI_DMA is not set -CONFIG_EFI_EARLYCON=y -# CONFIG_EFI_CUSTOM_SSDT_OVERLAYS is not set -# CONFIG_EFI_DISABLE_RUNTIME is not set -CONFIG_EFI_COCO_SECRET=y -# end of EFI (Extensible Firmware Interface) Support - -CONFIG_UEFI_CPER=y -CONFIG_UEFI_CPER_X86=y - -# -# Qualcomm firmware drivers -# -# end of Qualcomm firmware drivers - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - -# CONFIG_GNSS is not set -# CONFIG_MTD is not set -# CONFIG_OF is not set -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -# CONFIG_PARPORT is not set -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -# CONFIG_BLK_DEV_FD is 
not set -CONFIG_CDROM=m -# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set -CONFIG_ZRAM=m -# CONFIG_ZRAM_DEF_COMP_LZORLE is not set -CONFIG_ZRAM_DEF_COMP_ZSTD=y -# CONFIG_ZRAM_DEF_COMP_LZ4 is not set -# CONFIG_ZRAM_DEF_COMP_LZO is not set -# CONFIG_ZRAM_DEF_COMP_LZ4HC is not set -CONFIG_ZRAM_DEF_COMP="zstd" -CONFIG_ZRAM_WRITEBACK=y -CONFIG_ZRAM_TRACK_ENTRY_ACTIME=y -CONFIG_ZRAM_MEMORY_TRACKING=y -CONFIG_ZRAM_MULTI_COMP=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_DRBD=m -# CONFIG_DRBD_FAULT_INJECTION is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set -CONFIG_VIRTIO_BLK=m -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_UBLK=m -# CONFIG_BLKDEV_UBLK_LEGACY_OPCODES is not set - -# -# NVME Support -# -CONFIG_NVME_CORE=y -CONFIG_BLK_DEV_NVME=y -# CONFIG_NVME_MULTIPATH is not set -# CONFIG_NVME_VERBOSE_ERRORS is not set -CONFIG_NVME_HWMON=y -# CONFIG_NVME_FC is not set -# CONFIG_NVME_TCP is not set -# CONFIG_NVME_HOST_AUTH is not set -# CONFIG_NVME_TARGET is not set -# end of NVME Support - -# -# Misc devices -# -# CONFIG_AD525X_DPOT is not set -# CONFIG_DUMMY_IRQ is not set -# CONFIG_IBM_ASM is not set -# CONFIG_PHANTOM is not set -# CONFIG_TIFM_CORE is not set -# CONFIG_ICS932S401 is not set -CONFIG_ENCLOSURE_SERVICES=m -# CONFIG_HP_ILO is not set -# CONFIG_APDS9802ALS is not set -# CONFIG_ISL29003 is not set -# CONFIG_ISL29020 is not set -# CONFIG_SENSORS_TSL2550 is not set -# CONFIG_SENSORS_BH1770 is not set -# CONFIG_SENSORS_APDS990X is not set -# CONFIG_HMC6352 is not set -# CONFIG_DS1682 is not set -# CONFIG_LATTICE_ECP3_CONFIG is not set -# CONFIG_SRAM is not set -# CONFIG_DW_XDATA_PCIE is not set -# CONFIG_PCI_ENDPOINT_TEST is not set -# CONFIG_XILINX_SDFEC is not set -# CONFIG_NSM is not set -# CONFIG_C2PORT is not set - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -# CONFIG_EEPROM_AT25 is not set -# CONFIG_EEPROM_MAX6875 is not set -CONFIG_EEPROM_93CX6=m -# CONFIG_EEPROM_93XX46 is not set -# CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_EE1004=m -# end of EEPROM support - -# CONFIG_CB710_CORE is not set - -# -# Texas Instruments shared transport line discipline -# -# end of Texas Instruments shared transport line discipline - -# CONFIG_SENSORS_LIS3_I2C is not set -# CONFIG_ALTERA_STAPL is not set -# CONFIG_INTEL_MEI is not set -# CONFIG_VMWARE_VMCI is not set -# CONFIG_GENWQE is not set -# CONFIG_ECHO is not set -# CONFIG_BCM_VK is not set -# CONFIG_MISC_ALCOR_PCI is not set -# CONFIG_MISC_RTSX_PCI is not set -# CONFIG_MISC_RTSX_USB is not set -# CONFIG_UACCE is not set -# CONFIG_PVPANIC is not set -# CONFIG_KEBA_CP500 is not set -# end of Misc devices - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -# CONFIG_RAID_ATTRS is not set -CONFIG_SCSI_COMMON=y -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -# CONFIG_CHR_DEV_ST is not set -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=y -CONFIG_BLK_DEV_BSG=y -# CONFIG_CHR_DEV_SCH is not set -CONFIG_SCSI_ENCLOSURE=m -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set -CONFIG_SCSI_SCAN_ASYNC=y - -# -# SCSI Transports -# -# CONFIG_SCSI_SPI_ATTRS is not set -# CONFIG_SCSI_FC_ATTRS is not set -# CONFIG_SCSI_ISCSI_ATTRS is not set -# CONFIG_SCSI_SAS_ATTRS is not set -# CONFIG_SCSI_SAS_LIBSAS is not set -# CONFIG_SCSI_SRP_ATTRS is not set -# end of SCSI Transports - -CONFIG_SCSI_LOWLEVEL=y -# CONFIG_ISCSI_TCP 
is not set -# CONFIG_ISCSI_BOOT_SYSFS is not set -# CONFIG_SCSI_CXGB3_ISCSI is not set -# CONFIG_SCSI_CXGB4_ISCSI is not set -# CONFIG_SCSI_BNX2_ISCSI is not set -# CONFIG_BE2ISCSI is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_HPSA is not set -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_3W_SAS is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_MVSAS is not set -# CONFIG_SCSI_MVUMI is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_SCSI_ESAS2R is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_MPT3SAS is not set -# CONFIG_SCSI_MPT2SAS is not set -# CONFIG_SCSI_MPI3MR is not set -# CONFIG_SCSI_SMARTPQI is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_MYRB is not set -# CONFIG_SCSI_MYRS is not set -# CONFIG_VMWARE_PVSCSI is not set -# CONFIG_SCSI_SNIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_FDOMAIN_PCI is not set -# CONFIG_SCSI_ISCI is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_AM53C974 is not set -# CONFIG_SCSI_WD719X is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_PMCRAID is not set -# CONFIG_SCSI_PM8001 is not set -CONFIG_SCSI_VIRTIO=m -# CONFIG_SCSI_DH is not set -# end of SCSI device support - -CONFIG_ATA=y -CONFIG_SATA_HOST=y -CONFIG_PATA_TIMINGS=y -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_FORCE=y -CONFIG_ATA_ACPI=y -# CONFIG_SATA_ZPODD is not set -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -CONFIG_SATA_MOBILE_LPM_POLICY=0 -CONFIG_SATA_AHCI_PLATFORM=m -# CONFIG_AHCI_DWC is not set -# CONFIG_SATA_INIC162X is not set -# CONFIG_SATA_ACARD_AHCI is not set -# CONFIG_SATA_SIL24 is not set -# CONFIG_ATA_SFF is not set -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -# CONFIG_MD_BITMAP_FILE is not set -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -CONFIG_BCACHE_ASYNC_REGISTRATION=y -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_DEBUG is not set -CONFIG_DM_BUFIO=m -# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -CONFIG_DM_UNSTRIPED=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_SMQ=m -CONFIG_DM_WRITECACHE=m -# CONFIG_DM_EBS is not set -# CONFIG_DM_ERA is not set -# CONFIG_DM_CLONE is not set -CONFIG_DM_MIRROR=m -# CONFIG_DM_LOG_USERSPACE is not set -CONFIG_DM_RAID=m -# CONFIG_DM_ZERO is not set -# CONFIG_DM_MULTIPATH is not set -# CONFIG_DM_DELAY is not set -CONFIG_DM_DUST=m -CONFIG_DM_UEVENT=y -# CONFIG_DM_FLAKEY is not set -CONFIG_DM_VERITY=m -CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=m -# CONFIG_DM_LOG_WRITES is not set -CONFIG_DM_INTEGRITY=m -CONFIG_DM_ZONED=m -CONFIG_DM_AUDIT=y -CONFIG_DM_VDO=m -# CONFIG_TARGET_CORE is not set -# CONFIG_FUSION is not set - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_FIREWIRE is not set -# CONFIG_FIREWIRE_NOSY is not set -# end of 
IEEE 1394 (FireWire) support - -# CONFIG_MACINTOSH_DRIVERS is not set -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -# CONFIG_BONDING is not set -CONFIG_DUMMY=m -CONFIG_WIREGUARD=m -# CONFIG_WIREGUARD_DEBUG is not set -# CONFIG_EQUALIZER is not set -# CONFIG_NET_FC is not set -# CONFIG_IFB is not set -# CONFIG_NET_TEAM is not set -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -# CONFIG_IPVLAN is not set -CONFIG_VXLAN=m -# CONFIG_GENEVE is not set -# CONFIG_BAREUDP is not set -# CONFIG_GTP is not set -CONFIG_PFCP=m -# CONFIG_AMT is not set -# CONFIG_MACSEC is not set -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -# CONFIG_NETCONSOLE_EXTENDED_LOG is not set -CONFIG_NETPOLL=y -CONFIG_NET_POLL_CONTROLLER=y -CONFIG_TUN=m -CONFIG_TAP=m -# CONFIG_TUN_VNET_CROSS_LE is not set -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -# CONFIG_NLMON is not set -CONFIG_NETKIT=y -# CONFIG_ARCNET is not set -# CONFIG_ATM_DRIVERS is not set -CONFIG_ETHERNET=y -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_AGERE is not set -# CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_ALTEON is not set -# CONFIG_ALTERA_TSE is not set -CONFIG_NET_VENDOR_AMAZON=y -# CONFIG_ENA_ETHERNET is not set -# CONFIG_NET_VENDOR_AMD is not set -# CONFIG_NET_VENDOR_AQUANTIA is not set -# CONFIG_NET_VENDOR_ARC is not set -CONFIG_NET_VENDOR_ASIX=y -# CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_CX_ECAT is not set -CONFIG_NET_VENDOR_BROADCOM=y -# CONFIG_B44 is not set -# CONFIG_BCMGENET is not set -# CONFIG_BNX2 is not set -# CONFIG_CNIC is not set -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y -# CONFIG_BNX2X is not set -# CONFIG_SYSTEMPORT is not set -# CONFIG_BNXT is not set -# CONFIG_NET_VENDOR_CADENCE is not set -# CONFIG_NET_VENDOR_CAVIUM is not set -# CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_CISCO is not set -# CONFIG_NET_VENDOR_CORTINA is not set -# CONFIG_NET_VENDOR_DAVICOM is not set -# CONFIG_DNET is not set -# CONFIG_NET_VENDOR_DEC is not set -# CONFIG_NET_VENDOR_DLINK is not set -# CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_ENGLEDER is not set -# CONFIG_NET_VENDOR_EZCHIP is not set -# CONFIG_NET_VENDOR_FUNGIBLE is not set -# CONFIG_NET_VENDOR_GOOGLE is not set -# CONFIG_NET_VENDOR_HUAWEI is not set -# CONFIG_NET_VENDOR_I825XX is not set -CONFIG_NET_VENDOR_INTEL=y -# CONFIG_E100 is not set -# CONFIG_E1000 is not set -CONFIG_E1000E=m -CONFIG_E1000E_HWTS=y -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -# CONFIG_IXGBE is not set -# CONFIG_IXGBEVF is not set -# CONFIG_I40E is not set -# CONFIG_I40EVF is not set -# CONFIG_ICE is not set -# CONFIG_FM10K is not set -# CONFIG_IGC is not set -# CONFIG_IDPF is not set -# CONFIG_JME is not set -# CONFIG_NET_VENDOR_ADI is not set -# CONFIG_NET_VENDOR_LITEX is not set -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MELLANOX is not set -# CONFIG_NET_VENDOR_META is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_MICROCHIP is not set -# CONFIG_NET_VENDOR_MICROSEMI is not set -# CONFIG_NET_VENDOR_MICROSOFT is not set -# CONFIG_NET_VENDOR_MYRI is not set -# CONFIG_FEALNX is not set -# CONFIG_NET_VENDOR_NI is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_NETERION is not set -# CONFIG_NET_VENDOR_NETRONOME is not set -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -# CONFIG_NET_VENDOR_OKI is not set -# CONFIG_ETHOC is not set -# CONFIG_NET_VENDOR_PACKET_ENGINES is not set -# CONFIG_NET_VENDOR_PENSANDO is not set -# CONFIG_NET_VENDOR_QLOGIC is not 
set -# CONFIG_NET_VENDOR_BROCADE is not set -# CONFIG_NET_VENDOR_QUALCOMM is not set -# CONFIG_NET_VENDOR_RDC is not set -# CONFIG_NET_VENDOR_REALTEK is not set -# CONFIG_NET_VENDOR_RENESAS is not set -# CONFIG_NET_VENDOR_ROCKER is not set -# CONFIG_NET_VENDOR_SAMSUNG is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SILAN is not set -# CONFIG_NET_VENDOR_SIS is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set -# CONFIG_NET_VENDOR_SMSC is not set -# CONFIG_NET_VENDOR_SOCIONEXT is not set -# CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SUN is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set -# CONFIG_NET_VENDOR_TEHUTI is not set -# CONFIG_NET_VENDOR_TI is not set -# CONFIG_NET_VENDOR_VERTEXCOM is not set -# CONFIG_NET_VENDOR_VIA is not set -# CONFIG_NET_VENDOR_WANGXUN is not set -# CONFIG_NET_VENDOR_WIZNET is not set -# CONFIG_NET_VENDOR_XILINX is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -CONFIG_PHYLINK=m -CONFIG_PHYLIB=m -CONFIG_SWPHY=y -# CONFIG_LED_TRIGGER_PHY is not set -CONFIG_FIXED_PHY=m -# CONFIG_SFP is not set - -# -# MII PHY device drivers -# -# CONFIG_AIR_EN8811H_PHY is not set -# CONFIG_AMD_PHY is not set -# CONFIG_ADIN_PHY is not set -# CONFIG_ADIN1100_PHY is not set -# CONFIG_AQUANTIA_PHY is not set -CONFIG_AX88796B_PHY=m -CONFIG_BROADCOM_PHY=m -# CONFIG_BCM54140_PHY is not set -# CONFIG_BCM7XXX_PHY is not set -# CONFIG_BCM84881_PHY is not set -# CONFIG_BCM87XX_PHY is not set -CONFIG_BCM_NET_PHYLIB=m -# CONFIG_CICADA_PHY is not set -# CONFIG_CORTINA_PHY is not set -# CONFIG_DAVICOM_PHY is not set -# CONFIG_ICPLUS_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_INTEL_XWAY_PHY is not set -# CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_MARVELL_PHY is not set -# CONFIG_MARVELL_10G_PHY is not set -# CONFIG_MARVELL_88Q2XXX_PHY is not set -# CONFIG_MARVELL_88X2222_PHY is not set -# CONFIG_MAXLINEAR_GPHY is not set -# CONFIG_MEDIATEK_GE_PHY is not set -# CONFIG_MICREL_PHY is not set -# CONFIG_MICROCHIP_T1S_PHY is not set -# CONFIG_MICROCHIP_PHY is not set -# CONFIG_MICROCHIP_T1_PHY is not set -# CONFIG_MICROSEMI_PHY is not set -# CONFIG_MOTORCOMM_PHY is not set -# CONFIG_NATIONAL_PHY is not set -# CONFIG_NXP_CBTX_PHY is not set -# CONFIG_NXP_C45_TJA11XX_PHY is not set -# CONFIG_NXP_TJA11XX_PHY is not set -# CONFIG_NCN26000_PHY is not set -# CONFIG_QCA83XX_PHY is not set -# CONFIG_QCA808X_PHY is not set -# CONFIG_QSEMI_PHY is not set -# CONFIG_REALTEK_PHY is not set -# CONFIG_RENESAS_PHY is not set -# CONFIG_ROCKCHIP_PHY is not set -# CONFIG_SMSC_PHY is not set -# CONFIG_STE10XP is not set -# CONFIG_TERANETICS_PHY is not set -# CONFIG_DP83822_PHY is not set -# CONFIG_DP83TC811_PHY is not set -# CONFIG_DP83848_PHY is not set -# CONFIG_DP83867_PHY is not set -# CONFIG_DP83869_PHY is not set -# CONFIG_DP83TD510_PHY is not set -# CONFIG_DP83TG720_PHY is not set -# CONFIG_VITESSE_PHY is not set -# CONFIG_XILINX_GMII2RGMII is not set -# CONFIG_MICREL_KS8995MA is not set - -# -# MCTP Device Drivers -# -# CONFIG_MCTP_SERIAL is not set -# CONFIG_MCTP_TRANSPORT_I3C is not set -# end of MCTP Device Drivers - -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_FWNODE_MDIO=m -CONFIG_ACPI_MDIO=m -CONFIG_MDIO_DEVRES=m -# CONFIG_MDIO_BITBANG is not set -# CONFIG_MDIO_BCM_UNIMAC is not set -# CONFIG_MDIO_MVUSB is not set -# CONFIG_MDIO_THUNDER is not set - -# -# MDIO Multiplexers -# - -# -# PCS device drivers -# -# CONFIG_PCS_XPCS is not set -# end of PCS device drivers - -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y 
-CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -# CONFIG_PPPOE_HASH_BITS_1 is not set -# CONFIG_PPPOE_HASH_BITS_2 is not set -CONFIG_PPPOE_HASH_BITS_4=y -# CONFIG_PPPOE_HASH_BITS_8 is not set -CONFIG_PPPOE_HASH_BITS=4 -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_SLIP is not set -CONFIG_SLHC=m - -# -# Host-side USB support is needed for USB Network Adapter support -# -CONFIG_USB_NET_DRIVERS=m -# CONFIG_USB_CATC is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_RTL8152 is not set -# CONFIG_USB_LAN78XX is not set -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -# CONFIG_USB_NET_HUAWEI_CDC_NCM is not set -# CONFIG_USB_NET_CDC_MBIM is not set -# CONFIG_USB_NET_DM9601 is not set -# CONFIG_USB_NET_SR9700 is not set -# CONFIG_USB_NET_SR9800 is not set -# CONFIG_USB_NET_SMSC75XX is not set -# CONFIG_USB_NET_SMSC95XX is not set -# CONFIG_USB_NET_GL620A is not set -# CONFIG_USB_NET_NET1080 is not set -# CONFIG_USB_NET_PLUSB is not set -# CONFIG_USB_NET_MCS7830 is not set -CONFIG_USB_NET_RNDIS_HOST=m -# CONFIG_USB_NET_CDC_SUBSET is not set -# CONFIG_USB_NET_ZAURUS is not set -# CONFIG_USB_NET_CX82310_ETH is not set -# CONFIG_USB_NET_KALMIA is not set -# CONFIG_USB_NET_QMI_WWAN is not set -# CONFIG_USB_HSO is not set -# CONFIG_USB_NET_INT51X1 is not set -# CONFIG_USB_IPHETH is not set -# CONFIG_USB_SIERRA_NET is not set -# CONFIG_USB_VL600 is not set -# CONFIG_USB_NET_CH9200 is not set -# CONFIG_USB_NET_AQC111 is not set -CONFIG_USB_RTL8153_ECM=m -CONFIG_WLAN=y -# CONFIG_WLAN_VENDOR_ADMTEK is not set -CONFIG_ATH_COMMON=m -CONFIG_WLAN_VENDOR_ATH=y -# CONFIG_ATH_DEBUG is not set -# CONFIG_ATH5K is not set -# CONFIG_ATH5K_PCI is not set -# CONFIG_ATH9K is not set -# CONFIG_ATH9K_HTC is not set -# CONFIG_CARL9170 is not set -# CONFIG_ATH6KL is not set -# CONFIG_AR5523 is not set -# CONFIG_WIL6210 is not set -CONFIG_ATH10K=m -CONFIG_ATH10K_CE=y -CONFIG_ATH10K_PCI=m -# CONFIG_ATH10K_SDIO is not set -# CONFIG_ATH10K_USB is not set -# CONFIG_ATH10K_DEBUG is not set -# CONFIG_ATH10K_DEBUGFS is not set -CONFIG_ATH10K_LEDS=y -# CONFIG_ATH10K_TRACING is not set -# CONFIG_WCN36XX is not set -# CONFIG_ATH11K is not set -# CONFIG_ATH12K is not set -# CONFIG_WLAN_VENDOR_ATMEL is not set -# CONFIG_WLAN_VENDOR_BROADCOM is not set -CONFIG_WLAN_VENDOR_INTEL=y -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set -# CONFIG_IWL4965 is not set -# CONFIG_IWL3945 is not set -CONFIG_IWLWIFI=m -CONFIG_IWLWIFI_LEDS=y -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y - -# -# Debugging Options -# -# CONFIG_IWLWIFI_DEBUG is not set -# CONFIG_IWLWIFI_DEVICE_TRACING is not set -# end of Debugging Options - -# CONFIG_WLAN_VENDOR_INTERSIL is not set -# CONFIG_WLAN_VENDOR_MARVELL is not set -CONFIG_WLAN_VENDOR_MEDIATEK=y -# CONFIG_MT7601U is not set -CONFIG_MT76_CORE=m -CONFIG_MT76_LEDS=y -CONFIG_MT76_USB=m -CONFIG_MT76x02_LIB=m -CONFIG_MT76x02_USB=m -CONFIG_MT76_CONNAC_LIB=m -CONFIG_MT792x_LIB=m -# CONFIG_MT76x0U is not set -# CONFIG_MT76x0E is not set -CONFIG_MT76x2_COMMON=m -# CONFIG_MT76x2E is not set -CONFIG_MT76x2U=m -# CONFIG_MT7603E is not set -# CONFIG_MT7615E is not set -# CONFIG_MT7663U is not set -# CONFIG_MT7663S is not set -# CONFIG_MT7915E is not set -CONFIG_MT7921_COMMON=m -CONFIG_MT7921E=m -# CONFIG_MT7921S is not set -# CONFIG_MT7921U is not set -# CONFIG_MT7996E 
is not set -# CONFIG_MT7925E is not set -# CONFIG_MT7925U is not set -# CONFIG_WLAN_VENDOR_MICROCHIP is not set -# CONFIG_WLAN_VENDOR_PURELIFI is not set -# CONFIG_WLAN_VENDOR_RALINK is not set -CONFIG_WLAN_VENDOR_REALTEK=y -# CONFIG_RTL8180 is not set -# CONFIG_RTL8187 is not set -CONFIG_RTL_CARDS=m -# CONFIG_RTL8192CE is not set -CONFIG_RTL8192SE=m -# CONFIG_RTL8192DE is not set -# CONFIG_RTL8723AE is not set -# CONFIG_RTL8723BE is not set -# CONFIG_RTL8188EE is not set -# CONFIG_RTL8192EE is not set -# CONFIG_RTL8821AE is not set -# CONFIG_RTL8192CU is not set -# CONFIG_RTL8192DU is not set -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -# CONFIG_RTLWIFI_DEBUG is not set -# CONFIG_RTL8XXXU is not set -# CONFIG_RTW88 is not set -# CONFIG_RTW89 is not set -# CONFIG_WLAN_VENDOR_RSI is not set -# CONFIG_WLAN_VENDOR_SILABS is not set -# CONFIG_WLAN_VENDOR_ST is not set -# CONFIG_WLAN_VENDOR_TI is not set -# CONFIG_WLAN_VENDOR_ZYDAS is not set -# CONFIG_WLAN_VENDOR_QUANTENNA is not set -# CONFIG_MAC80211_HWSIM is not set -# CONFIG_VIRT_WIFI is not set -# CONFIG_WAN is not set - -# -# Wireless WAN -# -# CONFIG_WWAN is not set -# end of Wireless WAN - -# CONFIG_VMXNET3 is not set -# CONFIG_FUJITSU_ES is not set -CONFIG_USB4_NET=m -# CONFIG_NETDEVSIM is not set -CONFIG_NET_FAILOVER=m -# CONFIG_ISDN is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_LEDS=m -CONFIG_INPUT_FF_MEMLESS=y -CONFIG_INPUT_SPARSEKMAP=m -# CONFIG_INPUT_MATRIXKMAP is not set -CONFIG_INPUT_VIVALDIFMAP=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -# CONFIG_KEYBOARD_ADP5588 is not set -# CONFIG_KEYBOARD_ADP5589 is not set -# CONFIG_KEYBOARD_APPLESPI is not set -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_QT1050 is not set -# CONFIG_KEYBOARD_QT1070 is not set -# CONFIG_KEYBOARD_QT2160 is not set -# CONFIG_KEYBOARD_DLINK_DIR685 is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_TCA6416 is not set -# CONFIG_KEYBOARD_TCA8418 is not set -# CONFIG_KEYBOARD_LM8323 is not set -# CONFIG_KEYBOARD_LM8333 is not set -# CONFIG_KEYBOARD_MAX7359 is not set -# CONFIG_KEYBOARD_MCS is not set -# CONFIG_KEYBOARD_MPR121 is not set -# CONFIG_KEYBOARD_NEWTON is not set -# CONFIG_KEYBOARD_OPENCORES is not set -# CONFIG_KEYBOARD_SAMSUNG is not set -# CONFIG_KEYBOARD_STOWAWAY is not set -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_TM2_TOUCHKEY is not set -CONFIG_KEYBOARD_XTKBD=m -# CONFIG_KEYBOARD_CYPRESS_SF is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_BYD=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -# CONFIG_MOUSE_PS2_ELANTECH is not set -# CONFIG_MOUSE_PS2_SENTELIC is not set -# CONFIG_MOUSE_PS2_TOUCHKIT is not set -CONFIG_MOUSE_PS2_FOCALTECH=y -# CONFIG_MOUSE_PS2_VMMOUSE is not set -CONFIG_MOUSE_PS2_SMBUS=y -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_APPLETOUCH is not set -# CONFIG_MOUSE_BCM5974 is not set -# CONFIG_MOUSE_CYAPA is not set -# CONFIG_MOUSE_ELAN_I2C is not set -# CONFIG_MOUSE_VSXXXAA is not set -CONFIG_MOUSE_SYNAPTICS_I2C=m -CONFIG_MOUSE_SYNAPTICS_USB=m -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# 
CONFIG_INPUT_TOUCHSCREEN is not set -CONFIG_INPUT_MISC=y -# CONFIG_INPUT_AD714X is not set -# CONFIG_INPUT_BMA150 is not set -# CONFIG_INPUT_E3X0_BUTTON is not set -# CONFIG_INPUT_PCSPKR is not set -# CONFIG_INPUT_MMA8450 is not set -# CONFIG_INPUT_APANEL is not set -# CONFIG_INPUT_ATLAS_BTNS is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set -# CONFIG_INPUT_KEYSPAN_REMOTE is not set -# CONFIG_INPUT_KXTJ9 is not set -# CONFIG_INPUT_POWERMATE is not set -# CONFIG_INPUT_YEALINK is not set -# CONFIG_INPUT_CM109 is not set -CONFIG_INPUT_UINPUT=m -# CONFIG_INPUT_PCF8574 is not set -# CONFIG_INPUT_PWM_BEEPER is not set -# CONFIG_INPUT_PWM_VIBRA is not set -# CONFIG_INPUT_DA7280_HAPTICS is not set -# CONFIG_INPUT_ADXL34X is not set -# CONFIG_INPUT_IMS_PCU is not set -# CONFIG_INPUT_IQS269A is not set -# CONFIG_INPUT_IQS626A is not set -# CONFIG_INPUT_IQS7222 is not set -# CONFIG_INPUT_CMA3000 is not set -# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set -# CONFIG_INPUT_DRV2665_HAPTICS is not set -# CONFIG_INPUT_DRV2667_HAPTICS is not set -CONFIG_RMI4_CORE=m -# CONFIG_RMI4_I2C is not set -# CONFIG_RMI4_SPI is not set -# CONFIG_RMI4_SMB is not set -CONFIG_RMI4_F03=y -CONFIG_RMI4_F03_SERIO=m -CONFIG_RMI4_2D_SENSOR=y -CONFIG_RMI4_F11=y -CONFIG_RMI4_F12=y -CONFIG_RMI4_F30=y -# CONFIG_RMI4_F34 is not set -# CONFIG_RMI4_F3A is not set -# CONFIG_RMI4_F54 is not set -# CONFIG_RMI4_F55 is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set -# CONFIG_SERIO_ALTERA_PS2 is not set -# CONFIG_SERIO_PS2MULT is not set -# CONFIG_SERIO_ARC_PS2 is not set -# CONFIG_USERIO is not set -# CONFIG_GAMEPORT is not set -# end of Hardware I/O ports -# end of Input device support - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_LEGACY_TIOCSTI is not set -# CONFIG_LDISC_AUTOLOAD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_EARLYCON=y -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_16550A_VARIANTS is not set -# CONFIG_SERIAL_8250_FINTEK is not set -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCILIB=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_EXAR=y -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set -# CONFIG_SERIAL_8250_PCI1XXXX is not set -CONFIG_SERIAL_8250_DWLIB=y -# CONFIG_SERIAL_8250_DW is not set -# CONFIG_SERIAL_8250_RT288X is not set -CONFIG_SERIAL_8250_LPSS=y -CONFIG_SERIAL_8250_MID=y -CONFIG_SERIAL_8250_PERICOM=y - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_MAX310X=y -# CONFIG_SERIAL_UARTLITE is not set -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -# CONFIG_SERIAL_JSM is not set -# CONFIG_SERIAL_LANTIQ is not set -# CONFIG_SERIAL_SCCNXP is not set -# CONFIG_SERIAL_SC16IS7XX is not set -# CONFIG_SERIAL_ALTERA_JTAGUART is not set -# CONFIG_SERIAL_ALTERA_UART is not set -# CONFIG_SERIAL_ARC is not set -# CONFIG_SERIAL_RP2 is not set -# CONFIG_SERIAL_FSL_LPUART is not set -# CONFIG_SERIAL_FSL_LINFLEXUART is not set -# CONFIG_SERIAL_SPRD is not set -# end of Serial drivers - -# CONFIG_SERIAL_NONSTANDARD is not set -# CONFIG_N_GSM is not set -# CONFIG_NOZOMI 
is not set -# CONFIG_NULL_TTY is not set -CONFIG_HVC_DRIVER=y -# CONFIG_SERIAL_DEV_BUS is not set -CONFIG_VIRTIO_CONSOLE=m -# CONFIG_IPMI_HANDLER is not set -CONFIG_HW_RANDOM=y -# CONFIG_HW_RANDOM_TIMERIOMEM is not set -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -# CONFIG_HW_RANDOM_BA431 is not set -# CONFIG_HW_RANDOM_VIA is not set -CONFIG_HW_RANDOM_VIRTIO=m -# CONFIG_HW_RANDOM_XIPHERA is not set -# CONFIG_APPLICOM is not set -CONFIG_MWAVE=m -# CONFIG_DEVMEM is not set -CONFIG_NVRAM=m -# CONFIG_DEVPORT is not set -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HPET_MMAP_DEFAULT=y -# CONFIG_HANGCHECK_TIMER is not set -CONFIG_TCG_TPM=m -CONFIG_TCG_TPM2_HMAC=y -CONFIG_HW_RANDOM_TPM=y -CONFIG_TCG_TIS_CORE=m -CONFIG_TCG_TIS=m -CONFIG_TCG_TIS_SPI=m -CONFIG_TCG_TIS_SPI_CR50=y -CONFIG_TCG_TIS_I2C=m -CONFIG_TCG_TIS_I2C_CR50=m -CONFIG_TCG_TIS_I2C_ATMEL=m -CONFIG_TCG_TIS_I2C_INFINEON=m -CONFIG_TCG_TIS_I2C_NUVOTON=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TCG_CRB=m -CONFIG_TCG_VTPM_PROXY=m -CONFIG_TCG_TIS_ST33ZP24=m -CONFIG_TCG_TIS_ST33ZP24_I2C=m -CONFIG_TCG_TIS_ST33ZP24_SPI=m -# CONFIG_TELCLOCK is not set -# CONFIG_XILLYBUS is not set -# CONFIG_XILLYUSB is not set -# end of Character devices - -# -# I2C support -# -CONFIG_I2C=y -CONFIG_ACPI_I2C_OPREGION=y -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -# CONFIG_I2C_MUX_LTC4306 is not set -# CONFIG_I2C_MUX_PCA9541 is not set -# CONFIG_I2C_MUX_REG is not set -# CONFIG_I2C_MUX_MLXCPLD is not set -# end of Multiplexer I2C Chip support - -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_CCGX_UCSI=m -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI1563 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -# CONFIG_I2C_AMD_MP2 is not set -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -# CONFIG_I2C_NFORCE2 is not set -CONFIG_I2C_NVIDIA_GPU=m -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_SIS630 is not set -# CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_VIA is not set -# CONFIG_I2C_VIAPRO is not set -# CONFIG_I2C_ZHAOXIN is not set - -# -# ACPI drivers -# -# CONFIG_I2C_SCMI is not set - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -# CONFIG_I2C_DESIGNWARE_PLATFORM is not set -# CONFIG_I2C_DESIGNWARE_PCI is not set -# CONFIG_I2C_EMEV2 is not set -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PCA_PLATFORM is not set -# CONFIG_I2C_SIMTEC is not set -# CONFIG_I2C_XILINX is not set - -# -# External I2C/SMBus adapter drivers -# -# CONFIG_I2C_DIOLAN_U2C is not set -# CONFIG_I2C_CP2615 is not set -# CONFIG_I2C_PCI1XXXX is not set -# CONFIG_I2C_ROBOTFUZZ_OSIF is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_TINY_USB is not set - -# -# Other I2C/SMBus bus drivers -# -# CONFIG_I2C_MLXCPLD is not set -CONFIG_I2C_VIRTIO=m -# end of I2C Hardware Bus support - -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_SLAVE is not set -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# end of I2C support - -CONFIG_I3C=m -CONFIG_CDNS_I3C_MASTER=m -CONFIG_DW_I3C_MASTER=m -# CONFIG_SVC_I3C_MASTER is not set -# CONFIG_MIPI_I3C_HCI is not set -CONFIG_SPI=y -# CONFIG_SPI_DEBUG is not set -CONFIG_SPI_MASTER=y -# CONFIG_SPI_MEM is not set - -# -# SPI Master Controller Drivers -# -# CONFIG_SPI_ALTERA is not set 
-# CONFIG_SPI_AXI_SPI_ENGINE is not set -# CONFIG_SPI_BITBANG is not set -# CONFIG_SPI_CADENCE is not set -# CONFIG_SPI_CH341 is not set -# CONFIG_SPI_DESIGNWARE is not set -# CONFIG_SPI_MICROCHIP_CORE is not set -# CONFIG_SPI_MICROCHIP_CORE_QSPI is not set -# CONFIG_SPI_LANTIQ_SSC is not set -# CONFIG_SPI_PCI1XXXX is not set -# CONFIG_SPI_PXA2XX is not set -# CONFIG_SPI_SC18IS602 is not set -# CONFIG_SPI_SIFIVE is not set -# CONFIG_SPI_MXIC is not set -# CONFIG_SPI_XCOMM is not set -# CONFIG_SPI_XILINX is not set -# CONFIG_SPI_AMD is not set - -# -# SPI Multiplexer support -# -# CONFIG_SPI_MUX is not set - -# -# SPI Protocol Masters -# -# CONFIG_SPI_SPIDEV is not set -# CONFIG_SPI_LOOPBACK_TEST is not set -# CONFIG_SPI_TLE62X0 is not set -# CONFIG_SPI_SLAVE is not set -CONFIG_SPI_DYNAMIC=y -CONFIG_SPMI=m -# CONFIG_SPMI_HISI3670 is not set -# CONFIG_HSI is not set -CONFIG_PPS=m -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -# CONFIG_PPS_CLIENT_KTIMER is not set -# CONFIG_PPS_CLIENT_LDISC is not set -# CONFIG_PPS_CLIENT_GPIO is not set - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=m -CONFIG_PTP_1588_CLOCK_OPTIONAL=m - -# -# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. -# -CONFIG_PTP_1588_CLOCK_KVM=m -# CONFIG_PTP_1588_CLOCK_IDT82P33 is not set -# CONFIG_PTP_1588_CLOCK_IDTCM is not set -# CONFIG_PTP_1588_CLOCK_FC3W is not set -# CONFIG_PTP_1588_CLOCK_MOCK is not set -# CONFIG_PTP_1588_CLOCK_VMW is not set -# end of PTP clock support - -CONFIG_PINCTRL=y -# CONFIG_DEBUG_PINCTRL is not set -# CONFIG_PINCTRL_AMD is not set -# CONFIG_PINCTRL_CY8C95X0 is not set -# CONFIG_PINCTRL_MCP23S08 is not set -# CONFIG_PINCTRL_SX150X is not set - -# -# Intel pinctrl drivers -# -# CONFIG_PINCTRL_BAYTRAIL is not set -# CONFIG_PINCTRL_CHERRYVIEW is not set -# CONFIG_PINCTRL_LYNXPOINT is not set -# CONFIG_PINCTRL_INTEL_PLATFORM is not set -# CONFIG_PINCTRL_ALDERLAKE is not set -# CONFIG_PINCTRL_BROXTON is not set -# CONFIG_PINCTRL_CANNONLAKE is not set -# CONFIG_PINCTRL_CEDARFORK is not set -# CONFIG_PINCTRL_DENVERTON is not set -# CONFIG_PINCTRL_ELKHARTLAKE is not set -# CONFIG_PINCTRL_EMMITSBURG is not set -# CONFIG_PINCTRL_GEMINILAKE is not set -# CONFIG_PINCTRL_ICELAKE is not set -# CONFIG_PINCTRL_JASPERLAKE is not set -# CONFIG_PINCTRL_LAKEFIELD is not set -# CONFIG_PINCTRL_LEWISBURG is not set -# CONFIG_PINCTRL_METEORLAKE is not set -# CONFIG_PINCTRL_METEORPOINT is not set -# CONFIG_PINCTRL_SUNRISEPOINT is not set -# CONFIG_PINCTRL_TIGERLAKE is not set -# end of Intel pinctrl drivers - -# -# Renesas pinctrl drivers -# -# end of Renesas pinctrl drivers - -# CONFIG_GPIOLIB is not set -# CONFIG_W1 is not set -# CONFIG_POWER_RESET is not set -# CONFIG_POWER_SEQUENCING is not set -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y -# CONFIG_IP5XXX_POWER is not set -# CONFIG_TEST_POWER is not set -# CONFIG_CHARGER_ADP5061 is not set -# CONFIG_BATTERY_CW2015 is not set -# CONFIG_BATTERY_DS2780 is not set -# CONFIG_BATTERY_DS2781 is not set -# CONFIG_BATTERY_DS2782 is not set -# CONFIG_BATTERY_SAMSUNG_SDI is not set -# CONFIG_BATTERY_SBS is not set -# CONFIG_CHARGER_SBS is not set -# CONFIG_BATTERY_BQ27XXX is not set -# CONFIG_BATTERY_MAX17042 is not set -# CONFIG_BATTERY_MAX1720X is not set -# CONFIG_CHARGER_ISP1704 is not set -# CONFIG_CHARGER_MAX8903 is not set -# CONFIG_CHARGER_LP8727 is not set -# CONFIG_CHARGER_LTC4162L is not set -# CONFIG_CHARGER_MAX77976 is not set -# 
CONFIG_CHARGER_BQ2415X is not set -# CONFIG_BATTERY_GAUGE_LTC2941 is not set -# CONFIG_BATTERY_GOLDFISH is not set -# CONFIG_BATTERY_RT5033 is not set -# CONFIG_CHARGER_BD99954 is not set -# CONFIG_BATTERY_UG3105 is not set -# CONFIG_FUEL_GAUGE_MM8013 is not set -CONFIG_HWMON=y -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -# CONFIG_SENSORS_ABITUGURU is not set -# CONFIG_SENSORS_ABITUGURU3 is not set -# CONFIG_SENSORS_AD7314 is not set -# CONFIG_SENSORS_AD7414 is not set -# CONFIG_SENSORS_AD7418 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM1026 is not set -# CONFIG_SENSORS_ADM1029 is not set -# CONFIG_SENSORS_ADM1031 is not set -# CONFIG_SENSORS_ADM1177 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_ADT7310 is not set -# CONFIG_SENSORS_ADT7410 is not set -# CONFIG_SENSORS_ADT7411 is not set -# CONFIG_SENSORS_ADT7462 is not set -# CONFIG_SENSORS_ADT7470 is not set -# CONFIG_SENSORS_ADT7475 is not set -# CONFIG_SENSORS_AHT10 is not set -# CONFIG_SENSORS_AQUACOMPUTER_D5NEXT is not set -# CONFIG_SENSORS_AS370 is not set -# CONFIG_SENSORS_ASC7621 is not set -# CONFIG_SENSORS_ASUS_ROG_RYUJIN is not set -# CONFIG_SENSORS_AXI_FAN_CONTROL is not set -# CONFIG_SENSORS_K8TEMP is not set -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -# CONFIG_SENSORS_APPLESMC is not set -# CONFIG_SENSORS_ASB100 is not set -# CONFIG_SENSORS_ATXP1 is not set -# CONFIG_SENSORS_CHIPCAP2 is not set -# CONFIG_SENSORS_CORSAIR_CPRO is not set -# CONFIG_SENSORS_CORSAIR_PSU is not set -CONFIG_SENSORS_DRIVETEMP=m -# CONFIG_SENSORS_DS620 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_DELL_SMM is not set -# CONFIG_SENSORS_I5K_AMB is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_F71882FG is not set -# CONFIG_SENSORS_F75375S is not set -# CONFIG_SENSORS_FSCHMD is not set -# CONFIG_SENSORS_FTSTEUTATES is not set -# CONFIG_SENSORS_GIGABYTE_WATERFORCE is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -# CONFIG_SENSORS_G760A is not set -# CONFIG_SENSORS_G762 is not set -# CONFIG_SENSORS_HIH6130 is not set -# CONFIG_SENSORS_HS3001 is not set -# CONFIG_SENSORS_I5500 is not set -CONFIG_SENSORS_CORETEMP=m -# CONFIG_SENSORS_IT87 is not set -# CONFIG_SENSORS_JC42 is not set -# CONFIG_SENSORS_POWERZ is not set -# CONFIG_SENSORS_POWR1220 is not set -CONFIG_SENSORS_LENOVO_EC=m -# CONFIG_SENSORS_LINEAGE is not set -# CONFIG_SENSORS_LTC2945 is not set -# CONFIG_SENSORS_LTC2947_I2C is not set -# CONFIG_SENSORS_LTC2947_SPI is not set -# CONFIG_SENSORS_LTC2990 is not set -# CONFIG_SENSORS_LTC2991 is not set -# CONFIG_SENSORS_LTC4151 is not set -# CONFIG_SENSORS_LTC4215 is not set -# CONFIG_SENSORS_LTC4222 is not set -# CONFIG_SENSORS_LTC4245 is not set -# CONFIG_SENSORS_LTC4260 is not set -# CONFIG_SENSORS_LTC4261 is not set -# CONFIG_SENSORS_LTC4282 is not set -# CONFIG_SENSORS_MAX1111 is not set -# CONFIG_SENSORS_MAX127 is not set -# CONFIG_SENSORS_MAX16065 is not set -# CONFIG_SENSORS_MAX1619 is not set -# CONFIG_SENSORS_MAX1668 is not set -# CONFIG_SENSORS_MAX197 is not set -# CONFIG_SENSORS_MAX31722 is not set -# CONFIG_SENSORS_MAX31730 is not set -# CONFIG_SENSORS_MAX31760 is not set -# CONFIG_MAX31827 is not set -# CONFIG_SENSORS_MAX6620 is not set -# CONFIG_SENSORS_MAX6621 is not set -# CONFIG_SENSORS_MAX6639 is not set -# CONFIG_SENSORS_MAX6650 is not set -# CONFIG_SENSORS_MAX6697 is not set -# CONFIG_SENSORS_MAX31790 is not set -# CONFIG_SENSORS_MC34VR500 is not set -# 
CONFIG_SENSORS_MCP3021 is not set -# CONFIG_SENSORS_TC654 is not set -# CONFIG_SENSORS_TPS23861 is not set -# CONFIG_SENSORS_MR75203 is not set -# CONFIG_SENSORS_ADCXX is not set -# CONFIG_SENSORS_LM63 is not set -# CONFIG_SENSORS_LM70 is not set -# CONFIG_SENSORS_LM73 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM77 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -# CONFIG_SENSORS_LM83 is not set -# CONFIG_SENSORS_LM85 is not set -# CONFIG_SENSORS_LM87 is not set -# CONFIG_SENSORS_LM90 is not set -# CONFIG_SENSORS_LM92 is not set -# CONFIG_SENSORS_LM93 is not set -# CONFIG_SENSORS_LM95234 is not set -# CONFIG_SENSORS_LM95241 is not set -# CONFIG_SENSORS_LM95245 is not set -# CONFIG_SENSORS_PC87360 is not set -# CONFIG_SENSORS_PC87427 is not set -# CONFIG_SENSORS_NCT6683 is not set -CONFIG_SENSORS_NCT6775_CORE=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_NCT6775_I2C=m -# CONFIG_SENSORS_NCT7802 is not set -# CONFIG_SENSORS_NCT7904 is not set -# CONFIG_SENSORS_NPCM7XX is not set -# CONFIG_SENSORS_NZXT_KRAKEN2 is not set -# CONFIG_SENSORS_NZXT_KRAKEN3 is not set -# CONFIG_SENSORS_NZXT_SMART2 is not set -# CONFIG_SENSORS_OCC_P8_I2C is not set -# CONFIG_SENSORS_OXP is not set -# CONFIG_SENSORS_PCF8591 is not set -# CONFIG_PMBUS is not set -# CONFIG_SENSORS_PT5161L is not set -CONFIG_SENSORS_PWM_FAN=m -# CONFIG_SENSORS_SBTSI is not set -# CONFIG_SENSORS_SBRMI is not set -# CONFIG_SENSORS_SHT21 is not set -# CONFIG_SENSORS_SHT3x is not set -# CONFIG_SENSORS_SHT4x is not set -# CONFIG_SENSORS_SHTC1 is not set -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_DME1737 is not set -# CONFIG_SENSORS_EMC1403 is not set -# CONFIG_SENSORS_EMC2103 is not set -# CONFIG_SENSORS_EMC2305 is not set -# CONFIG_SENSORS_EMC6W201 is not set -# CONFIG_SENSORS_SMSC47M1 is not set -# CONFIG_SENSORS_SMSC47M192 is not set -# CONFIG_SENSORS_SMSC47B397 is not set -# CONFIG_SENSORS_SCH5627 is not set -# CONFIG_SENSORS_SCH5636 is not set -# CONFIG_SENSORS_STTS751 is not set -# CONFIG_SENSORS_ADC128D818 is not set -# CONFIG_SENSORS_ADS7828 is not set -# CONFIG_SENSORS_ADS7871 is not set -# CONFIG_SENSORS_AMC6821 is not set -# CONFIG_SENSORS_INA209 is not set -# CONFIG_SENSORS_INA2XX is not set -# CONFIG_SENSORS_INA238 is not set -# CONFIG_SENSORS_INA3221 is not set -# CONFIG_SENSORS_SPD5118 is not set -# CONFIG_SENSORS_TC74 is not set -# CONFIG_SENSORS_THMC50 is not set -# CONFIG_SENSORS_TMP102 is not set -# CONFIG_SENSORS_TMP103 is not set -# CONFIG_SENSORS_TMP108 is not set -# CONFIG_SENSORS_TMP401 is not set -# CONFIG_SENSORS_TMP421 is not set -# CONFIG_SENSORS_TMP464 is not set -# CONFIG_SENSORS_TMP513 is not set -# CONFIG_SENSORS_VIA_CPUTEMP is not set -# CONFIG_SENSORS_VIA686A is not set -# CONFIG_SENSORS_VT1211 is not set -# CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83773G is not set -# CONFIG_SENSORS_W83781D is not set -# CONFIG_SENSORS_W83791D is not set -# CONFIG_SENSORS_W83792D is not set -# CONFIG_SENSORS_W83793 is not set -# CONFIG_SENSORS_W83795 is not set -# CONFIG_SENSORS_W83L785TS is not set -# CONFIG_SENSORS_W83L786NG is not set -# CONFIG_SENSORS_W83627HF is not set -# CONFIG_SENSORS_W83627EHF is not set -# CONFIG_SENSORS_XGENE is not set - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -# CONFIG_SENSORS_ATK0110 is not set -# CONFIG_SENSORS_ASUS_WMI is not set -# CONFIG_SENSORS_ASUS_EC is not set -# CONFIG_SENSORS_HP_WMI is not set -CONFIG_THERMAL=y -CONFIG_THERMAL_NETLINK=y -# CONFIG_THERMAL_STATISTICS is not set -# CONFIG_THERMAL_DEBUGFS 
is not set -CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 -CONFIG_THERMAL_HWMON=y -# CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE is not set -CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE=y -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set -CONFIG_THERMAL_GOV_FAIR_SHARE=y -CONFIG_THERMAL_GOV_STEP_WISE=y -# CONFIG_THERMAL_GOV_BANG_BANG is not set -CONFIG_THERMAL_GOV_USER_SPACE=y -CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y -# CONFIG_DEVFREQ_THERMAL is not set -# CONFIG_THERMAL_EMULATION is not set - -# -# Intel thermal drivers -# -# CONFIG_INTEL_POWERCLAMP is not set -CONFIG_X86_THERMAL_VECTOR=y -CONFIG_INTEL_TCC=y -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_INTEL_SOC_DTS_IOSF_CORE=m -# CONFIG_INTEL_SOC_DTS_THERMAL is not set - -# -# ACPI INT340X thermal drivers -# -CONFIG_INT340X_THERMAL=m -CONFIG_ACPI_THERMAL_REL=m -CONFIG_INT3406_THERMAL=m -CONFIG_PROC_THERMAL_MMIO_RAPL=m -# end of ACPI INT340X thermal drivers - -CONFIG_INTEL_PCH_THERMAL=m -CONFIG_INTEL_TCC_COOLING=m -CONFIG_INTEL_HFI_THERMAL=y -# end of Intel thermal drivers - -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y -CONFIG_WATCHDOG_OPEN_TIMEOUT=0 -CONFIG_WATCHDOG_SYSFS=y -# CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set - -# -# Watchdog Pretimeout Governors -# -# CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set - -# -# Watchdog Device Drivers -# -# CONFIG_SOFT_WATCHDOG is not set -# CONFIG_LENOVO_SE10_WDT is not set -# CONFIG_WDAT_WDT is not set -# CONFIG_XILINX_WATCHDOG is not set -# CONFIG_ZIIRAVE_WATCHDOG is not set -# CONFIG_CADENCE_WATCHDOG is not set -# CONFIG_DW_WATCHDOG is not set -# CONFIG_MAX63XX_WATCHDOG is not set -# CONFIG_ACQUIRE_WDT is not set -# CONFIG_ADVANTECH_WDT is not set -# CONFIG_ADVANTECH_EC_WDT is not set -# CONFIG_ALIM1535_WDT is not set -# CONFIG_ALIM7101_WDT is not set -# CONFIG_EBC_C384_WDT is not set -# CONFIG_EXAR_WDT is not set -# CONFIG_F71808E_WDT is not set -# CONFIG_SP5100_TCO is not set -# CONFIG_SBC_FITPC2_WATCHDOG is not set -# CONFIG_EUROTECH_WDT is not set -# CONFIG_IB700_WDT is not set -# CONFIG_IBMASR is not set -# CONFIG_WAFER_WDT is not set -# CONFIG_I6300ESB_WDT is not set -# CONFIG_IE6XX_WDT is not set -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -# CONFIG_HP_WATCHDOG is not set -# CONFIG_SC1200_WDT is not set -# CONFIG_PC87413_WDT is not set -# CONFIG_NV_TCO is not set -# CONFIG_60XX_WDT is not set -# CONFIG_CPU5_WDT is not set -# CONFIG_SMSC_SCH311X_WDT is not set -# CONFIG_SMSC37B787_WDT is not set -# CONFIG_TQMX86_WDT is not set -# CONFIG_VIA_WDT is not set -# CONFIG_W83627HF_WDT is not set -# CONFIG_W83877F_WDT is not set -# CONFIG_W83977F_WDT is not set -# CONFIG_MACHZ_WDT is not set -# CONFIG_SBC_EPX_C3_WATCHDOG is not set -# CONFIG_NI903X_WDT is not set -# CONFIG_NIC7018_WDT is not set - -# -# PCI-based Watchdog Cards -# -# CONFIG_PCIPCWATCHDOG is not set -# CONFIG_WDTPCI is not set - -# -# USB-based Watchdog Cards -# -# CONFIG_USBPCWATCHDOG is not set -CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -# CONFIG_SSB_SDIOHOST is not set -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_BCMA_POSSIBLE=y -# CONFIG_BCMA is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=m -# CONFIG_MFD_AS3711 is not set -# CONFIG_MFD_SMPRO is not set -# CONFIG_PMIC_ADP5520 is not set -# CONFIG_MFD_BCM590XX is not set -# 
CONFIG_MFD_BD9571MWV is not set -# CONFIG_MFD_AXP20X_I2C is not set -# CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_MADERA is not set -# CONFIG_PMIC_DA903X is not set -# CONFIG_MFD_DA9052_SPI is not set -# CONFIG_MFD_DA9052_I2C is not set -# CONFIG_MFD_DA9055 is not set -# CONFIG_MFD_DA9062 is not set -# CONFIG_MFD_DA9063 is not set -# CONFIG_MFD_DA9150 is not set -# CONFIG_MFD_DLN2 is not set -# CONFIG_MFD_MC13XXX_SPI is not set -# CONFIG_MFD_MC13XXX_I2C is not set -# CONFIG_MFD_MP2629 is not set -# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -CONFIG_MFD_INTEL_LPSS=m -CONFIG_MFD_INTEL_LPSS_ACPI=m -CONFIG_MFD_INTEL_LPSS_PCI=m -# CONFIG_MFD_INTEL_PMC_BXT is not set -# CONFIG_MFD_IQS62X is not set -# CONFIG_MFD_JANZ_CMODIO is not set -# CONFIG_MFD_KEMPLD is not set -# CONFIG_MFD_88PM800 is not set -# CONFIG_MFD_88PM805 is not set -# CONFIG_MFD_88PM860X is not set -# CONFIG_MFD_MAX14577 is not set -# CONFIG_MFD_MAX77541 is not set -# CONFIG_MFD_MAX77693 is not set -# CONFIG_MFD_MAX77843 is not set -# CONFIG_MFD_MAX8907 is not set -# CONFIG_MFD_MAX8925 is not set -# CONFIG_MFD_MAX8997 is not set -# CONFIG_MFD_MAX8998 is not set -# CONFIG_MFD_MT6360 is not set -# CONFIG_MFD_MT6370 is not set -# CONFIG_MFD_MT6397 is not set -# CONFIG_MFD_MENF21BMC is not set -# CONFIG_MFD_OCELOT is not set -# CONFIG_EZX_PCAP is not set -# CONFIG_MFD_VIPERBOARD is not set -# CONFIG_MFD_RETU is not set -# CONFIG_MFD_PCF50633 is not set -# CONFIG_MFD_SY7636A is not set -# CONFIG_MFD_RDC321X is not set -# CONFIG_MFD_RT4831 is not set -# CONFIG_MFD_RT5033 is not set -# CONFIG_MFD_RT5120 is not set -# CONFIG_MFD_RC5T583 is not set -# CONFIG_MFD_SI476X_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_MFD_SKY81452 is not set -# CONFIG_MFD_SYSCON is not set -# CONFIG_MFD_LP3943 is not set -# CONFIG_MFD_LP8788 is not set -# CONFIG_MFD_TI_LMU is not set -# CONFIG_MFD_PALMAS is not set -# CONFIG_TPS6105X is not set -# CONFIG_TPS6507X is not set -# CONFIG_MFD_TPS65086 is not set -# CONFIG_MFD_TPS65090 is not set -# CONFIG_MFD_TI_LP873X is not set -# CONFIG_MFD_TPS6586X is not set -# CONFIG_MFD_TPS65912_I2C is not set -# CONFIG_MFD_TPS65912_SPI is not set -# CONFIG_MFD_TPS6594_I2C is not set -# CONFIG_MFD_TPS6594_SPI is not set -# CONFIG_TWL4030_CORE is not set -# CONFIG_TWL6040_CORE is not set -# CONFIG_MFD_WL1273_CORE is not set -# CONFIG_MFD_LM3533 is not set -# CONFIG_MFD_TQMX86 is not set -# CONFIG_MFD_VX855 is not set -# CONFIG_MFD_ARIZONA_I2C is not set -# CONFIG_MFD_ARIZONA_SPI is not set -# CONFIG_MFD_WM8400 is not set -# CONFIG_MFD_WM831X_I2C is not set -# CONFIG_MFD_WM831X_SPI is not set -# CONFIG_MFD_WM8350_I2C is not set -# CONFIG_MFD_WM8994 is not set -# CONFIG_MFD_ATC260X_I2C is not set -# CONFIG_MFD_CS40L50_I2C is not set -# CONFIG_MFD_CS40L50_SPI is not set -# CONFIG_MFD_INTEL_M10_BMC_SPI is not set -# end of Multifunction device drivers - -# CONFIG_REGULATOR is not set -# CONFIG_RC_CORE is not set -CONFIG_CEC_CORE=m - -# -# CEC support -# -# CONFIG_MEDIA_CEC_SUPPORT is not set -# end of CEC support - -CONFIG_MEDIA_SUPPORT=m -CONFIG_MEDIA_SUPPORT_FILTER=y -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y - -# -# Media device types -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set -# CONFIG_MEDIA_DIGITAL_TV_SUPPORT is not set -# CONFIG_MEDIA_RADIO_SUPPORT is not set -# CONFIG_MEDIA_SDR_SUPPORT is not set -# CONFIG_MEDIA_PLATFORM_SUPPORT is not set -# CONFIG_MEDIA_TEST_SUPPORT is not set -# end of Media device types - -CONFIG_VIDEO_DEV=m 
-CONFIG_MEDIA_CONTROLLER=y - -# -# Video4Linux options -# -CONFIG_VIDEO_V4L2_I2C=y -CONFIG_VIDEO_V4L2_SUBDEV_API=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_V4L2_FWNODE=m -CONFIG_V4L2_ASYNC=m -# end of Video4Linux options - -# -# Media controller options -# -# end of Media controller options - -# -# Media drivers -# - -# -# Drivers filtered as selected at 'Filter media drivers' -# - -# -# Media drivers -# -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -# CONFIG_USB_GSPCA is not set -# CONFIG_USB_PWC is not set -# CONFIG_USB_S2255 is not set -# CONFIG_VIDEO_USBTV is not set -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y - -# -# Webcam, TV (analog/digital) USB devices -# -# CONFIG_VIDEO_EM28XX is not set -# CONFIG_MEDIA_PCI_SUPPORT is not set -CONFIG_UVC_COMMON=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_VMALLOC=m -# end of Media drivers - -CONFIG_MEDIA_HIDE_ANCILLARY_SUBDRV=y - -# -# Media ancillary drivers -# -CONFIG_VIDEO_CAMERA_SENSOR=y -# CONFIG_VIDEO_ALVIUM_CSI2 is not set -# CONFIG_VIDEO_AR0521 is not set -# CONFIG_VIDEO_GC0308 is not set -# CONFIG_VIDEO_GC05A2 is not set -# CONFIG_VIDEO_GC08A3 is not set -# CONFIG_VIDEO_GC2145 is not set -# CONFIG_VIDEO_HI556 is not set -# CONFIG_VIDEO_HI846 is not set -# CONFIG_VIDEO_HI847 is not set -# CONFIG_VIDEO_IMX208 is not set -# CONFIG_VIDEO_IMX219 is not set -# CONFIG_VIDEO_IMX258 is not set -# CONFIG_VIDEO_IMX274 is not set -# CONFIG_VIDEO_IMX283 is not set -# CONFIG_VIDEO_IMX290 is not set -# CONFIG_VIDEO_IMX296 is not set -# CONFIG_VIDEO_IMX319 is not set -# CONFIG_VIDEO_IMX355 is not set -# CONFIG_VIDEO_MT9M001 is not set -# CONFIG_VIDEO_MT9M111 is not set -# CONFIG_VIDEO_MT9M114 is not set -# CONFIG_VIDEO_MT9P031 is not set -# CONFIG_VIDEO_MT9T112 is not set -# CONFIG_VIDEO_MT9V011 is not set -# CONFIG_VIDEO_MT9V032 is not set -# CONFIG_VIDEO_MT9V111 is not set -# CONFIG_VIDEO_OG01A1B is not set -# CONFIG_VIDEO_OV01A10 is not set -# CONFIG_VIDEO_OV02A10 is not set -# CONFIG_VIDEO_OV08D10 is not set -# CONFIG_VIDEO_OV08X40 is not set -# CONFIG_VIDEO_OV13858 is not set -# CONFIG_VIDEO_OV13B10 is not set -# CONFIG_VIDEO_OV2640 is not set -# CONFIG_VIDEO_OV2680 is not set -# CONFIG_VIDEO_OV2685 is not set -# CONFIG_VIDEO_OV2740 is not set -# CONFIG_VIDEO_OV5647 is not set -# CONFIG_VIDEO_OV5648 is not set -# CONFIG_VIDEO_OV5670 is not set -# CONFIG_VIDEO_OV5675 is not set -# CONFIG_VIDEO_OV5693 is not set -# CONFIG_VIDEO_OV5695 is not set -# CONFIG_VIDEO_OV64A40 is not set -# CONFIG_VIDEO_OV6650 is not set -# CONFIG_VIDEO_OV7251 is not set -# CONFIG_VIDEO_OV7640 is not set -# CONFIG_VIDEO_OV7670 is not set -# CONFIG_VIDEO_OV772X is not set -# CONFIG_VIDEO_OV7740 is not set -# CONFIG_VIDEO_OV8856 is not set -# CONFIG_VIDEO_OV8858 is not set -# CONFIG_VIDEO_OV8865 is not set -# CONFIG_VIDEO_OV9640 is not set -# CONFIG_VIDEO_OV9650 is not set -# CONFIG_VIDEO_OV9734 is not set -# CONFIG_VIDEO_RDACM20 is not set -# CONFIG_VIDEO_RDACM21 is not set -# CONFIG_VIDEO_RJ54N1 is not set -# CONFIG_VIDEO_S5C73M3 is not set -# CONFIG_VIDEO_S5K5BAF is not set -# CONFIG_VIDEO_S5K6A3 is not set -# CONFIG_VIDEO_CCS is not set -# CONFIG_VIDEO_ET8EK8 is not set - -# -# Camera ISPs -# -# CONFIG_VIDEO_THP7312 is not set -# end of Camera ISPs - -# -# Lens drivers -# -# CONFIG_VIDEO_AK7375 is not set -# CONFIG_VIDEO_DW9714 is not set -# CONFIG_VIDEO_DW9719 is not set -# CONFIG_VIDEO_DW9768 is not set -# CONFIG_VIDEO_DW9807_VCM is not set -# 
end of Lens drivers - -# -# Flash devices -# -# CONFIG_VIDEO_ADP1653 is not set -# CONFIG_VIDEO_LM3560 is not set -# CONFIG_VIDEO_LM3646 is not set -# end of Flash devices - -# -# audio, video and radio I2C drivers auto-selected by 'Autoselect ancillary drivers' -# - -# -# Video and audio decoders -# - -# -# Video serializers and deserializers -# -# end of Video serializers and deserializers - -# -# SPI I2C drivers auto-selected by 'Autoselect ancillary drivers' -# - -# -# Media SPI Adapters -# -# CONFIG_VIDEO_GS1662 is not set -# end of Media SPI Adapters -# end of Media ancillary drivers - -# -# Graphics support -# -CONFIG_APERTURE_HELPERS=y -CONFIG_SCREEN_INFO=y -CONFIG_VIDEO=y -# CONFIG_AUXDISPLAY is not set -CONFIG_AGP=y -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_VIA is not set -CONFIG_INTEL_GTT=m -CONFIG_VGA_SWITCHEROO=y -CONFIG_DRM=y -CONFIG_DRM_MIPI_DSI=y -# CONFIG_DRM_DEBUG_MM is not set -CONFIG_DRM_KMS_HELPER=y -CONFIG_DRM_FBDEV_EMULATION=y -CONFIG_DRM_FBDEV_OVERALLOC=100 -# CONFIG_DRM_LOAD_EDID_FIRMWARE is not set -CONFIG_DRM_DISPLAY_HELPER=m -CONFIG_DRM_DISPLAY_DP_AUX_CEC=y -CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV=y -CONFIG_DRM_DISPLAY_DP_HELPER=y -CONFIG_DRM_DISPLAY_DP_TUNNEL=y -CONFIG_DRM_DISPLAY_HDCP_HELPER=y -CONFIG_DRM_DISPLAY_HDMI_HELPER=y -CONFIG_DRM_TTM=m -CONFIG_DRM_EXEC=m -CONFIG_DRM_GPUVM=m -CONFIG_DRM_BUDDY=m -CONFIG_DRM_VRAM_HELPER=m -CONFIG_DRM_TTM_HELPER=m -CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_SUBALLOC_HELPER=m -CONFIG_DRM_SCHED=m - -# -# I2C encoder or helper chips -# -# CONFIG_DRM_I2C_CH7006 is not set -# CONFIG_DRM_I2C_SIL164 is not set -# CONFIG_DRM_I2C_NXP_TDA998X is not set -# CONFIG_DRM_I2C_NXP_TDA9950 is not set -# end of I2C encoder or helper chips - -# -# ARM devices -# -# end of ARM devices - -# CONFIG_DRM_RADEON is not set -CONFIG_DRM_AMDGPU=m -CONFIG_DRM_AMDGPU_SI=y -CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y -# CONFIG_DRM_AMD_ISP is not set - -# -# ACP (Audio CoProcessor) Configuration -# -# CONFIG_DRM_AMD_ACP is not set -# end of ACP (Audio CoProcessor) Configuration - -# -# Display Engine Configuration -# -CONFIG_DRM_AMD_DC=y -CONFIG_DRM_AMD_DC_FP=y -CONFIG_DRM_AMD_DC_SI=y -CONFIG_DRM_AMD_SECURE_DISPLAY=y -# end of Display Engine Configuration - -CONFIG_HSA_AMD=y -CONFIG_HSA_AMD_SVM=y -CONFIG_DRM_NOUVEAU=m -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -# CONFIG_NOUVEAU_DEBUG_PUSH is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -# CONFIG_DRM_NOUVEAU_GSP_DEFAULT is not set -CONFIG_DRM_I915=m -CONFIG_DRM_I915_FORCE_PROBE="" -CONFIG_DRM_I915_CAPTURE_ERROR=y -CONFIG_DRM_I915_COMPRESS_ERROR=y -CONFIG_DRM_I915_USERPTR=y -CONFIG_DRM_I915_GVT_KVMGT=m -CONFIG_DRM_I915_DP_TUNNEL=y -CONFIG_DRM_I915_REQUEST_TIMEOUT=20000 -CONFIG_DRM_I915_FENCE_TIMEOUT=10000 -CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND=250 -CONFIG_DRM_I915_HEARTBEAT_INTERVAL=2500 -CONFIG_DRM_I915_PREEMPT_TIMEOUT=640 -CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE=7500 -CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000 -CONFIG_DRM_I915_STOP_TIMEOUT=100 -CONFIG_DRM_I915_TIMESLICE_DURATION=1 -CONFIG_DRM_I915_GVT=y -# CONFIG_DRM_XE is not set -CONFIG_DRM_VGEM=m -CONFIG_DRM_VKMS=m -# CONFIG_DRM_VMWGFX is not set -# CONFIG_DRM_GMA500 is not set -CONFIG_DRM_UDL=m -# CONFIG_DRM_AST is not set -# CONFIG_DRM_MGAG200 is not set -CONFIG_DRM_QXL=m -CONFIG_DRM_VIRTIO_GPU=m -CONFIG_DRM_VIRTIO_GPU_KMS=y -CONFIG_DRM_PANEL=y - -# -# Display Panels -# -# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set -# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set 
-# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set -# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set -# end of Display Panels - -CONFIG_DRM_BRIDGE=y -CONFIG_DRM_PANEL_BRIDGE=y - -# -# Display Interface Bridges -# -# CONFIG_DRM_ANALOGIX_ANX78XX is not set -# end of Display Interface Bridges - -# CONFIG_DRM_ETNAVIV is not set -CONFIG_DRM_BOCHS=m -# CONFIG_DRM_CIRRUS_QEMU is not set -# CONFIG_DRM_GM12U320 is not set -# CONFIG_DRM_PANEL_MIPI_DBI is not set -CONFIG_DRM_SIMPLEDRM=y -# CONFIG_TINYDRM_HX8357D is not set -# CONFIG_TINYDRM_ILI9163 is not set -# CONFIG_TINYDRM_ILI9225 is not set -# CONFIG_TINYDRM_ILI9341 is not set -# CONFIG_TINYDRM_ILI9486 is not set -# CONFIG_TINYDRM_MI0283QT is not set -# CONFIG_TINYDRM_REPAPER is not set -# CONFIG_TINYDRM_ST7586 is not set -# CONFIG_TINYDRM_ST7735R is not set -# CONFIG_DRM_VBOXVIDEO is not set -CONFIG_DRM_GUD=m -# CONFIG_DRM_SSD130X is not set -CONFIG_DRM_PRIVACY_SCREEN=y -CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y - -# -# Frame buffer Devices -# -CONFIG_FB=y -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_UVESA is not set -# CONFIG_FB_VESA is not set -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_IBM_GXT4500 is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -# CONFIG_FB_SM712 is not set -CONFIG_FB_CORE=y -CONFIG_FB_NOTIFY=y -CONFIG_FIRMWARE_EDID=y -# CONFIG_FB_DEVICE is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SYS_FILLRECT=y -CONFIG_FB_SYS_COPYAREA=y -CONFIG_FB_SYS_IMAGEBLIT=y -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYSMEM_FOPS=y -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_IOMEM_FOPS=y -CONFIG_FB_IOMEM_HELPERS=y -CONFIG_FB_SYSMEM_HELPERS=y -CONFIG_FB_SYSMEM_HELPERS_DEFERRED=y -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set -# end of Frame buffer Devices - -# -# Backlight & LCD device support -# -CONFIG_LCD_CLASS_DEVICE=m -# CONFIG_LCD_LTV350QV is not set -# CONFIG_LCD_ILI922X is not set -# CONFIG_LCD_ILI9320 is not set -# CONFIG_LCD_TDO24M is not set -# CONFIG_LCD_VGG2432A4 is not set -CONFIG_LCD_PLATFORM=m -# CONFIG_LCD_AMS369FG06 is not set -# CONFIG_LCD_LMS501KF03 is not set -# CONFIG_LCD_HX8357 is not set -# CONFIG_LCD_OTM3225A is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_KTD2801 is not set -# CONFIG_BACKLIGHT_KTZ8866 is not set -CONFIG_BACKLIGHT_PWM=m -# CONFIG_BACKLIGHT_APPLE is not set -# CONFIG_BACKLIGHT_QCOM_WLED is not set -# CONFIG_BACKLIGHT_SAHARA is not set -# CONFIG_BACKLIGHT_ADP8860 is not set -# CONFIG_BACKLIGHT_ADP8870 is not set -# CONFIG_BACKLIGHT_LM3509 is not set -# CONFIG_BACKLIGHT_LM3630A is not set 
-CONFIG_BACKLIGHT_LM3639=m -# CONFIG_BACKLIGHT_LP855X is not set -# CONFIG_BACKLIGHT_MP3309C is not set -# CONFIG_BACKLIGHT_LV5207LP is not set -# CONFIG_BACKLIGHT_BD6107 is not set -# CONFIG_BACKLIGHT_ARCXCNN is not set -# end of Backlight & LCD device support - -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_DUMMY_CONSOLE_COLUMNS=80 -CONFIG_DUMMY_CONSOLE_ROWS=25 -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is not set -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y -# end of Console display driver support - -# CONFIG_LOGO is not set -# end of Graphics support - -# CONFIG_DRM_ACCEL is not set -CONFIG_SOUND=y -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_PCM_ELD=y -CONFIG_SND_HWDEP=m -CONFIG_SND_SEQ_DEVICE=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_UMP=m -# CONFIG_SND_UMP_LEGACY_RAWMIDI is not set -CONFIG_SND_COMPRESS_OFFLOAD=m -CONFIG_SND_JACK=y -CONFIG_SND_JACK_INPUT_DEV=y -# CONFIG_SND_OSSEMUL is not set -CONFIG_SND_PCM_TIMER=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=6 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_PROC_FS=y -# CONFIG_SND_VERBOSE_PROCFS is not set -# CONFIG_SND_VERBOSE_PRINTK is not set -CONFIG_SND_CTL_FAST_LOOKUP=y -# CONFIG_SND_DEBUG is not set -CONFIG_SND_CTL_INPUT_VALIDATION=y -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_CTL_LED=m -CONFIG_SND_SEQUENCER=m -# CONFIG_SND_SEQ_DUMMY is not set -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_SEQ_MIDI_EVENT=m -CONFIG_SND_SEQ_MIDI=m -# CONFIG_SND_SEQ_UMP is not set -CONFIG_SND_SEQ_UMP_CLIENT=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_ALOOP is not set -# CONFIG_SND_PCMTEST is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_SERIAL_U16550 is not set -CONFIG_SND_MPU401=m -# CONFIG_SND_AC97_POWER_SAVE is not set -CONFIG_SND_PCI=y -# CONFIG_SND_AD1889 is not set -# CONFIG_SND_ALS300 is not set -# CONFIG_SND_ALS4000 is not set -# CONFIG_SND_ALI5451 is not set -# CONFIG_SND_ASIHPI is not set -# CONFIG_SND_ATIIXP is not set -# CONFIG_SND_ATIIXP_MODEM is not set -# CONFIG_SND_AU8810 is not set -# CONFIG_SND_AU8820 is not set -# CONFIG_SND_AU8830 is not set -# CONFIG_SND_AW2 is not set -# CONFIG_SND_AZT3328 is not set -# CONFIG_SND_BT87X is not set -# CONFIG_SND_CA0106 is not set -# CONFIG_SND_CMIPCI is not set -# CONFIG_SND_OXYGEN is not set -# CONFIG_SND_CS4281 is not set -# CONFIG_SND_CS46XX is not set -# CONFIG_SND_CTXFI is not set -# CONFIG_SND_DARLA20 is not set -# CONFIG_SND_GINA20 is not set -# CONFIG_SND_LAYLA20 is not set -# CONFIG_SND_DARLA24 is not set -# CONFIG_SND_GINA24 is not set -# CONFIG_SND_LAYLA24 is not set -# CONFIG_SND_MONA is not set -# CONFIG_SND_MIA is not set -# CONFIG_SND_ECHO3G is not set -# CONFIG_SND_INDIGO is not set -# CONFIG_SND_INDIGOIO is not set -# CONFIG_SND_INDIGODJ is not set -# CONFIG_SND_INDIGOIOX is not set -# CONFIG_SND_INDIGODJX is not set -# CONFIG_SND_EMU10K1 is not set -# CONFIG_SND_EMU10K1X is not set -# CONFIG_SND_ENS1370 is not set -# CONFIG_SND_ENS1371 is not set -# CONFIG_SND_ES1938 is not set -# CONFIG_SND_ES1968 is not set -# CONFIG_SND_FM801 is not set -# CONFIG_SND_HDSP is not set -# CONFIG_SND_HDSPM is not set -# CONFIG_SND_ICE1712 is not set -# CONFIG_SND_ICE1724 is not set 
-CONFIG_SND_INTEL8X0=m -# CONFIG_SND_INTEL8X0M is not set -# CONFIG_SND_KORG1212 is not set -# CONFIG_SND_LOLA is not set -# CONFIG_SND_LX6464ES is not set -# CONFIG_SND_MAESTRO3 is not set -# CONFIG_SND_MIXART is not set -# CONFIG_SND_NM256 is not set -# CONFIG_SND_PCXHR is not set -# CONFIG_SND_RIPTIDE is not set -# CONFIG_SND_RME32 is not set -# CONFIG_SND_RME96 is not set -# CONFIG_SND_RME9652 is not set -# CONFIG_SND_SE6X is not set -# CONFIG_SND_SONICVIBES is not set -# CONFIG_SND_TRIDENT is not set -# CONFIG_SND_VIA82XX is not set -# CONFIG_SND_VIA82XX_MODEM is not set -# CONFIG_SND_VIRTUOSO is not set -# CONFIG_SND_VX222 is not set -# CONFIG_SND_YMFPCI is not set - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_GENERIC_LEDS=y -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -# CONFIG_SND_HDA_INPUT_BEEP is not set -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_SCODEC_COMPONENT=m -# CONFIG_SND_HDA_SCODEC_CS35L41_I2C is not set -# CONFIG_SND_HDA_SCODEC_CS35L41_SPI is not set -# CONFIG_SND_HDA_SCODEC_CS35L56_I2C is not set -# CONFIG_SND_HDA_SCODEC_CS35L56_SPI is not set -# CONFIG_SND_HDA_SCODEC_TAS2781_I2C is not set -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_CODEC_CIRRUS=m -# CONFIG_SND_HDA_CODEC_CS8409 is not set -CONFIG_SND_HDA_CODEC_CONEXANT=m -# CONFIG_SND_HDA_CODEC_SENARYTECH is not set -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -# CONFIG_SND_HDA_CODEC_CA0132_DSP is not set -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=60 -# CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM is not set -# CONFIG_SND_HDA_CTL_DEV_ID is not set -# end of HD-Audio - -CONFIG_SND_HDA_CORE=m -CONFIG_SND_HDA_COMPONENT=y -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_EXT_CORE=m -CONFIG_SND_HDA_PREALLOC_SIZE=0 -CONFIG_SND_INTEL_NHLT=y -CONFIG_SND_INTEL_DSP_CONFIG=m -CONFIG_SND_INTEL_SOUNDWIRE_ACPI=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_MIDI_V2=y -CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y -# CONFIG_SND_USB_UA101 is not set -# CONFIG_SND_USB_USX2Y is not set -# CONFIG_SND_USB_CAIAQ is not set -# CONFIG_SND_USB_US122L is not set -# CONFIG_SND_USB_6FIRE is not set -# CONFIG_SND_USB_HIFACE is not set -# CONFIG_SND_BCD2000 is not set -# CONFIG_SND_USB_POD is not set -# CONFIG_SND_USB_PODHD is not set -# CONFIG_SND_USB_TONEPORT is not set -# CONFIG_SND_USB_VARIAX is not set -CONFIG_SND_SOC=m -CONFIG_SND_SOC_COMPRESS=y -CONFIG_SND_SOC_TOPOLOGY=y -CONFIG_SND_SOC_ACPI=m -# CONFIG_SND_SOC_ADI is not set -# CONFIG_SND_SOC_AMD_ACP is not set -# CONFIG_SND_SOC_AMD_ACP3x is not set -# CONFIG_SND_SOC_AMD_RENOIR is not set -# CONFIG_SND_SOC_AMD_ACP5x is not set -# CONFIG_SND_SOC_AMD_ACP6x is not set -# CONFIG_SND_AMD_ACP_CONFIG is not set -# CONFIG_SND_SOC_AMD_ACP_COMMON is not set -# CONFIG_SND_SOC_AMD_RPL_ACP6x is not set -# CONFIG_SND_ATMEL_SOC is not set -# CONFIG_SND_BCM63XX_I2S_WHISTLER is not set -# CONFIG_SND_DESIGNWARE_I2S is not set - -# -# SoC Audio for Freescale CPUs -# - -# -# Common SoC Audio options for Freescale CPUs: -# -# CONFIG_SND_SOC_FSL_ASRC is not set -# CONFIG_SND_SOC_FSL_SAI is not set -# CONFIG_SND_SOC_FSL_AUDMIX is not set -# CONFIG_SND_SOC_FSL_SSI is not set -# CONFIG_SND_SOC_FSL_SPDIF is not set -# CONFIG_SND_SOC_FSL_ESAI is not set -# CONFIG_SND_SOC_FSL_MICFIL is not set -# CONFIG_SND_SOC_FSL_XCVR is not set -# 
CONFIG_SND_SOC_IMX_AUDMUX is not set -# end of SoC Audio for Freescale CPUs - -# CONFIG_SND_SOC_CHV3_I2S is not set -# CONFIG_SND_I2S_HI6210_I2S is not set -# CONFIG_SND_SOC_IMG is not set -# CONFIG_SND_SOC_INTEL_SST_TOPLEVEL is not set -CONFIG_SND_SOC_ACPI_INTEL_MATCH=m -# CONFIG_SND_SOC_INTEL_AVS is not set -CONFIG_SND_SOC_INTEL_MACH=y -# CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set -CONFIG_SND_SOC_INTEL_HDA_DSP_COMMON=m -# CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH is not set -# CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH is not set -# CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH is not set -# CONFIG_SND_SOC_INTEL_GLK_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH is not set -CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m -# CONFIG_SND_SOC_INTEL_SOF_RT5682_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_PCM512x_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH is not set -# CONFIG_SND_SOC_INTEL_CML_LP_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH is not set -# CONFIG_SND_SOC_INTEL_SOF_SSP_AMP_MACH is not set -# CONFIG_SND_SOC_INTEL_EHL_RT5660_MACH is not set -# CONFIG_SND_SOC_MTK_BTCVSD is not set -CONFIG_SND_SOC_SOF_TOPLEVEL=y -CONFIG_SND_SOC_SOF_PCI_DEV=m -CONFIG_SND_SOC_SOF_PCI=m -CONFIG_SND_SOC_SOF_ACPI=m -CONFIG_SND_SOC_SOF_ACPI_DEV=m -CONFIG_SND_SOC_SOF_DEBUG_PROBES=m -CONFIG_SND_SOC_SOF_CLIENT=m -CONFIG_SND_SOC_SOF=m -CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y -CONFIG_SND_SOC_SOF_IPC3=y -CONFIG_SND_SOC_SOF_IPC4=y -CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m -# CONFIG_SND_SOC_SOF_AMD_RENOIR is not set -# CONFIG_SND_SOC_SOF_AMD_VANGOGH is not set -# CONFIG_SND_SOC_SOF_AMD_REMBRANDT is not set -# CONFIG_SND_SOC_SOF_AMD_ACP63 is not set -CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL=y -CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC=m -CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP=m -CONFIG_SND_SOC_SOF_INTEL_COMMON=m -CONFIG_SND_SOC_SOF_BAYTRAIL=m -CONFIG_SND_SOC_SOF_BROADWELL=m -CONFIG_SND_SOC_SOF_MERRIFIELD=m -CONFIG_SND_SOC_SOF_INTEL_SKL=m -CONFIG_SND_SOC_SOF_SKYLAKE=m -CONFIG_SND_SOC_SOF_KABYLAKE=m -CONFIG_SND_SOC_SOF_INTEL_APL=m -CONFIG_SND_SOC_SOF_APOLLOLAKE=m -CONFIG_SND_SOC_SOF_GEMINILAKE=m -CONFIG_SND_SOC_SOF_INTEL_CNL=m -CONFIG_SND_SOC_SOF_CANNONLAKE=m -CONFIG_SND_SOC_SOF_COFFEELAKE=m -CONFIG_SND_SOC_SOF_COMETLAKE=m -CONFIG_SND_SOC_SOF_INTEL_ICL=m -CONFIG_SND_SOC_SOF_ICELAKE=m -CONFIG_SND_SOC_SOF_JASPERLAKE=m -CONFIG_SND_SOC_SOF_INTEL_TGL=m -CONFIG_SND_SOC_SOF_TIGERLAKE=m -CONFIG_SND_SOC_SOF_ELKHARTLAKE=m -CONFIG_SND_SOC_SOF_ALDERLAKE=m -CONFIG_SND_SOC_SOF_INTEL_MTL=m -CONFIG_SND_SOC_SOF_METEORLAKE=m -CONFIG_SND_SOC_SOF_INTEL_LNL=m -CONFIG_SND_SOC_SOF_LUNARLAKE=m -CONFIG_SND_SOC_SOF_HDA_COMMON=m -CONFIG_SND_SOC_SOF_HDA_GENERIC=m -CONFIG_SND_SOC_SOF_HDA_MLINK=m -CONFIG_SND_SOC_SOF_HDA_LINK=y -CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC=y -CONFIG_SND_SOC_SOF_HDA_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_HDA=m -CONFIG_SND_SOC_SOF_HDA_PROBES=m -CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=m -CONFIG_SND_SOC_SOF_XTENSA=m - -# -# STMicroelectronics STM32 SOC audio support -# -# end of STMicroelectronics STM32 SOC audio support - -# CONFIG_SND_SOC_XILINX_I2S is not set -# CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER is not set -# CONFIG_SND_SOC_XILINX_SPDIF is not set -# CONFIG_SND_SOC_XTFPGA_I2S is not set -CONFIG_SND_SOC_I2C_AND_SPI=m - -# -# CODEC drivers -# -# CONFIG_SND_SOC_AC97_CODEC is not set -# CONFIG_SND_SOC_ADAU1372_I2C is not set -# 
CONFIG_SND_SOC_ADAU1372_SPI is not set -# CONFIG_SND_SOC_ADAU1701 is not set -# CONFIG_SND_SOC_ADAU1761_I2C is not set -# CONFIG_SND_SOC_ADAU1761_SPI is not set -# CONFIG_SND_SOC_ADAU7002 is not set -# CONFIG_SND_SOC_ADAU7118_HW is not set -# CONFIG_SND_SOC_ADAU7118_I2C is not set -# CONFIG_SND_SOC_AK4104 is not set -# CONFIG_SND_SOC_AK4118 is not set -# CONFIG_SND_SOC_AK4375 is not set -# CONFIG_SND_SOC_AK4458 is not set -# CONFIG_SND_SOC_AK4554 is not set -# CONFIG_SND_SOC_AK4613 is not set -# CONFIG_SND_SOC_AK4619 is not set -# CONFIG_SND_SOC_AK4642 is not set -# CONFIG_SND_SOC_AK5386 is not set -# CONFIG_SND_SOC_AK5558 is not set -# CONFIG_SND_SOC_ALC5623 is not set -# CONFIG_SND_SOC_AW8738 is not set -# CONFIG_SND_SOC_AW88395 is not set -# CONFIG_SND_SOC_AW88261 is not set -# CONFIG_SND_SOC_AW87390 is not set -# CONFIG_SND_SOC_AW88399 is not set -# CONFIG_SND_SOC_BD28623 is not set -# CONFIG_SND_SOC_BT_SCO is not set -# CONFIG_SND_SOC_CHV3_CODEC is not set -# CONFIG_SND_SOC_CS35L32 is not set -# CONFIG_SND_SOC_CS35L33 is not set -# CONFIG_SND_SOC_CS35L34 is not set -# CONFIG_SND_SOC_CS35L35 is not set -# CONFIG_SND_SOC_CS35L36 is not set -# CONFIG_SND_SOC_CS35L41_SPI is not set -# CONFIG_SND_SOC_CS35L41_I2C is not set -# CONFIG_SND_SOC_CS35L45_SPI is not set -# CONFIG_SND_SOC_CS35L45_I2C is not set -# CONFIG_SND_SOC_CS35L56_I2C is not set -# CONFIG_SND_SOC_CS35L56_SPI is not set -# CONFIG_SND_SOC_CS42L42 is not set -# CONFIG_SND_SOC_CS42L51_I2C is not set -# CONFIG_SND_SOC_CS42L52 is not set -# CONFIG_SND_SOC_CS42L56 is not set -# CONFIG_SND_SOC_CS42L73 is not set -# CONFIG_SND_SOC_CS42L83 is not set -# CONFIG_SND_SOC_CS4234 is not set -# CONFIG_SND_SOC_CS4265 is not set -# CONFIG_SND_SOC_CS4270 is not set -# CONFIG_SND_SOC_CS4271_I2C is not set -# CONFIG_SND_SOC_CS4271_SPI is not set -# CONFIG_SND_SOC_CS42XX8_I2C is not set -# CONFIG_SND_SOC_CS43130 is not set -# CONFIG_SND_SOC_CS4341 is not set -# CONFIG_SND_SOC_CS4349 is not set -# CONFIG_SND_SOC_CS53L30 is not set -# CONFIG_SND_SOC_CS530X_I2C is not set -# CONFIG_SND_SOC_CX2072X is not set -# CONFIG_SND_SOC_DA7213 is not set -CONFIG_SND_SOC_DMIC=m -# CONFIG_SND_SOC_ES7134 is not set -# CONFIG_SND_SOC_ES7241 is not set -# CONFIG_SND_SOC_ES8311 is not set -# CONFIG_SND_SOC_ES8316 is not set -# CONFIG_SND_SOC_ES8326 is not set -# CONFIG_SND_SOC_ES8328_I2C is not set -# CONFIG_SND_SOC_ES8328_SPI is not set -# CONFIG_SND_SOC_GTM601 is not set -CONFIG_SND_SOC_HDAC_HDMI=m -CONFIG_SND_SOC_HDAC_HDA=m -# CONFIG_SND_SOC_HDA is not set -# CONFIG_SND_SOC_ICS43432 is not set -# CONFIG_SND_SOC_IDT821034 is not set -# CONFIG_SND_SOC_MAX98088 is not set -# CONFIG_SND_SOC_MAX98090 is not set -# CONFIG_SND_SOC_MAX98357A is not set -# CONFIG_SND_SOC_MAX98504 is not set -# CONFIG_SND_SOC_MAX9867 is not set -# CONFIG_SND_SOC_MAX98927 is not set -# CONFIG_SND_SOC_MAX98520 is not set -# CONFIG_SND_SOC_MAX98373_I2C is not set -# CONFIG_SND_SOC_MAX98388 is not set -# CONFIG_SND_SOC_MAX98390 is not set -# CONFIG_SND_SOC_MAX98396 is not set -# CONFIG_SND_SOC_MAX9860 is not set -# CONFIG_SND_SOC_MSM8916_WCD_ANALOG is not set -# CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set -# CONFIG_SND_SOC_PCM1681 is not set -# CONFIG_SND_SOC_PCM1789_I2C is not set -# CONFIG_SND_SOC_PCM179X_I2C is not set -# CONFIG_SND_SOC_PCM179X_SPI is not set -# CONFIG_SND_SOC_PCM186X_I2C is not set -# CONFIG_SND_SOC_PCM186X_SPI is not set -# CONFIG_SND_SOC_PCM3060_I2C is not set -# CONFIG_SND_SOC_PCM3060_SPI is not set -# CONFIG_SND_SOC_PCM3168A_I2C is not set -# 
CONFIG_SND_SOC_PCM3168A_SPI is not set -# CONFIG_SND_SOC_PCM5102A is not set -# CONFIG_SND_SOC_PCM512x_I2C is not set -# CONFIG_SND_SOC_PCM512x_SPI is not set -# CONFIG_SND_SOC_PCM6240 is not set -# CONFIG_SND_SOC_PEB2466 is not set -# CONFIG_SND_SOC_RT5616 is not set -# CONFIG_SND_SOC_RT5631 is not set -# CONFIG_SND_SOC_RT5640 is not set -# CONFIG_SND_SOC_RT5659 is not set -# CONFIG_SND_SOC_RT9120 is not set -# CONFIG_SND_SOC_RTQ9128 is not set -# CONFIG_SND_SOC_SGTL5000 is not set -# CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set -# CONFIG_SND_SOC_SMA1303 is not set -# CONFIG_SND_SOC_SPDIF is not set -# CONFIG_SND_SOC_SRC4XXX_I2C is not set -# CONFIG_SND_SOC_SSM2305 is not set -# CONFIG_SND_SOC_SSM2518 is not set -# CONFIG_SND_SOC_SSM2602_SPI is not set -# CONFIG_SND_SOC_SSM2602_I2C is not set -# CONFIG_SND_SOC_SSM4567 is not set -# CONFIG_SND_SOC_STA32X is not set -# CONFIG_SND_SOC_STA350 is not set -# CONFIG_SND_SOC_STI_SAS is not set -# CONFIG_SND_SOC_TAS2552 is not set -# CONFIG_SND_SOC_TAS2562 is not set -# CONFIG_SND_SOC_TAS2764 is not set -# CONFIG_SND_SOC_TAS2770 is not set -# CONFIG_SND_SOC_TAS2780 is not set -# CONFIG_SND_SOC_TAS2781_I2C is not set -# CONFIG_SND_SOC_TAS5086 is not set -# CONFIG_SND_SOC_TAS571X is not set -# CONFIG_SND_SOC_TAS5720 is not set -# CONFIG_SND_SOC_TAS5805M is not set -# CONFIG_SND_SOC_TAS6424 is not set -# CONFIG_SND_SOC_TDA7419 is not set -# CONFIG_SND_SOC_TFA9879 is not set -# CONFIG_SND_SOC_TFA989X is not set -# CONFIG_SND_SOC_TLV320AIC23_I2C is not set -# CONFIG_SND_SOC_TLV320AIC23_SPI is not set -# CONFIG_SND_SOC_TLV320AIC31XX is not set -# CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set -# CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set -# CONFIG_SND_SOC_TLV320AIC3X_I2C is not set -# CONFIG_SND_SOC_TLV320AIC3X_SPI is not set -# CONFIG_SND_SOC_TLV320ADCX140 is not set -# CONFIG_SND_SOC_TS3A227E is not set -# CONFIG_SND_SOC_TSCS42XX is not set -# CONFIG_SND_SOC_TSCS454 is not set -# CONFIG_SND_SOC_WM8510 is not set -# CONFIG_SND_SOC_WM8523 is not set -# CONFIG_SND_SOC_WM8580 is not set -# CONFIG_SND_SOC_WM8711 is not set -# CONFIG_SND_SOC_WM8728 is not set -# CONFIG_SND_SOC_WM8731_I2C is not set -# CONFIG_SND_SOC_WM8731_SPI is not set -# CONFIG_SND_SOC_WM8737 is not set -# CONFIG_SND_SOC_WM8741 is not set -# CONFIG_SND_SOC_WM8750 is not set -# CONFIG_SND_SOC_WM8753 is not set -# CONFIG_SND_SOC_WM8770 is not set -# CONFIG_SND_SOC_WM8776 is not set -# CONFIG_SND_SOC_WM8782 is not set -# CONFIG_SND_SOC_WM8804_I2C is not set -# CONFIG_SND_SOC_WM8804_SPI is not set -# CONFIG_SND_SOC_WM8903 is not set -# CONFIG_SND_SOC_WM8904 is not set -# CONFIG_SND_SOC_WM8940 is not set -# CONFIG_SND_SOC_WM8960 is not set -# CONFIG_SND_SOC_WM8961 is not set -# CONFIG_SND_SOC_WM8962 is not set -# CONFIG_SND_SOC_WM8974 is not set -# CONFIG_SND_SOC_WM8978 is not set -# CONFIG_SND_SOC_WM8985 is not set -# CONFIG_SND_SOC_MT6351 is not set -# CONFIG_SND_SOC_MT6358 is not set -# CONFIG_SND_SOC_MT6660 is not set -# CONFIG_SND_SOC_NAU8315 is not set -# CONFIG_SND_SOC_NAU8540 is not set -# CONFIG_SND_SOC_NAU8810 is not set -# CONFIG_SND_SOC_NAU8821 is not set -# CONFIG_SND_SOC_NAU8822 is not set -# CONFIG_SND_SOC_NAU8824 is not set -# CONFIG_SND_SOC_TPA6130A2 is not set -# CONFIG_SND_SOC_LPASS_WSA_MACRO is not set -# CONFIG_SND_SOC_LPASS_VA_MACRO is not set -# CONFIG_SND_SOC_LPASS_RX_MACRO is not set -# CONFIG_SND_SOC_LPASS_TX_MACRO is not set -# end of CODEC drivers - -# CONFIG_SND_SIMPLE_CARD is not set -CONFIG_SND_X86=y -# CONFIG_HDMI_LPE_AUDIO is not set -CONFIG_SND_VIRTIO=m 
-CONFIG_AC97_BUS=m -CONFIG_HID_SUPPORT=y -CONFIG_HID=m -CONFIG_HID_BATTERY_STRENGTH=y -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=m -# CONFIG_HID_ACCUTOUCH is not set -# CONFIG_HID_ACRUX is not set -# CONFIG_HID_APPLE is not set -# CONFIG_HID_APPLEIR is not set -# CONFIG_HID_ASUS is not set -# CONFIG_HID_AUREAL is not set -# CONFIG_HID_BELKIN is not set -# CONFIG_HID_BETOP_FF is not set -# CONFIG_HID_BIGBEN_FF is not set -# CONFIG_HID_CHERRY is not set -CONFIG_HID_CHICONY=m -# CONFIG_HID_CORSAIR is not set -# CONFIG_HID_COUGAR is not set -# CONFIG_HID_MACALLY is not set -# CONFIG_HID_PRODIKEYS is not set -# CONFIG_HID_CMEDIA is not set -# CONFIG_HID_CREATIVE_SB0540 is not set -CONFIG_HID_CYPRESS=m -# CONFIG_HID_DRAGONRISE is not set -# CONFIG_HID_EMS_FF is not set -# CONFIG_HID_ELAN is not set -# CONFIG_HID_ELECOM is not set -# CONFIG_HID_ELO is not set -# CONFIG_HID_EVISION is not set -# CONFIG_HID_EZKEY is not set -# CONFIG_HID_FT260 is not set -# CONFIG_HID_GEMBIRD is not set -# CONFIG_HID_GFRM is not set -# CONFIG_HID_GLORIOUS is not set -# CONFIG_HID_HOLTEK is not set -# CONFIG_HID_GOOGLE_STADIA_FF is not set -# CONFIG_HID_VIVALDI is not set -# CONFIG_HID_GT683R is not set -# CONFIG_HID_KEYTOUCH is not set -CONFIG_HID_KYE=m -# CONFIG_HID_UCLOGIC is not set -# CONFIG_HID_WALTOP is not set -# CONFIG_HID_VIEWSONIC is not set -# CONFIG_HID_VRC2 is not set -CONFIG_HID_XIAOMI=m -# CONFIG_HID_GYRATION is not set -# CONFIG_HID_ICADE is not set -# CONFIG_HID_ITE is not set -# CONFIG_HID_JABRA is not set -# CONFIG_HID_TWINHAN is not set -# CONFIG_HID_KENSINGTON is not set -# CONFIG_HID_LCPOWER is not set -# CONFIG_HID_LED is not set -CONFIG_HID_LENOVO=m -# CONFIG_HID_LETSKETCH is not set -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m -CONFIG_HID_LOGITECH_HIDPP=m -# CONFIG_LOGITECH_FF is not set -# CONFIG_LOGIRUMBLEPAD2_FF is not set -# CONFIG_LOGIG940_FF is not set -# CONFIG_LOGIWHEELS_FF is not set -# CONFIG_HID_MAGICMOUSE is not set -# CONFIG_HID_MALTRON is not set -# CONFIG_HID_MAYFLASH is not set -# CONFIG_HID_MEGAWORLD_FF is not set -# CONFIG_HID_REDRAGON is not set -CONFIG_HID_MICROSOFT=m -# CONFIG_HID_MONTEREY is not set -CONFIG_HID_MULTITOUCH=m -# CONFIG_HID_NINTENDO is not set -# CONFIG_HID_NTI is not set -# CONFIG_HID_NTRIG is not set -# CONFIG_HID_ORTEK is not set -# CONFIG_HID_PANTHERLORD is not set -# CONFIG_HID_PENMOUNT is not set -# CONFIG_HID_PETALYNX is not set -# CONFIG_HID_PICOLCD is not set -# CONFIG_HID_PLANTRONICS is not set -# CONFIG_HID_PXRC is not set -# CONFIG_HID_RAZER is not set -# CONFIG_HID_PRIMAX is not set -# CONFIG_HID_RETRODE is not set -# CONFIG_HID_ROCCAT is not set -# CONFIG_HID_SAITEK is not set -# CONFIG_HID_SAMSUNG is not set -# CONFIG_HID_SEMITEK is not set -# CONFIG_HID_SIGMAMICRO is not set -# CONFIG_HID_SONY is not set -# CONFIG_HID_SPEEDLINK is not set -# CONFIG_HID_STEAM is not set -# CONFIG_HID_STEELSERIES is not set -# CONFIG_HID_SUNPLUS is not set -CONFIG_HID_RMI=m -# CONFIG_HID_GREENASIA is not set -# CONFIG_HID_SMARTJOYPLUS is not set -# CONFIG_HID_TIVO is not set -# CONFIG_HID_TOPSEED is not set -# CONFIG_HID_TOPRE is not set -# CONFIG_HID_THINGM is not set -# CONFIG_HID_THRUSTMASTER is not set -# CONFIG_HID_UDRAW_PS3 is not set -CONFIG_HID_U2FZERO=m -# CONFIG_HID_WACOM is not set -# CONFIG_HID_WIIMOTE is not set -# CONFIG_HID_WINWING is not set -# CONFIG_HID_XINMO is not set -# CONFIG_HID_ZEROPLUS is not set -# CONFIG_HID_ZYDACRON is not set -CONFIG_HID_SENSOR_HUB=m 
-CONFIG_HID_SENSOR_CUSTOM_SENSOR=m -# CONFIG_HID_ALPS is not set -# CONFIG_HID_MCP2221 is not set -# end of Special HID drivers - -# -# HID-BPF support -# -CONFIG_HID_BPF=y -# end of HID-BPF support - -# -# USB HID support -# -CONFIG_USB_HID=m -# CONFIG_HID_PID is not set -CONFIG_USB_HIDDEV=y -# end of USB HID support - -CONFIG_I2C_HID=m -CONFIG_I2C_HID_ACPI=m -CONFIG_I2C_HID_OF=m -CONFIG_I2C_HID_CORE=m - -# -# Intel ISH HID support -# -CONFIG_INTEL_ISH_HID=m -CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m -# end of Intel ISH HID support - -# -# AMD SFH HID Support -# -CONFIG_AMD_SFH_HID=m -# end of AMD SFH HID Support - -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=m -# CONFIG_USB_LED_TRIG is not set -# CONFIG_USB_ULPI_BUS is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=m -CONFIG_USB_PCI=y -CONFIG_USB_PCI_AMD=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -# CONFIG_USB_FEW_INIT_RETRIES is not set -# CONFIG_USB_DYNAMIC_MINORS is not set -CONFIG_USB_OTG=y -# CONFIG_USB_OTG_PRODUCTLIST is not set -# CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set -CONFIG_USB_OTG_FSM=m -# CONFIG_USB_LEDS_TRIGGER_USBPORT is not set -CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_DEFAULT_AUTHORIZATION_MODE=1 -# CONFIG_USB_MON is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_C67X00_HCD is not set -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=m -# CONFIG_USB_XHCI_PCI_RENESAS is not set -CONFIG_USB_XHCI_PLATFORM=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=m -CONFIG_USB_EHCI_FSL=m -CONFIG_USB_EHCI_HCD_PLATFORM=m -# CONFIG_USB_OXU210HP_HCD is not set -# CONFIG_USB_ISP116X_HCD is not set -# CONFIG_USB_MAX3421_HCD is not set -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -# CONFIG_USB_OHCI_HCD_PLATFORM is not set -CONFIG_USB_UHCI_HCD=m -# CONFIG_USB_SL811_HCD is not set -# CONFIG_USB_R8A66597_HCD is not set -# CONFIG_USB_HCD_SSB is not set -# CONFIG_USB_HCD_TEST_MODE is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -# CONFIG_USB_PRINTER is not set -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_REALTEK is not set -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_USBAT is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_ONETOUCH is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set -# CONFIG_USB_STORAGE_ENE_UB6250 is not set -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set -# CONFIG_USBIP_CORE is not set - -# -# USB dual-mode controller drivers -# -# CONFIG_USB_CDNS_SUPPORT is not set -# CONFIG_USB_MUSB_HDRC is not set -# CONFIG_USB_DWC3 is not set -# CONFIG_USB_DWC2 is not set -# CONFIG_USB_CHIPIDEA is not set -# CONFIG_USB_ISP1760 is not set - -# -# USB port drivers -# -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_GENERIC=y -# CONFIG_USB_SERIAL_SIMPLE is not set -# CONFIG_USB_SERIAL_AIRCABLE is not set -CONFIG_USB_SERIAL_ARK3116=m -# 
CONFIG_USB_SERIAL_BELKIN is not set -CONFIG_USB_SERIAL_CH341=m -# CONFIG_USB_SERIAL_WHITEHEAT is not set -# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set -CONFIG_USB_SERIAL_CP210X=m -# CONFIG_USB_SERIAL_CYPRESS_M8 is not set -# CONFIG_USB_SERIAL_EMPEG is not set -# CONFIG_USB_SERIAL_FTDI_SIO is not set -# CONFIG_USB_SERIAL_VISOR is not set -# CONFIG_USB_SERIAL_IPAQ is not set -# CONFIG_USB_SERIAL_IR is not set -# CONFIG_USB_SERIAL_EDGEPORT is not set -# CONFIG_USB_SERIAL_EDGEPORT_TI is not set -# CONFIG_USB_SERIAL_F81232 is not set -# CONFIG_USB_SERIAL_F8153X is not set -# CONFIG_USB_SERIAL_GARMIN is not set -# CONFIG_USB_SERIAL_IPW is not set -# CONFIG_USB_SERIAL_IUU is not set -# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set -# CONFIG_USB_SERIAL_KEYSPAN is not set -# CONFIG_USB_SERIAL_KLSI is not set -# CONFIG_USB_SERIAL_KOBIL_SCT is not set -# CONFIG_USB_SERIAL_MCT_U232 is not set -# CONFIG_USB_SERIAL_METRO is not set -# CONFIG_USB_SERIAL_MOS7720 is not set -# CONFIG_USB_SERIAL_MOS7840 is not set -# CONFIG_USB_SERIAL_MXUPORT is not set -# CONFIG_USB_SERIAL_NAVMAN is not set -# CONFIG_USB_SERIAL_PL2303 is not set -# CONFIG_USB_SERIAL_OTI6858 is not set -# CONFIG_USB_SERIAL_QCAUX is not set -# CONFIG_USB_SERIAL_QUALCOMM is not set -# CONFIG_USB_SERIAL_SPCP8X5 is not set -# CONFIG_USB_SERIAL_SAFE is not set -# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set -# CONFIG_USB_SERIAL_SYMBOL is not set -# CONFIG_USB_SERIAL_TI is not set -# CONFIG_USB_SERIAL_CYBERJACK is not set -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -# CONFIG_USB_SERIAL_OMNINET is not set -# CONFIG_USB_SERIAL_OPTICON is not set -# CONFIG_USB_SERIAL_XSENS_MT is not set -# CONFIG_USB_SERIAL_WISHBONE is not set -# CONFIG_USB_SERIAL_SSU100 is not set -# CONFIG_USB_SERIAL_QT2 is not set -# CONFIG_USB_SERIAL_UPD78F0730 is not set -# CONFIG_USB_SERIAL_XR is not set -# CONFIG_USB_SERIAL_DEBUG is not set - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_EMI62 is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_ADUTUX is not set -# CONFIG_USB_SEVSEG is not set -# CONFIG_USB_LEGOTOWER is not set -# CONFIG_USB_LCD is not set -# CONFIG_USB_CYPRESS_CY7C63 is not set -# CONFIG_USB_CYTHERM is not set -# CONFIG_USB_IDMOUSE is not set -# CONFIG_USB_APPLEDISPLAY is not set -# CONFIG_APPLE_MFI_FASTCHARGE is not set -# CONFIG_USB_LJCA is not set -# CONFIG_USB_SISUSBVGA is not set -# CONFIG_USB_LD is not set -# CONFIG_USB_TRANCEVIBRATOR is not set -# CONFIG_USB_IOWARRIOR is not set -# CONFIG_USB_TEST is not set -# CONFIG_USB_EHSET_TEST_FIXTURE is not set -# CONFIG_USB_ISIGHTFW is not set -# CONFIG_USB_YUREX is not set -# CONFIG_USB_EZUSB_FX2 is not set -# CONFIG_USB_HUB_USB251XB is not set -# CONFIG_USB_HSIC_USB3503 is not set -# CONFIG_USB_HSIC_USB4604 is not set -# CONFIG_USB_LINK_LAYER_TEST is not set -# CONFIG_USB_CHAOSKEY is not set -# CONFIG_USB_ATM is not set - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -# CONFIG_NOP_USB_XCEIV is not set -# CONFIG_USB_ISP1301 is not set -# end of USB Physical Layer drivers - -# CONFIG_USB_GADGET is not set -CONFIG_TYPEC=m -CONFIG_TYPEC_TCPM=m -CONFIG_TYPEC_TCPCI=m -CONFIG_TYPEC_RT1711H=m -CONFIG_TYPEC_TCPCI_MAXIM=m -# CONFIG_TYPEC_FUSB302 is not set -CONFIG_TYPEC_UCSI=m -CONFIG_UCSI_CCG=m -CONFIG_UCSI_ACPI=m -# CONFIG_UCSI_STM32G0 is not set -# CONFIG_TYPEC_TPS6598X is not set -# CONFIG_TYPEC_ANX7411 is not set -# CONFIG_TYPEC_RT1719 is not set -# CONFIG_TYPEC_HD3SS3220 is not set -# CONFIG_TYPEC_STUSB160X is not set -# CONFIG_TYPEC_WUSB3801 is not set - -# -# USB Type-C 
Multiplexer/DeMultiplexer Switch support -# -# CONFIG_TYPEC_MUX_FSA4480 is not set -# CONFIG_TYPEC_MUX_GPIO_SBU is not set -# CONFIG_TYPEC_MUX_PI3USB30532 is not set -# CONFIG_TYPEC_MUX_INTEL_PMC is not set -# CONFIG_TYPEC_MUX_IT5205 is not set -# CONFIG_TYPEC_MUX_NB7VPQ904M is not set -# CONFIG_TYPEC_MUX_PTN36502 is not set -# CONFIG_TYPEC_MUX_WCD939X_USBSS is not set -# end of USB Type-C Multiplexer/DeMultiplexer Switch support - -# -# USB Type-C Alternate Mode drivers -# -CONFIG_TYPEC_DP_ALTMODE=m -# CONFIG_TYPEC_NVIDIA_ALTMODE is not set -# end of USB Type-C Alternate Mode drivers - -CONFIG_USB_ROLE_SWITCH=m -# CONFIG_USB_ROLES_INTEL_XHCI is not set -CONFIG_MMC=y -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -# CONFIG_SDIO_UART is not set -CONFIG_MMC_TEST=m -# CONFIG_MMC_CRYPTO is not set - -# -# MMC/SD/SDIO Host Controller Drivers -# -# CONFIG_MMC_DEBUG is not set -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_IO_ACCESSORS=y -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -CONFIG_MMC_SDHCI_PLTFM=m -# CONFIG_MMC_SDHCI_F_SDH30 is not set -# CONFIG_MMC_WBSD is not set -# CONFIG_MMC_TIFM_SD is not set -# CONFIG_MMC_SPI is not set -# CONFIG_MMC_CB710 is not set -# CONFIG_MMC_VIA_SDMMC is not set -# CONFIG_MMC_VUB300 is not set -CONFIG_MMC_USHC=m -# CONFIG_MMC_USDHI6ROL0 is not set -CONFIG_MMC_CQHCI=m -CONFIG_MMC_HSQ=m -# CONFIG_MMC_TOSHIBA_PCI is not set -# CONFIG_MMC_MTK is not set -# CONFIG_MMC_SDHCI_XENON is not set -# CONFIG_SCSI_UFSHCD is not set -# CONFIG_MEMSTICK is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=m -# CONFIG_LEDS_CLASS_FLASH is not set -# CONFIG_LEDS_CLASS_MULTICOLOR is not set -# CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set - -# -# LED drivers -# -# CONFIG_LEDS_APU is not set -# CONFIG_LEDS_AW200XX is not set -# CONFIG_LEDS_LM3530 is not set -# CONFIG_LEDS_LM3532 is not set -# CONFIG_LEDS_LM3642 is not set -# CONFIG_LEDS_PCA9532 is not set -# CONFIG_LEDS_LP3944 is not set -# CONFIG_LEDS_PCA955X is not set -# CONFIG_LEDS_PCA963X is not set -# CONFIG_LEDS_PCA995X is not set -# CONFIG_LEDS_DAC124S085 is not set -# CONFIG_LEDS_PWM is not set -# CONFIG_LEDS_BD2606MVV is not set -# CONFIG_LEDS_BD2802 is not set -# CONFIG_LEDS_INTEL_SS4200 is not set -# CONFIG_LEDS_TCA6507 is not set -# CONFIG_LEDS_TLC591XX is not set -# CONFIG_LEDS_LM355x is not set -# CONFIG_LEDS_IS31FL319X is not set - -# -# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) -# -# CONFIG_LEDS_BLINKM is not set -# CONFIG_LEDS_MLXCPLD is not set -# CONFIG_LEDS_MLXREG is not set -# CONFIG_LEDS_USER is not set -# CONFIG_LEDS_NIC78BX is not set -# CONFIG_LEDS_SPI_BYTE is not set - -# -# Flash and Torch LED drivers -# - -# -# RGB LED drivers -# - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -# CONFIG_LEDS_TRIGGER_ONESHOT is not set -# CONFIG_LEDS_TRIGGER_DISK is not set -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set -# CONFIG_LEDS_TRIGGER_CPU is not set -# CONFIG_LEDS_TRIGGER_ACTIVITY is not set -# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set - -# -# iptables trigger is under Netfilter config (LED target) -# -# CONFIG_LEDS_TRIGGER_TRANSIENT is not set -# CONFIG_LEDS_TRIGGER_CAMERA is not set -# CONFIG_LEDS_TRIGGER_PANIC is not set -# CONFIG_LEDS_TRIGGER_NETDEV is not set -# CONFIG_LEDS_TRIGGER_PATTERN is not set -# CONFIG_LEDS_TRIGGER_TTY is not set -# CONFIG_LEDS_TRIGGER_INPUT_EVENTS is not set - -# -# Simple LED drivers -# -# CONFIG_ACCESSIBILITY is not set -# CONFIG_INFINIBAND is not set 
-CONFIG_EDAC_ATOMIC_SCRUB=y -CONFIG_EDAC_SUPPORT=y -CONFIG_EDAC=y -# CONFIG_EDAC_LEGACY_SYSFS is not set -# CONFIG_EDAC_DEBUG is not set -CONFIG_EDAC_DECODE_MCE=m -# CONFIG_EDAC_GHES is not set -CONFIG_EDAC_AMD64=m -# CONFIG_EDAC_E752X is not set -# CONFIG_EDAC_I82975X is not set -# CONFIG_EDAC_I3000 is not set -# CONFIG_EDAC_I3200 is not set -CONFIG_EDAC_IE31200=m -# CONFIG_EDAC_X38 is not set -# CONFIG_EDAC_I5400 is not set -CONFIG_EDAC_I7CORE=m -# CONFIG_EDAC_I5100 is not set -# CONFIG_EDAC_I7300 is not set -CONFIG_EDAC_SBRIDGE=m -# CONFIG_EDAC_SKX is not set -# CONFIG_EDAC_I10NM is not set -# CONFIG_EDAC_PND2 is not set -# CONFIG_EDAC_IGEN6 is not set -CONFIG_RTC_LIB=y -CONFIG_RTC_MC146818_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_SYSTOHC_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set -# CONFIG_RTC_NVMEM is not set - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -# CONFIG_RTC_DRV_ABB5ZES3 is not set -# CONFIG_RTC_DRV_ABEOZ9 is not set -# CONFIG_RTC_DRV_ABX80X is not set -# CONFIG_RTC_DRV_DS1307 is not set -# CONFIG_RTC_DRV_DS1374 is not set -# CONFIG_RTC_DRV_DS1672 is not set -# CONFIG_RTC_DRV_MAX6900 is not set -# CONFIG_RTC_DRV_MAX31335 is not set -# CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set -# CONFIG_RTC_DRV_ISL12022 is not set -# CONFIG_RTC_DRV_X1205 is not set -# CONFIG_RTC_DRV_PCF8523 is not set -# CONFIG_RTC_DRV_PCF85063 is not set -# CONFIG_RTC_DRV_PCF85363 is not set -# CONFIG_RTC_DRV_PCF8563 is not set -# CONFIG_RTC_DRV_PCF8583 is not set -# CONFIG_RTC_DRV_M41T80 is not set -# CONFIG_RTC_DRV_BQ32K is not set -# CONFIG_RTC_DRV_S35390A is not set -# CONFIG_RTC_DRV_FM3130 is not set -# CONFIG_RTC_DRV_RX8010 is not set -# CONFIG_RTC_DRV_RX8111 is not set -# CONFIG_RTC_DRV_RX8581 is not set -# CONFIG_RTC_DRV_RX8025 is not set -# CONFIG_RTC_DRV_EM3027 is not set -# CONFIG_RTC_DRV_RV3028 is not set -# CONFIG_RTC_DRV_RV3032 is not set -# CONFIG_RTC_DRV_RV8803 is not set -# CONFIG_RTC_DRV_SD3078 is not set - -# -# SPI RTC drivers -# -# CONFIG_RTC_DRV_M41T93 is not set -# CONFIG_RTC_DRV_M41T94 is not set -# CONFIG_RTC_DRV_DS1302 is not set -# CONFIG_RTC_DRV_DS1305 is not set -# CONFIG_RTC_DRV_DS1343 is not set -# CONFIG_RTC_DRV_DS1347 is not set -# CONFIG_RTC_DRV_DS1390 is not set -# CONFIG_RTC_DRV_MAX6916 is not set -# CONFIG_RTC_DRV_R9701 is not set -# CONFIG_RTC_DRV_RX4581 is not set -# CONFIG_RTC_DRV_RS5C348 is not set -# CONFIG_RTC_DRV_MAX6902 is not set -# CONFIG_RTC_DRV_PCF2123 is not set -# CONFIG_RTC_DRV_MCP795 is not set -CONFIG_RTC_I2C_AND_SPI=y - -# -# SPI and I2C RTC drivers -# -# CONFIG_RTC_DRV_DS3232 is not set -# CONFIG_RTC_DRV_PCF2127 is not set -# CONFIG_RTC_DRV_RV3029C2 is not set -# CONFIG_RTC_DRV_RX6110 is not set - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -# CONFIG_RTC_DRV_DS1286 is not set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1685_FAMILY is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_DS2404 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_MSM6242 is not set -# CONFIG_RTC_DRV_RP5C01 is not set - -# -# on-CPU RTC drivers -# -# CONFIG_RTC_DRV_FTRTC010 is not set - -# -# HID Sensor RTC drivers -# -# 
CONFIG_RTC_DRV_GOLDFISH is not set -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_DMA_ENGINE=y -CONFIG_DMA_VIRTUAL_CHANNELS=y -CONFIG_DMA_ACPI=y -# CONFIG_ALTERA_MSGDMA is not set -CONFIG_INTEL_IDMA64=m -# CONFIG_INTEL_IDXD is not set -# CONFIG_INTEL_IDXD_COMPAT is not set -CONFIG_INTEL_IOATDMA=m -# CONFIG_PLX_DMA is not set -# CONFIG_XILINX_DMA is not set -# CONFIG_XILINX_XDMA is not set -CONFIG_AMD_PTDMA=m -# CONFIG_QCOM_HIDMA_MGMT is not set -# CONFIG_QCOM_HIDMA is not set -CONFIG_DW_DMAC_CORE=y -# CONFIG_DW_DMAC is not set -CONFIG_DW_DMAC_PCI=y -# CONFIG_DW_EDMA is not set -CONFIG_HSU_DMA=y -# CONFIG_SF_PDMA is not set -# CONFIG_INTEL_LDMA is not set - -# -# DMA Clients -# -CONFIG_ASYNC_TX_DMA=y -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y - -# -# DMABUF options -# -CONFIG_SYNC_FILE=y -# CONFIG_SW_SYNC is not set -CONFIG_UDMABUF=y -CONFIG_DMABUF_MOVE_NOTIFY=y -# CONFIG_DMABUF_DEBUG is not set -# CONFIG_DMABUF_SELFTESTS is not set -# CONFIG_DMABUF_HEAPS is not set -# CONFIG_DMABUF_SYSFS_STATS is not set -# end of DMABUF options - -CONFIG_DCA=m -# CONFIG_UIO is not set -CONFIG_VFIO=m -CONFIG_VFIO_GROUP=y -CONFIG_VFIO_CONTAINER=y -CONFIG_VFIO_IOMMU_TYPE1=m -# CONFIG_VFIO_NOIOMMU is not set -CONFIG_VFIO_VIRQFD=y -# CONFIG_VFIO_DEBUGFS is not set - -# -# VFIO support for PCI devices -# -CONFIG_VFIO_PCI_CORE=m -CONFIG_VFIO_PCI_MMAP=y -CONFIG_VFIO_PCI_INTX=y -CONFIG_VFIO_PCI=m -CONFIG_VFIO_PCI_VGA=y -CONFIG_VFIO_PCI_IGD=y -CONFIG_VIRTIO_VFIO_PCI=m -CONFIG_QAT_VFIO_PCI=m -# end of VFIO support for PCI devices - -CONFIG_VFIO_MDEV=m -CONFIG_IRQ_BYPASS_MANAGER=y -CONFIG_VIRT_DRIVERS=y -# CONFIG_VMGENID is not set -# CONFIG_VBOXGUEST is not set -# CONFIG_NITRO_ENCLAVES is not set -CONFIG_EFI_SECRET=m -CONFIG_VIRTIO_ANCHOR=y -CONFIG_VIRTIO=y -CONFIG_VIRTIO_PCI_LIB=y -CONFIG_VIRTIO_PCI_LIB_LEGACY=y -CONFIG_VIRTIO_MENU=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_PCI_ADMIN_LEGACY=y -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_VDPA=m -CONFIG_VIRTIO_PMEM=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MEM=m -CONFIG_VIRTIO_INPUT=m -CONFIG_VIRTIO_MMIO=m -# CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set -CONFIG_VIRTIO_DMA_SHARED_BUFFER=m -# CONFIG_VIRTIO_DEBUG is not set -CONFIG_VDPA=m -# CONFIG_VDPA_SIM is not set -CONFIG_VDPA_USER=m -CONFIG_IFCVF=m -# CONFIG_MLX5_VDPA_STEERING_DEBUG is not set -CONFIG_VP_VDPA=m -# CONFIG_ALIBABA_ENI_VDPA is not set -CONFIG_SNET_VDPA=m -CONFIG_OCTEONEP_VDPA=m -CONFIG_VHOST_IOTLB=m -CONFIG_VHOST_TASK=y -CONFIG_VHOST=m -CONFIG_VHOST_MENU=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_VDPA=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set - -# -# Microsoft Hyper-V guest support -# -# CONFIG_HYPERV is not set -# end of Microsoft Hyper-V guest support - -# CONFIG_GREYBUS is not set -# CONFIG_COMEDI is not set -# CONFIG_STAGING is not set -# CONFIG_GOLDFISH is not set -# CONFIG_CHROME_PLATFORMS is not set -# CONFIG_CZNIC_PLATFORMS is not set -# CONFIG_MELLANOX_PLATFORM is not set -# CONFIG_SURFACE_PLATFORMS is not set -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACPI_WMI=m -CONFIG_WMI_BMOF=m -# CONFIG_HUAWEI_WMI is not set -CONFIG_MXM_WMI=m -# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -# CONFIG_XIAOMI_WMI is not set -# CONFIG_GIGABYTE_WMI is not set -CONFIG_YOGABOOK=m -# CONFIG_ACERHDF is not set -# CONFIG_ACER_WIRELESS is not set -# CONFIG_ACER_WMI is not set -# CONFIG_AMD_PMC is not set -# CONFIG_AMD_HSMP is not set -CONFIG_AMD_WBRF=y -# CONFIG_ADV_SWBUTTON is not set -# CONFIG_APPLE_GMUX is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_ASUS_WIRELESS 
is not set -# CONFIG_ASUS_WMI is not set -# CONFIG_EEEPC_LAPTOP is not set -# CONFIG_X86_PLATFORM_DRIVERS_DELL is not set -# CONFIG_AMILO_RFKILL is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_FUJITSU_TABLET is not set -# CONFIG_GPD_POCKET_FAN is not set -# CONFIG_X86_PLATFORM_DRIVERS_HP is not set -CONFIG_WIRELESS_HOTKEY=m -# CONFIG_IBM_RTL is not set -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_LENOVO_YMC=m -CONFIG_SENSORS_HDAPS=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_THINKPAD_LMI=m -# CONFIG_INTEL_ATOMISP2_PM is not set -# CONFIG_INTEL_IFS is not set -# CONFIG_INTEL_SAR_INT1092 is not set -# CONFIG_INTEL_PMT_TELEMETRY is not set -# CONFIG_INTEL_PMT_CRASHLOG is not set - -# -# Intel Speed Select Technology interface support -# -CONFIG_INTEL_SPEED_SELECT_TPMI=m -CONFIG_INTEL_SPEED_SELECT_INTERFACE=m -# end of Intel Speed Select Technology interface support - -CONFIG_INTEL_WMI=y -# CONFIG_INTEL_WMI_SBL_FW_UPDATE is not set -CONFIG_INTEL_WMI_THUNDERBOLT=m - -# -# Intel Uncore Frequency Control -# -CONFIG_INTEL_UNCORE_FREQ_CONTROL_TPMI=m -CONFIG_INTEL_UNCORE_FREQ_CONTROL=m -# end of Intel Uncore Frequency Control - -CONFIG_INTEL_HID_EVENT=m -CONFIG_INTEL_VBTN=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_INTEL_ISHTP_ECLITE=m -# CONFIG_INTEL_PUNIT_IPC is not set -CONFIG_INTEL_RST=m -# CONFIG_INTEL_SDSI is not set -CONFIG_INTEL_SMARTCONNECT=m -CONFIG_INTEL_TPMI_POWER_DOMAINS=m -CONFIG_INTEL_TPMI=m -# CONFIG_INTEL_PLR_TPMI is not set -CONFIG_INTEL_TURBO_MAX_3=y -CONFIG_INTEL_VSEC=m -# CONFIG_ACPI_QUICKSTART is not set -# CONFIG_MSI_EC is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_MSI_WMI is not set -# CONFIG_MSI_WMI_PLATFORM is not set -# CONFIG_SAMSUNG_LAPTOP is not set -# CONFIG_SAMSUNG_Q10 is not set -# CONFIG_TOSHIBA_BT_RFKILL is not set -# CONFIG_TOSHIBA_HAPS is not set -# CONFIG_TOSHIBA_WMI is not set -# CONFIG_ACPI_CMPC is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_LG_LAPTOP is not set -# CONFIG_PANASONIC_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_SYSTEM76_ACPI is not set -# CONFIG_TOPSTAR_LAPTOP is not set -# CONFIG_SERIAL_MULTI_INSTANTIATE is not set -# CONFIG_MLX_PLATFORM is not set -# CONFIG_INSPUR_PLATFORM_PROFILE is not set -# CONFIG_LENOVO_WMI_CAMERA is not set -CONFIG_FW_ATTR_CLASS=m -CONFIG_INTEL_IPS=m -CONFIG_INTEL_SCU_IPC=y -CONFIG_INTEL_SCU=y -CONFIG_INTEL_SCU_PCI=y -CONFIG_INTEL_SCU_PLATFORM=m -CONFIG_INTEL_SCU_IPC_UTIL=m -# CONFIG_SIEMENS_SIMATIC_IPC is not set -# CONFIG_WINMATE_FM07_KEYS is not set -CONFIG_P2SB=y -CONFIG_HAVE_CLK=y -CONFIG_HAVE_CLK_PREPARE=y -CONFIG_COMMON_CLK=y -# CONFIG_LMK04832 is not set -# CONFIG_COMMON_CLK_MAX9485 is not set -# CONFIG_COMMON_CLK_SI5341 is not set -# CONFIG_COMMON_CLK_SI5351 is not set -# CONFIG_COMMON_CLK_SI544 is not set -# CONFIG_COMMON_CLK_CDCE706 is not set -# CONFIG_COMMON_CLK_CS2000_CP is not set -# CONFIG_COMMON_CLK_PWM is not set -# CONFIG_XILINX_VCU is not set -CONFIG_HWSPINLOCK=y - -# -# Clock Source drivers -# -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -# end of Clock Source drivers - -CONFIG_MAILBOX=y -CONFIG_PCC=y -# CONFIG_ALTERA_MBOX is not set -CONFIG_IOMMU_IOVA=y -CONFIG_IOMMU_API=y -CONFIG_IOMMU_SUPPORT=y - -# -# Generic IOMMU Pagetable Support -# -CONFIG_IOMMU_IO_PGTABLE=y -# end of Generic IOMMU Pagetable Support - -# CONFIG_IOMMU_DEBUGFS 
is not set -# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set -CONFIG_IOMMU_DEFAULT_DMA_LAZY=y -# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -CONFIG_IOMMU_DMA=y -CONFIG_IOMMU_SVA=y -CONFIG_IOMMU_IOPF=y -CONFIG_AMD_IOMMU=y -CONFIG_DMAR_TABLE=y -CONFIG_INTEL_IOMMU=y -CONFIG_INTEL_IOMMU_SVM=y -CONFIG_INTEL_IOMMU_DEFAULT_ON=y -CONFIG_INTEL_IOMMU_FLOPPY_WA=y -CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON=y -CONFIG_INTEL_IOMMU_PERF_EVENTS=y -# CONFIG_IOMMUFD is not set -CONFIG_IRQ_REMAP=y -CONFIG_VIRTIO_IOMMU=m - -# -# Remoteproc drivers -# -# CONFIG_REMOTEPROC is not set -# end of Remoteproc drivers - -# -# Rpmsg drivers -# -# CONFIG_RPMSG_QCOM_GLINK_RPM is not set -# CONFIG_RPMSG_VIRTIO is not set -# end of Rpmsg drivers - -# CONFIG_SOUNDWIRE is not set - -# -# SOC (System On Chip) specific Drivers -# - -# -# Amlogic SoC drivers -# -# end of Amlogic SoC drivers - -# -# Broadcom SoC drivers -# -# end of Broadcom SoC drivers - -# -# NXP/Freescale QorIQ SoC drivers -# -# end of NXP/Freescale QorIQ SoC drivers - -# -# fujitsu SoC drivers -# -# end of fujitsu SoC drivers - -# -# i.MX SoC drivers -# -# end of i.MX SoC drivers - -# -# Enable LiteX SoC Builder specific drivers -# -# end of Enable LiteX SoC Builder specific drivers - -# CONFIG_WPCM450_SOC is not set - -# -# Qualcomm SoC drivers -# -# CONFIG_QCOM_PBS is not set -# end of Qualcomm SoC drivers - -# CONFIG_SOC_TI is not set - -# -# Xilinx SoC drivers -# -# end of Xilinx SoC drivers -# end of SOC (System On Chip) specific Drivers - -# -# PM Domains -# - -# -# Amlogic PM Domains -# -# end of Amlogic PM Domains - -# -# Broadcom PM Domains -# -# end of Broadcom PM Domains - -# -# i.MX PM Domains -# -# end of i.MX PM Domains - -# -# Qualcomm PM Domains -# -# end of Qualcomm PM Domains -# end of PM Domains - -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y -CONFIG_DEVFREQ_GOV_PERFORMANCE=m -CONFIG_DEVFREQ_GOV_POWERSAVE=m -CONFIG_DEVFREQ_GOV_USERSPACE=m -CONFIG_DEVFREQ_GOV_PASSIVE=m - -# -# DEVFREQ Drivers -# -# CONFIG_PM_DEVFREQ_EVENT is not set -CONFIG_EXTCON=y - -# -# Extcon Device Drivers -# -# CONFIG_EXTCON_FSA9480 is not set -# CONFIG_EXTCON_RT8973A is not set -# CONFIG_EXTCON_SM5502 is not set -# CONFIG_EXTCON_USBC_TUSB320 is not set -CONFIG_MEMORY=y -# CONFIG_IIO is not set -# CONFIG_NTB is not set -CONFIG_PWM=y -# CONFIG_PWM_DEBUG is not set -CONFIG_PWM_CLK=m -# CONFIG_PWM_DWC is not set -CONFIG_PWM_LPSS=m -CONFIG_PWM_LPSS_PCI=m -CONFIG_PWM_LPSS_PLATFORM=m -# CONFIG_PWM_PCA9685 is not set - -# -# IRQ chip support -# -# CONFIG_LAN966X_OIC is not set -# end of IRQ chip support - -# CONFIG_IPACK_BUS is not set -# CONFIG_RESET_CONTROLLER is not set - -# -# PHY Subsystem -# -# CONFIG_GENERIC_PHY is not set -# CONFIG_USB_LGM_PHY is not set -# CONFIG_PHY_CAN_TRANSCEIVER is not set - -# -# PHY drivers for Broadcom platforms -# -# CONFIG_BCM_KONA_USB2_PHY is not set -# end of PHY drivers for Broadcom platforms - -# CONFIG_PHY_PXA_28NM_HSIC is not set -# CONFIG_PHY_PXA_28NM_USB2 is not set -# CONFIG_PHY_INTEL_LGM_EMMC is not set -# end of PHY Subsystem - -CONFIG_POWERCAP=y -CONFIG_INTEL_RAPL_CORE=m -CONFIG_INTEL_RAPL=m -CONFIG_INTEL_RAPL_TPMI=m -# CONFIG_IDLE_INJECT is not set -# CONFIG_MCB is not set - -# -# Performance monitor support -# -# CONFIG_DWC_PCIE_PMU is not set -# end of Performance monitor support - -CONFIG_RAS=y -# CONFIG_RAS_CEC is not set -CONFIG_AMD_ATL=m -CONFIG_RAS_FMPM=m -CONFIG_USB4=m -# CONFIG_USB4_DEBUGFS_WRITE is not set -# CONFIG_USB4_DMA_TEST is not set - -# -# Android -# -# 
CONFIG_ANDROID_BINDER_IPC is not set -# end of Android - -CONFIG_LIBNVDIMM=y -CONFIG_BLK_DEV_PMEM=y -CONFIG_ND_CLAIM=y -CONFIG_ND_BTT=y -CONFIG_BTT=y -CONFIG_ND_PFN=y -CONFIG_NVDIMM_PFN=y -CONFIG_NVDIMM_DAX=y -CONFIG_NVDIMM_KEYS=y -# CONFIG_NVDIMM_SECURITY_TEST is not set -CONFIG_DAX=y -CONFIG_DEV_DAX=m -CONFIG_DEV_DAX_PMEM=m -CONFIG_DEV_DAX_KMEM=m -CONFIG_NVMEM=y -CONFIG_NVMEM_SYSFS=y -# CONFIG_NVMEM_LAYOUTS is not set -# CONFIG_NVMEM_RMEM is not set -CONFIG_NVMEM_SPMI_SDAM=m - -# -# HW tracing support -# -# CONFIG_STM is not set -# CONFIG_INTEL_TH is not set -# end of HW tracing support - -# CONFIG_FPGA is not set -# CONFIG_TEE is not set -CONFIG_PM_OPP=y -# CONFIG_SIOX is not set -# CONFIG_SLIMBUS is not set -# CONFIG_INTERCONNECT is not set -CONFIG_COUNTER=m -# CONFIG_INTEL_QEP is not set -# CONFIG_MOST is not set -# CONFIG_PECI is not set -CONFIG_HTE=y -# end of Device Drivers - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_VALIDATE_FS_PARSER=y -CONFIG_FS_IOMAP=y -CONFIG_FS_STACK=y -CONFIG_BUFFER_HEAD=y -CONFIG_LEGACY_DIRECT_IO=y -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_JBD2=y -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_XFS_FS=m -# CONFIG_XFS_SUPPORT_V4 is not set -# CONFIG_XFS_SUPPORT_ASCII_CI is not set -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -# CONFIG_XFS_ONLINE_SCRUB is not set -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -# CONFIG_BTRFS_FS_REF_VERIFY is not set -CONFIG_NILFS2_FS=m -CONFIG_F2FS_FS=m -CONFIG_F2FS_STAT_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -CONFIG_F2FS_CHECK_FS=y -# CONFIG_F2FS_FAULT_INJECTION is not set -CONFIG_F2FS_FS_COMPRESSION=y -CONFIG_F2FS_FS_LZO=y -CONFIG_F2FS_FS_LZORLE=y -CONFIG_F2FS_FS_LZ4=y -CONFIG_F2FS_FS_LZ4HC=y -CONFIG_F2FS_FS_ZSTD=y -CONFIG_F2FS_IOSTAT=y -# CONFIG_F2FS_UNFAIR_RWSEM is not set -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_ERASURE_CODING=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -CONFIG_BCACHEFS_LOCK_TIME_STATS=y -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN=y -CONFIG_ZONEFS_FS=m -CONFIG_FS_DAX=y -CONFIG_FS_DAX_PMD=y -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FILE_LOCKING=y -CONFIG_FS_ENCRYPTION=y -CONFIG_FS_ENCRYPTION_ALGS=y -CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y -CONFIG_FS_VERITY=y -CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QUOTA_DEBUG=y -CONFIG_QUOTA_TREE=m -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=y -CONFIG_FUSE_FS=m -# CONFIG_CUSE is not set -CONFIG_VIRTIO_FS=m -CONFIG_FUSE_DAX=y -CONFIG_FUSE_PASSTHROUGH=y -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -# CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set -CONFIG_OVERLAY_FS_INDEX=y 
-CONFIG_OVERLAY_FS_XINO_AUTO=y -CONFIG_OVERLAY_FS_METACOPY=y -# CONFIG_OVERLAY_FS_DEBUG is not set -CONFIG_OVERLAY_FS_UNPRIVILEGED=y - -# -# Caches -# -CONFIG_NETFS_SUPPORT=m -CONFIG_NETFS_STATS=y -# CONFIG_NETFS_DEBUG is not set -CONFIG_FSCACHE=y -CONFIG_FSCACHE_STATS=y -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_ERROR_INJECTION is not set -CONFIG_CACHEFILES_ONDEMAND=y -# end of Caches - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -# end of CD-ROM/DVD Filesystems - -# -# DOS/FAT/EXFAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_FAT_DEFAULT_UTF8=y -CONFIG_EXFAT_FS=m -CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS3_FS=m -# CONFIG_NTFS3_64BIT_CLUSTER is not set -CONFIG_NTFS3_LZX_XPRESS=y -CONFIG_NTFS3_FS_POSIX_ACL=y -# CONFIG_NTFS_FS is not set -# end of DOS/FAT/EXFAT/NT Filesystems - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -# CONFIG_PROC_KCORE is not set -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_PROC_CHILDREN=y -CONFIG_PROC_PID_ARCH_STATUS=y -CONFIG_PROC_CPU_RESCTRL=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_INODE64=y -CONFIG_TMPFS_QUOTA=y -CONFIG_HUGETLBFS=y -# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set -CONFIG_HUGETLB_PAGE=y -CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y -CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -CONFIG_CONFIGFS_FS=y -CONFIG_EFIVAR_FS=m -# end of Pseudo filesystems - -CONFIG_MISC_FILESYSTEMS=y -# CONFIG_ORANGEFS_FS is not set -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -CONFIG_SQUASHFS_DECOMP_SINGLE=y -CONFIG_SQUASHFS_DECOMP_MULTI=y -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y -CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y -CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS=y -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZ4=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_ZSTD=y -CONFIG_SQUASHFS_4K_DEVBLK_SIZE=y -CONFIG_SQUASHFS_EMBEDDED=y -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_OMFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_PSTORE=y -CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240 -CONFIG_PSTORE_COMPRESS=y -# CONFIG_PSTORE_CONSOLE is not set -# CONFIG_PSTORE_PMSG is not set -# CONFIG_PSTORE_FTRACE is not set -CONFIG_PSTORE_RAM=m -CONFIG_PSTORE_ZONE=y -CONFIG_PSTORE_BLK=y -CONFIG_PSTORE_BLK_BLKDEV="" -CONFIG_PSTORE_BLK_KMSG_SIZE=64 -CONFIG_PSTORE_BLK_MAX_REASON=2 -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -# CONFIG_EROFS_FS is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -# CONFIG_NFS_V2 is not set -# CONFIG_NFS_V3 is not set -CONFIG_NFS_V4=m -# CONFIG_NFS_SWAP is not set -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_PNFS_FILE_LAYOUT=m -CONFIG_PNFS_BLOCK=m -CONFIG_PNFS_FLEXFILE_LAYOUT=m -CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="millerson.name" -CONFIG_NFS_V4_1_MIGRATION=y -CONFIG_NFS_V4_SECURITY_LABEL=y -CONFIG_NFS_FSCACHE=y -# 
CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFS_DISABLE_UDP_SUPPORT=y -CONFIG_NFS_V4_2_READ_PLUS=y -CONFIG_NFSD=m -# CONFIG_NFSD_V2 is not set -# CONFIG_NFSD_V3_ACL is not set -CONFIG_NFSD_V4=y -CONFIG_NFSD_PNFS=y -CONFIG_NFSD_BLOCKLAYOUT=y -# CONFIG_NFSD_SCSILAYOUT is not set -# CONFIG_NFSD_FLEXFILELAYOUT is not set -CONFIG_NFSD_V4_2_INTER_SSC=y -CONFIG_NFSD_V4_SECURITY_LABEL=y -# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set -CONFIG_GRACE_PERIOD=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_COMMON=y -CONFIG_NFS_V4_2_SSC_HELPER=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_BACKCHANNEL=y -CONFIG_RPCSEC_GSS_KRB5=m -CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y -# CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA is not set -# CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2 is not set -# CONFIG_SUNRPC_DEBUG is not set -# CONFIG_CEPH_FS is not set -CONFIG_CIFS=m -CONFIG_CIFS_STATS2=y -CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y -CONFIG_CIFS_UPCALL=y -# CONFIG_CIFS_XATTR is not set -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_CIFS_SWN_UPCALL=y -CONFIG_CIFS_FSCACHE=y -# CONFIG_SMB_SERVER is not set -CONFIG_SMBFS=m -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -CONFIG_NLS_CODEPAGE_866=y -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -# CONFIG_NLS_ASCII is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -CONFIG_NLS_KOI8_R=y -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_MAC_ROMAN is not set -# CONFIG_NLS_MAC_CELTIC is not set -# CONFIG_NLS_MAC_CENTEURO is not set -# CONFIG_NLS_MAC_CROATIAN is not set -# CONFIG_NLS_MAC_CYRILLIC is not set -# CONFIG_NLS_MAC_GAELIC is not set -# CONFIG_NLS_MAC_GREEK is not set -# CONFIG_NLS_MAC_ICELAND is not set -# CONFIG_NLS_MAC_INUIT is not set -# CONFIG_NLS_MAC_ROMANIAN is not set -# CONFIG_NLS_MAC_TURKISH is not set -CONFIG_NLS_UTF8=y -CONFIG_NLS_UCS2_UTILS=m -# CONFIG_DLM is not set -CONFIG_UNICODE=y -# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set -CONFIG_IO_WQ=y -# end of File systems - -# -# Security options -# -CONFIG_KEYS=y -CONFIG_KEYS_REQUEST_CACHE=y -CONFIG_PERSISTENT_KEYRINGS=y -# CONFIG_TRUSTED_KEYS is not set -CONFIG_ENCRYPTED_KEYS=y -# CONFIG_USER_DECRYPTED_DATA is not set -# CONFIG_KEY_DH_OPERATIONS is not set -CONFIG_KEY_NOTIFICATIONS=y -CONFIG_SECURITY_DMESG_RESTRICT=y -# CONFIG_PROC_MEM_ALWAYS_FORCE is not set -CONFIG_PROC_MEM_FORCE_PTRACE=y -# 
CONFIG_PROC_MEM_NO_FORCE is not set -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y -CONFIG_SECURITY_TIOCSTI_RESTRICT=y -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -CONFIG_SECURITY_NETWORK=y -# CONFIG_SECURITY_NETWORK_XFRM is not set -CONFIG_SECURITY_PATH=y -# CONFIG_INTEL_TXT is not set -CONFIG_LSM_MMAP_MIN_ADDR=65536 -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y -# CONFIG_STATIC_USERMODEHELPER is not set -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_DEVELOP=y -CONFIG_SECURITY_SELINUX_AVC_STATS=y -CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 -CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 -# CONFIG_SECURITY_SELINUX_DEBUG is not set -# CONFIG_SECURITY_SMACK is not set -# CONFIG_SECURITY_TOMOYO is not set -CONFIG_SECURITY_APPARMOR=y -# CONFIG_SECURITY_APPARMOR_DEBUG is not set -CONFIG_SECURITY_APPARMOR_INTROSPECT_POLICY=y -CONFIG_SECURITY_APPARMOR_HASH=y -CONFIG_SECURITY_APPARMOR_HASH_DEFAULT=y -CONFIG_SECURITY_APPARMOR_EXPORT_BINARY=y -CONFIG_SECURITY_APPARMOR_PARANOID_LOAD=y -# CONFIG_SECURITY_LOADPIN is not set -CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_SAFESETID=y -CONFIG_SECURITY_LOCKDOWN_LSM=y -CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y -CONFIG_LOCK_DOWN_KERNEL_FORCE_NONE=y -# CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY is not set -# CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY is not set -CONFIG_SECURITY_LANDLOCK=y -CONFIG_INTEGRITY=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_INTEGRITY_TRUSTED_KEYRING=y -CONFIG_INTEGRITY_AUDIT=y -# CONFIG_IMA is not set -# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set -# CONFIG_EVM is not set -# CONFIG_DEFAULT_SECURITY_SELINUX is not set -# CONFIG_DEFAULT_SECURITY_APPARMOR is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" - -# -# Kernel hardening options -# - -# -# Memory initialization -# -CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y -CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y -CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y -# CONFIG_INIT_STACK_NONE is not set -# CONFIG_INIT_STACK_ALL_PATTERN is not set -CONFIG_INIT_STACK_ALL_ZERO=y -CONFIG_GCC_PLUGIN_STACKLEAK=y -CONFIG_STACKLEAK_TRACK_MIN_SIZE=100 -# CONFIG_STACKLEAK_METRICS is not set -# CONFIG_STACKLEAK_RUNTIME_DISABLE is not set -CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y -CONFIG_INIT_ON_FREE_DEFAULT_ON=y -CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y -CONFIG_ZERO_CALL_USED_REGS=y -CONFIG_PAGE_SANITIZE_VERIFY=y -CONFIG_SLAB_SANITIZE_VERIFY=y -# end of Memory initialization - -# -# Hardening of kernel data structures -# -CONFIG_LIST_HARDENED=y -CONFIG_BUG_ON_DATA_CORRUPTION=y -# end of Hardening of kernel data structures - -# CONFIG_RANDSTRUCT_NONE is not set -CONFIG_RANDSTRUCT_FULL=y -# CONFIG_RANDSTRUCT_PERFORMANCE is not set -CONFIG_RANDSTRUCT=y -CONFIG_GCC_PLUGIN_RANDSTRUCT=y -# end of Kernel hardening options -# end of Security options - -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=y -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_SIG=y -CONFIG_CRYPTO_SIG2=y -CONFIG_CRYPTO_SKCIPHER=y -CONFIG_CRYPTO_SKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=y -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_RNG_DEFAULT=y -CONFIG_CRYPTO_AKCIPHER2=y -CONFIG_CRYPTO_AKCIPHER=y -CONFIG_CRYPTO_KPP2=y -CONFIG_CRYPTO_KPP=m -CONFIG_CRYPTO_ACOMP2=y -CONFIG_CRYPTO_MANAGER=y -CONFIG_CRYPTO_MANAGER2=y -# CONFIG_CRYPTO_USER is not set 
-CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_NULL2=y -CONFIG_CRYPTO_PCRYPT=y -CONFIG_CRYPTO_CRYPTD=y -CONFIG_CRYPTO_AUTHENC=y -# CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_SIMD=y -CONFIG_CRYPTO_ENGINE=m -# end of Crypto core or helper - -# -# Public-key cryptography -# -CONFIG_CRYPTO_RSA=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_DH_RFC7919_GROUPS=y -CONFIG_CRYPTO_ECC=y -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_ECDSA=y -CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m -# end of Public-key cryptography - -# -# Block ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ARIA=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m -CONFIG_CRYPTO_SM4_GENERIC=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -# end of Block ciphers - -# -# Length-preserving ciphers and modes -# -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_CHACHA20=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_CTR=y -CONFIG_CRYPTO_CTS=y -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XCTR=m -CONFIG_CRYPTO_XTS=y -CONFIG_CRYPTO_NHPOLY1305=m -# end of Length-preserving ciphers and modes - -# -# AEAD (authenticated encryption with associated data) ciphers -# -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_GENIV=m -CONFIG_CRYPTO_SEQIV=m -CONFIG_CRYPTO_ECHAINIV=m -CONFIG_CRYPTO_ESSIV=m -# end of AEAD (authenticated encryption with associated data) ciphers - -# -# Hashes, digests, and MACs -# -CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_GHASH=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_POLYVAL=m -CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_RMD160 is not set -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_SHA3=y -CONFIG_CRYPTO_SM3=m -# CONFIG_CRYPTO_SM3_GENERIC is not set -CONFIG_CRYPTO_STREEBOG=m -# CONFIG_CRYPTO_VMAC is not set -# CONFIG_CRYPTO_WP512 is not set -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_XXHASH=y -# end of Hashes, digests, and MACs - -# -# CRCs (cyclic redundancy checks) -# -CONFIG_CRYPTO_CRC32C=y -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRC64_ROCKSOFT=y -# end of CRCs (cyclic redundancy checks) - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_LZO=y -# CONFIG_CRYPTO_842 is not set -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ZSTD=y -# end of Compression - -# -# Random number generation -# -CONFIG_CRYPTO_ANSI_CPRNG=y -CONFIG_CRYPTO_DRBG_MENU=y -CONFIG_CRYPTO_DRBG_HMAC=y -CONFIG_CRYPTO_DRBG_HASH=y -CONFIG_CRYPTO_DRBG_CTR=y -CONFIG_CRYPTO_DRBG=y -CONFIG_CRYPTO_JITTERENTROPY=y -CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS=64 -CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE=32 -CONFIG_CRYPTO_JITTERENTROPY_OSR=1 -# end of Random number generation - -# -# Userspace interface -# -CONFIG_CRYPTO_USER_API=y -CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y -CONFIG_CRYPTO_USER_API_RNG=y -# CONFIG_CRYPTO_USER_API_RNG_CAVP is not set -CONFIG_CRYPTO_USER_API_AEAD=m -# CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE is not set -# end of Userspace interface - -CONFIG_CRYPTO_HASH_INFO=y - -# -# Accelerated Cryptographic Algorithms for CPU (x86) -# -CONFIG_CRYPTO_CURVE25519_X86=m 
-CONFIG_CRYPTO_AES_NI_INTEL=y -CONFIG_CRYPTO_BLOWFISH_X86_64=m -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_CAST5_AVX_X86_64=m -CONFIG_CRYPTO_CAST6_AVX_X86_64=m -CONFIG_CRYPTO_DES3_EDE_X86_64=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m -CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64=m -CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64=m -CONFIG_CRYPTO_CHACHA20_X86_64=y -CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_SSE2=m -CONFIG_CRYPTO_NHPOLY1305_AVX2=m -CONFIG_CRYPTO_BLAKE2S_X86=y -CONFIG_CRYPTO_POLYVAL_CLMUL_NI=m -CONFIG_CRYPTO_POLY1305_X86_64=y -CONFIG_CRYPTO_SHA1_SSSE3=m -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SM3_AVX_X86_64=m -CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m -CONFIG_CRYPTO_CRC32C_INTEL=y -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -# end of Accelerated Cryptographic Algorithms for CPU (x86) - -CONFIG_CRYPTO_HW=y -# CONFIG_CRYPTO_DEV_PADLOCK is not set -# CONFIG_CRYPTO_DEV_ATMEL_ECC is not set -# CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_SP_CCP=y -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -CONFIG_CRYPTO_DEV_SP_PSP=y -# CONFIG_CRYPTO_DEV_CCP_DEBUGFS is not set -# CONFIG_CRYPTO_DEV_NITROX_CNN55XX is not set -CONFIG_CRYPTO_DEV_QAT=m -CONFIG_CRYPTO_DEV_QAT_DH895xCC=m -CONFIG_CRYPTO_DEV_QAT_C3XXX=m -CONFIG_CRYPTO_DEV_QAT_C62X=m -CONFIG_CRYPTO_DEV_QAT_4XXX=m -# CONFIG_CRYPTO_DEV_QAT_420XX is not set -CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m -CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m -CONFIG_CRYPTO_DEV_QAT_C62XVF=m -# CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION is not set -CONFIG_CRYPTO_DEV_VIRTIO=m -# CONFIG_CRYPTO_DEV_SAFEXCEL is not set -# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_PKCS8_PRIVATE_KEY_PARSER=m -CONFIG_PKCS7_MESSAGE_PARSER=y -# CONFIG_PKCS7_TEST_KEY is not set -# CONFIG_SIGNED_PE_FILE_VERIFICATION is not set -# CONFIG_FIPS_SIGNATURE_SELFTEST is not set - -# -# Certificates for signature checking -# -CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" -# CONFIG_MODULE_SIG_KEY_TYPE_RSA is not set -CONFIG_MODULE_SIG_KEY_TYPE_ECDSA=y -CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_SYSTEM_TRUSTED_KEYS="" -# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set -# CONFIG_SECONDARY_TRUSTED_KEYRING is not set -# CONFIG_SYSTEM_BLACKLIST_KEYRING is not set -# end of Certificates for signature checking - -CONFIG_BINARY_PRINTF=y - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_RAID6_PQ_BENCHMARK=y -CONFIG_PACKING=y -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -# CONFIG_CORDIC is not set -# CONFIG_PRIME_NUMBERS is not set -CONFIG_RATIONAL=y -CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_ARCH_HAS_FAST_MULTIPLIER=y -CONFIG_ARCH_USE_SYM_ANNOTATIONS=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_UTILS=y -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_AESCFB=m -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_GF128MUL=y -CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=y -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=y 
-CONFIG_CRYPTO_LIB_CHACHA_GENERIC=y -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=y -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=y -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA1=y -CONFIG_CRYPTO_LIB_SHA256=y -# end of Crypto library routines - -CONFIG_CRC_CCITT=y -CONFIG_CRC16=y -CONFIG_CRC_T10DIF=y -CONFIG_CRC64_ROCKSOFT=y -CONFIG_CRC_ITU_T=y -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC64=y -# CONFIG_CRC4 is not set -CONFIG_CRC7=m -CONFIG_LIBCRC32C=y -CONFIG_CRC8=m -CONFIG_XXHASH=y -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=m -CONFIG_LZ4HC_COMPRESS=m -CONFIG_LZ4_DECOMPRESS=m -CONFIG_ZSTD_COMMON=y -CONFIG_ZSTD_COMPRESS=y -CONFIG_ZSTD_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_MICROLZMA=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_ZSTD=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=m -CONFIG_REED_SOLOMON_ENC8=y -CONFIG_REED_SOLOMON_DEC8=y -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_CLOSURES=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_DMA_OPS=y -CONFIG_NEED_SG_DMA_FLAGS=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_SWIOTLB=y -# CONFIG_SWIOTLB_DYNAMIC is not set -CONFIG_DMA_NEED_SYNC=y -# CONFIG_DMA_API_DEBUG is not set -# CONFIG_DMA_MAP_BENCHMARK is not set -CONFIG_SGL_ALLOC=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_GLOB=y -# CONFIG_GLOB_SELFTEST is not set -CONFIG_NLATTR=y -CONFIG_LRU_CACHE=m -CONFIG_CLZ_TAB=y -# CONFIG_IRQ_POLL is not set -CONFIG_MPILIB=y -CONFIG_SIGNATURE=y -CONFIG_DIMLIB=y -# CONFIG_CTF is not set -CONFIG_OID_REGISTRY=y -CONFIG_UCS2_STRING=y -CONFIG_HAVE_GENERIC_VDSO=y -CONFIG_GENERIC_GETTIMEOFDAY=y -CONFIG_GENERIC_VDSO_TIME_NS=y -CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT=y -CONFIG_VDSO_GETRANDOM=y -CONFIG_FONT_SUPPORT=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_SG_POOL=y -CONFIG_ARCH_HAS_PMEM_API=y -CONFIG_MEMREGION=y -CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION=y -CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y -CONFIG_ARCH_HAS_COPY_MC=y -CONFIG_ARCH_STACKWALK=y -CONFIG_STACKDEPOT=y -CONFIG_STACKDEPOT_MAX_FRAMES=64 -CONFIG_SBITMAP=y -# CONFIG_LWQ_TEST is not set -# end of Library routines - -CONFIG_FIRMWARE_TABLE=y - -# -# Kernel hacking -# - -# -# printk and dmesg options -# -CONFIG_PRINTK_TIME=y -# CONFIG_PRINTK_CALLER is not set -# CONFIG_STACKTRACE_BUILD_ID is not set -CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 -CONFIG_CONSOLE_LOGLEVEL_QUIET=4 -CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -# CONFIG_BOOT_PRINTK_DELAY is not set -# CONFIG_DYNAMIC_DEBUG is not set -# CONFIG_DYNAMIC_DEBUG_CORE is not set -CONFIG_SYMBOLIC_ERRNAME=y -CONFIG_DEBUG_BUGVERBOSE=y -# end of printk and dmesg options - -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MISC=y - -# -# Compile-time 
checks and compiler options -# -CONFIG_AS_HAS_NON_CONST_ULEB128=y -CONFIG_DEBUG_INFO_NONE=y -# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set -# CONFIG_DEBUG_INFO_DWARF4 is not set -# CONFIG_DEBUG_INFO_DWARF5 is not set -CONFIG_FRAME_WARN=2048 -CONFIG_STRIP_ASM_SYMS=y -# CONFIG_READABLE_ASM is not set -# CONFIG_HEADERS_INSTALL is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE is not set -CONFIG_OBJTOOL=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -# end of Compile-time checks and compiler options - -# -# Generic Kernel Debugging Instruments -# -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 -CONFIG_MAGIC_SYSRQ_SERIAL=y -CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_FS_ALLOW_ALL=y -# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set -# CONFIG_DEBUG_FS_ALLOW_NONE is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -CONFIG_ARCH_HAS_UBSAN=y -# CONFIG_UBSAN is not set -CONFIG_HAVE_ARCH_KCSAN=y -CONFIG_HAVE_KCSAN_COMPILER=y -# CONFIG_KCSAN is not set -# end of Generic Kernel Debugging Instruments - -# -# Networking Debugging -# -# CONFIG_NET_DEV_REFCNT_TRACKER is not set -# CONFIG_NET_NS_REFCNT_TRACKER is not set -# CONFIG_DEBUG_NET is not set -# end of Networking Debugging - -# -# Memory Debugging -# -CONFIG_PAGE_EXTENSION=y -# CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_SLUB_DEBUG=y -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_PAGE_OWNER is not set -# CONFIG_PAGE_TABLE_CHECK is not set -CONFIG_PAGE_POISONING=y -# CONFIG_DEBUG_PAGE_REF is not set -CONFIG_DEBUG_RODATA_TEST=y -CONFIG_ARCH_HAS_DEBUG_WX=y -CONFIG_DEBUG_WX=y -CONFIG_GENERIC_PTDUMP=y -CONFIG_PTDUMP_CORE=y -# CONFIG_PTDUMP_DEBUGFS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_PER_VMA_LOCK_STATS is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SHRINKER_DEBUG is not set -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VM_PGTABLE is not set -CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y -CONFIG_DEBUG_VIRTUAL=y -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y -# CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is not set -# CONFIG_MEM_ALLOC_PROFILING is not set -CONFIG_HAVE_ARCH_KASAN=y -CONFIG_HAVE_ARCH_KASAN_VMALLOC=y -CONFIG_CC_HAS_KASAN_GENERIC=y -CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y -# CONFIG_KASAN is not set -CONFIG_HAVE_ARCH_KFENCE=y -CONFIG_KFENCE=y -CONFIG_KFENCE_SAMPLE_INTERVAL=100 -CONFIG_KFENCE_NUM_OBJECTS=255 -CONFIG_KFENCE_DEFERRABLE=y -CONFIG_KFENCE_STRESS_TEST_FAULTS=0 -CONFIG_KFENCE_BUG_ON_DATA_CORRUPTION=y -CONFIG_HAVE_ARCH_KMSAN=y -# end of Memory Debugging - -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Oops, Lockups and Hangs -# -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_LOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM=y -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set -CONFIG_HAVE_HARDLOCKUP_DETECTOR_BUDDY=y -CONFIG_HARDLOCKUP_DETECTOR=y -# CONFIG_HARDLOCKUP_DETECTOR_PREFER_BUDDY is not set -CONFIG_HARDLOCKUP_DETECTOR_PERF=y -# CONFIG_HARDLOCKUP_DETECTOR_BUDDY is not set -# CONFIG_HARDLOCKUP_DETECTOR_ARCH is not set -CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER=y -CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 -# 
CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set -CONFIG_WQ_WATCHDOG=y -# CONFIG_WQ_CPU_INTENSIVE_REPORT is not set -# CONFIG_TEST_LOCKUP is not set -# end of Debug Oops, Lockups and Hangs - -# -# Scheduler Debugging -# -# CONFIG_SCHED_DEBUG is not set -CONFIG_SCHED_INFO=y -# CONFIG_SCHEDSTATS is not set -# end of Scheduler Debugging - -# CONFIG_DEBUG_TIMEKEEPING is not set -CONFIG_DEBUG_PREEMPT=y - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -CONFIG_LOCK_DEBUGGING_SUPPORT=y -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_DEBUG_MUTEXES=y -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_RWSEMS is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_WW_MUTEX_SELFTEST is not set -# CONFIG_SCF_TORTURE_TEST is not set -# CONFIG_CSD_LOCK_WAIT_DEBUG is not set -# end of Lock Debugging (spinlocks, mutexes, etc...) - -# CONFIG_NMI_CHECK_CPU is not set -# CONFIG_DEBUG_IRQFLAGS is not set -CONFIG_STACKTRACE=y -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set -# CONFIG_DEBUG_KOBJECT is not set - -# -# Debug kernel data structures -# -CONFIG_DEBUG_LIST=y -# CONFIG_DEBUG_PLIST is not set -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -# CONFIG_DEBUG_CLOSURES is not set -# CONFIG_DEBUG_MAPLE_TREE is not set -# end of Debug kernel data structures - -# -# RCU Debugging -# -# CONFIG_RCU_SCALE_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -# CONFIG_RCU_REF_SCALE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0 -# CONFIG_RCU_CPU_STALL_CPUTIME is not set -# CONFIG_RCU_TRACE is not set -# CONFIG_RCU_EQS_DEBUG is not set -# end of RCU Debugging - -# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -# CONFIG_LATENCYTOP is not set -# CONFIG_DEBUG_CGROUP_REF is not set -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_NOP_TRACER=y -CONFIG_HAVE_RETHOOK=y -CONFIG_RETHOOK=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_RETVAL=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y -CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_OBJTOOL_MCOUNT=y -CONFIG_HAVE_OBJTOOL_NOP_MCOUNT=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_HAVE_BUILDTIME_MCOUNT_SORT=y -CONFIG_BUILDTIME_MCOUNT_SORT=y -CONFIG_TRACER_MAX_TRACE=y -CONFIG_TRACE_CLOCK=y -CONFIG_RING_BUFFER=y -CONFIG_EVENT_TRACING=y -CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_TRACING=y -CONFIG_GENERIC_TRACER=y -CONFIG_TRACING_SUPPORT=y -CONFIG_FTRACE=y -# CONFIG_BOOTTIME_TRACING is not set -CONFIG_FUNCTION_TRACER=y -CONFIG_FUNCTION_GRAPH_TRACER=y -CONFIG_FUNCTION_GRAPH_RETVAL=y -CONFIG_DYNAMIC_FTRACE=y -CONFIG_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y -CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y -CONFIG_FPROBE=y -# CONFIG_FUNCTION_PROFILER is not set -# CONFIG_STACK_TRACER is not set -# CONFIG_IRQSOFF_TRACER is not set -# CONFIG_PREEMPT_TRACER is not set -# CONFIG_SCHED_TRACER is not set -# CONFIG_HWLAT_TRACER is not set -CONFIG_OSNOISE_TRACER=y -CONFIG_TIMERLAT_TRACER=y -# CONFIG_MMIOTRACE is not set -# CONFIG_FTRACE_SYSCALLS is not set -# CONFIG_TRACER_SNAPSHOT is not set -CONFIG_BRANCH_PROFILE_NONE=y -# 
CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FPROBE_EVENTS=y -CONFIG_KPROBE_EVENTS=y -# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set -CONFIG_UPROBE_EVENTS=y -CONFIG_BPF_EVENTS=y -CONFIG_DYNAMIC_EVENTS=y -CONFIG_PROBE_EVENTS=y -# CONFIG_BPF_KPROBE_OVERRIDE is not set -CONFIG_FTRACE_MCOUNT_RECORD=y -CONFIG_FTRACE_MCOUNT_USE_CC=y -# CONFIG_SYNTH_EVENTS is not set -# CONFIG_USER_EVENTS is not set -# CONFIG_HIST_TRIGGERS is not set -# CONFIG_TRACE_EVENT_INJECT is not set -# CONFIG_TRACEPOINT_BENCHMARK is not set -# CONFIG_RING_BUFFER_BENCHMARK is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set -# CONFIG_FTRACE_RECORD_RECURSION is not set -# CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING is not set -# CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_FTRACE_SORT_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_STARTUP_TEST is not set -# CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is not set -# CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_KPROBE_EVENT_GEN_TEST is not set -# CONFIG_RV is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y -CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -# CONFIG_STRICT_DEVMEM is not set - -# -# x86 Debugging -# -CONFIG_X86_VERBOSE_BOOTUP=y -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_USB_XDBC is not set -# CONFIG_EFI_PGT_DUMP is not set -# CONFIG_DEBUG_TLBFLUSH is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -# CONFIG_X86_DECODER_SELFTEST is not set -# CONFIG_IO_DELAY_0X80 is not set -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -CONFIG_IO_DELAY_NONE=y -# CONFIG_DEBUG_BOOT_PARAMS is not set -# CONFIG_CPA_DEBUG is not set -# CONFIG_DEBUG_ENTRY is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_FPU is not set -# CONFIG_PUNIT_ATOM_DEBUG is not set -CONFIG_UNWINDER_ORC=y -# CONFIG_UNWINDER_FRAME_POINTER is not set -# end of x86 Debugging - -# -# Kernel Testing and Coverage -# -# CONFIG_KUNIT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -CONFIG_FUNCTION_ERROR_INJECTION=y -# CONFIG_FAULT_INJECTION is not set -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set -CONFIG_RUNTIME_TESTING_MENU=y -# CONFIG_TEST_DHRY is not set -# CONFIG_LKDTM is not set -# CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_DIV64 is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_TEST_REF_TRACKER is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_REED_SOLOMON_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_HEXDUMP is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_TEST_PRINTF is not set -# CONFIG_TEST_SCANF is not set -# CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_UUID is not set -# CONFIG_TEST_XARRAY is not set -# CONFIG_TEST_MAPLE_TREE is not set -# CONFIG_TEST_RHASHTABLE is not set -# CONFIG_TEST_IDA is not set -# CONFIG_TEST_LKM is not set -# CONFIG_TEST_BITOPS is not set -# CONFIG_TEST_VMALLOC is not set -CONFIG_TEST_BPF=m -# CONFIG_TEST_BLACKHOLE_DEV is not set -# CONFIG_FIND_BIT_BENCHMARK is not set -# CONFIG_TEST_FIRMWARE is not set -# CONFIG_TEST_SYSCTL is not set -# CONFIG_TEST_UDELAY is not set -# CONFIG_TEST_STATIC_KEYS is not set -# CONFIG_TEST_KMOD is not set -# CONFIG_TEST_DEBUG_VIRTUAL is not set -# CONFIG_TEST_MEMCAT_P is not set -# CONFIG_TEST_MEMINIT is not set -# CONFIG_TEST_HMM is not 
set -# CONFIG_TEST_FREE_PAGES is not set -# CONFIG_TEST_FPU is not set -# CONFIG_TEST_CLOCKSOURCE_WATCHDOG is not set -# CONFIG_TEST_OBJPOOL is not set -CONFIG_ARCH_USE_MEMTEST=y -# CONFIG_MEMTEST is not set -# end of Kernel Testing and Coverage - -# -# Rust hacking -# -# end of Rust hacking -# end of Kernel hacking - -# -# Gentoo Linux -# -CONFIG_GENTOO_LINUX=y -CONFIG_GENTOO_LINUX_UDEV=y -CONFIG_GENTOO_LINUX_PORTAGE=y - -# -# Support for init systems, system and service managers -# -CONFIG_GENTOO_LINUX_INIT_SCRIPT=y -CONFIG_GENTOO_LINUX_INIT_SYSTEMD=y -# end of Support for init systems, system and service managers - -CONFIG_GENTOO_KERNEL_SELF_PROTECTION=y -CONFIG_GENTOO_PRINT_FIRMWARE_INFO=y -# end of Gentoo Linux diff --git a/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-1410769.patch b/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-1410769.patch deleted file mode 100644 index 52f4f5f..0000000 --- a/sys-kernel/hardened-kernel/files/linux-6.12/1191-bcachefs-cherry-pick-updates-from-master-1410769.patch +++ /dev/null @@ -1,24672 +0,0 @@ -From 02db0d7cbe82bc442bb758ffc03bb32f1f7db952 Mon Sep 17 00:00:00 2001 -From: Alexander Miroshnichenko -Date: Sun, 26 Jan 2025 14:49:17 +0300 -Subject: [PATCH] bcachefs: cherry-pick updates from master 1410769 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 8bit - -Signed-off-by: Alexander Miroshnichenko ---- - .../filesystems/bcachefs/CodingStyle.rst | 2 +- - fs/bcachefs/Kconfig | 2 +- - fs/bcachefs/Makefile | 1 + - fs/bcachefs/acl.c | 11 +- - fs/bcachefs/alloc_background.c | 558 ++++++------ - fs/bcachefs/alloc_background.h | 18 +- - fs/bcachefs/alloc_background_format.h | 4 +- - fs/bcachefs/alloc_foreground.c | 315 +++---- - fs/bcachefs/alloc_foreground.h | 21 +- - fs/bcachefs/backpointers.c | 838 +++++++++++------- - fs/bcachefs/backpointers.h | 97 +- - fs/bcachefs/bbpos.h | 2 +- - fs/bcachefs/bcachefs.h | 70 +- - fs/bcachefs/bcachefs_format.h | 106 ++- - fs/bcachefs/bkey.h | 7 - - fs/bcachefs/bkey_methods.c | 29 +- - fs/bcachefs/bkey_methods.h | 15 +- - fs/bcachefs/bkey_types.h | 28 + - fs/bcachefs/btree_cache.c | 64 +- - fs/bcachefs/btree_cache.h | 14 +- - fs/bcachefs/btree_gc.c | 178 +--- - fs/bcachefs/btree_gc.h | 4 +- - fs/bcachefs/btree_io.c | 225 +++-- - fs/bcachefs/btree_io.h | 6 +- - fs/bcachefs/btree_iter.c | 593 ++++++++----- - fs/bcachefs/btree_iter.h | 134 ++- - fs/bcachefs/btree_journal_iter.c | 237 ++++- - fs/bcachefs/btree_journal_iter.h | 22 +- - fs/bcachefs/btree_journal_iter_types.h | 36 + - fs/bcachefs/btree_key_cache.c | 73 +- - fs/bcachefs/btree_locking.c | 78 +- - fs/bcachefs/btree_locking.h | 50 +- - fs/bcachefs/btree_node_scan.c | 153 ++-- - fs/bcachefs/btree_node_scan_types.h | 1 - - fs/bcachefs/btree_trans_commit.c | 207 ++--- - fs/bcachefs/btree_types.h | 42 +- - fs/bcachefs/btree_update.c | 70 +- - fs/bcachefs/btree_update.h | 29 +- - fs/bcachefs/btree_update_interior.c | 293 +++--- - fs/bcachefs/btree_update_interior.h | 3 +- - fs/bcachefs/btree_write_buffer.c | 83 +- - fs/bcachefs/buckets.c | 133 +-- - fs/bcachefs/buckets.h | 30 +- - fs/bcachefs/buckets_types.h | 2 +- - fs/bcachefs/chardev.c | 219 +---- - fs/bcachefs/checksum.c | 10 +- - fs/bcachefs/checksum.h | 2 +- - fs/bcachefs/compress.c | 127 ++- - fs/bcachefs/compress.h | 4 +- - fs/bcachefs/darray.h | 2 +- - fs/bcachefs/data_update.c | 293 ++++-- - fs/bcachefs/data_update.h | 9 +- - fs/bcachefs/debug.c | 5 +- - fs/bcachefs/dirent.c | 10 +- - 
fs/bcachefs/dirent.h | 9 +- - fs/bcachefs/disk_accounting.c | 150 ++-- - fs/bcachefs/disk_accounting.h | 73 +- - fs/bcachefs/ec.c | 267 +++--- - fs/bcachefs/ec.h | 5 +- - fs/bcachefs/ec_format.h | 17 + - fs/bcachefs/errcode.h | 26 +- - fs/bcachefs/error.c | 187 ++-- - fs/bcachefs/error.h | 58 +- - fs/bcachefs/extent_update.c | 4 +- - fs/bcachefs/extents.c | 290 ++---- - fs/bcachefs/extents.h | 18 +- - fs/bcachefs/extents_format.h | 15 +- - fs/bcachefs/fs-common.c | 119 ++- - fs/bcachefs/fs-common.h | 2 + - fs/bcachefs/fs-io-buffered.c | 68 +- - fs/bcachefs/fs-io-direct.c | 25 +- - fs/bcachefs/fs-io-pagecache.c | 4 +- - fs/bcachefs/fs-io.c | 54 +- - fs/bcachefs/fs-ioctl.c | 7 +- - fs/bcachefs/fs.c | 101 ++- - fs/bcachefs/fs.h | 1 + - fs/bcachefs/fsck.c | 772 ++++++++++------ - fs/bcachefs/fsck.h | 11 + - fs/bcachefs/inode.c | 169 ++-- - fs/bcachefs/inode.h | 43 +- - fs/bcachefs/inode_format.h | 15 +- - fs/bcachefs/io_misc.c | 22 +- - fs/bcachefs/io_read.c | 726 ++++++++------- - fs/bcachefs/io_read.h | 98 +- - fs/bcachefs/io_write.c | 184 ++-- - fs/bcachefs/io_write.h | 31 +- - fs/bcachefs/io_write_types.h | 2 +- - fs/bcachefs/journal.c | 252 +++--- - fs/bcachefs/journal.h | 18 +- - fs/bcachefs/journal_io.c | 222 +++-- - fs/bcachefs/journal_io.h | 2 +- - fs/bcachefs/journal_reclaim.c | 161 +++- - fs/bcachefs/journal_reclaim.h | 3 + - fs/bcachefs/journal_types.h | 18 +- - fs/bcachefs/logged_ops.c | 11 +- - fs/bcachefs/logged_ops_format.h | 5 + - fs/bcachefs/lru.c | 4 +- - fs/bcachefs/lru.h | 2 +- - fs/bcachefs/move.c | 248 +++--- - fs/bcachefs/move.h | 5 +- - fs/bcachefs/movinggc.c | 17 +- - fs/bcachefs/opts.c | 26 +- - fs/bcachefs/opts.h | 61 +- - fs/bcachefs/printbuf.h | 15 +- - fs/bcachefs/quota.c | 2 +- - fs/bcachefs/quota.h | 4 +- - fs/bcachefs/rcu_pending.c | 38 +- - fs/bcachefs/rebalance.c | 270 +++++- - fs/bcachefs/rebalance.h | 10 + - fs/bcachefs/rebalance_format.h | 53 ++ - fs/bcachefs/rebalance_types.h | 2 - - fs/bcachefs/recovery.c | 212 +++-- - fs/bcachefs/recovery.h | 2 +- - fs/bcachefs/recovery_passes.c | 112 ++- - fs/bcachefs/recovery_passes.h | 1 + - fs/bcachefs/recovery_passes_types.h | 92 +- - fs/bcachefs/reflink.c | 496 ++++++++--- - fs/bcachefs/reflink.h | 20 +- - fs/bcachefs/reflink_format.h | 7 +- - fs/bcachefs/sb-clean.c | 6 +- - fs/bcachefs/sb-counters_format.h | 165 ++-- - fs/bcachefs/sb-downgrade.c | 28 +- - fs/bcachefs/sb-errors_format.h | 56 +- - fs/bcachefs/six.c | 27 +- - fs/bcachefs/six.h | 1 + - fs/bcachefs/snapshot.c | 515 +++++------ - fs/bcachefs/snapshot.h | 17 +- - fs/bcachefs/str_hash.c | 295 ++++++ - fs/bcachefs/str_hash.h | 28 +- - fs/bcachefs/subvolume.c | 68 +- - fs/bcachefs/subvolume.h | 19 +- - fs/bcachefs/subvolume_types.h | 2 +- - fs/bcachefs/super-io.c | 83 +- - fs/bcachefs/super-io.h | 21 +- - fs/bcachefs/super.c | 54 +- - fs/bcachefs/super.h | 10 - - fs/bcachefs/sysfs.c | 60 +- - fs/bcachefs/tests.c | 26 +- - fs/bcachefs/trace.h | 103 ++- - fs/bcachefs/util.h | 32 + - fs/bcachefs/varint.c | 5 +- - fs/bcachefs/xattr.c | 13 +- - fs/bcachefs/xattr.h | 5 +- - fs/fs_parser.c | 3 +- - include/linux/fs_parser.h | 2 + - include/linux/min_heap.h | 4 +- - 146 files changed, 7954 insertions(+), 5223 deletions(-) - create mode 100644 fs/bcachefs/btree_journal_iter_types.h - create mode 100644 fs/bcachefs/rebalance_format.h - create mode 100644 fs/bcachefs/str_hash.c - -diff --git a/Documentation/filesystems/bcachefs/CodingStyle.rst b/Documentation/filesystems/bcachefs/CodingStyle.rst -index 01de555e21d8..b29562a6bf55 100644 ---- 
a/Documentation/filesystems/bcachefs/CodingStyle.rst -+++ b/Documentation/filesystems/bcachefs/CodingStyle.rst -@@ -183,4 +183,4 @@ even better as a code comment. - A good code comment is wonderful, but even better is the comment that didn't - need to exist because the code was so straightforward as to be obvious; - organized into small clean and tidy modules, with clear and descriptive names --for functions and variable, where every line of code has a clear purpose. -+for functions and variables, where every line of code has a clear purpose. -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -index 5bac803ea367..e8549d04dcb8 100644 ---- a/fs/bcachefs/Kconfig -+++ b/fs/bcachefs/Kconfig -@@ -89,7 +89,7 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN - - config BCACHEFS_PATH_TRACEPOINTS - bool "Extra btree_path tracepoints" -- depends on BCACHEFS_FS -+ depends on BCACHEFS_FS && TRACING - help - Enable extra tracepoints for debugging btree_path operations; we don't - normally want these enabled because they happen at very high rates. -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -index 56d20e219f59..d2689388d5e8 100644 ---- a/fs/bcachefs/Makefile -+++ b/fs/bcachefs/Makefile -@@ -82,6 +82,7 @@ bcachefs-y := \ - siphash.o \ - six.o \ - snapshot.o \ -+ str_hash.o \ - subvolume.o \ - super.o \ - super-io.o \ -diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c -index 87f1be9d4db4..99487727ae64 100644 ---- a/fs/bcachefs/acl.c -+++ b/fs/bcachefs/acl.c -@@ -184,11 +184,6 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, - return ERR_PTR(-EINVAL); - } - --#define acl_for_each_entry(acl, acl_e) \ -- for (acl_e = acl->a_entries; \ -- acl_e < acl->a_entries + acl->a_count; \ -- acl_e++) -- - /* - * Convert from in-memory to filesystem representation. 
- */ -@@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans, - { - struct bkey_i_xattr *xattr; - bch_acl_header *acl_header; -- const struct posix_acl_entry *acl_e; -+ const struct posix_acl_entry *acl_e, *pe; - void *outptr; - unsigned nr_short = 0, nr_long = 0, acl_len, u64s; - -- acl_for_each_entry(acl, acl_e) { -+ FOREACH_ACL_ENTRY(acl_e, acl, pe) { - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: -@@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, - - outptr = (void *) acl_header + sizeof(*acl_header); - -- acl_for_each_entry(acl, acl_e) { -+ FOREACH_ACL_ENTRY(acl_e, acl, pe) { - bch_acl_entry *entry = outptr; - - entry->e_tag = cpu_to_le16(acl_e->e_tag); -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -index c84a91572a1d..fc2ef33b67b3 100644 ---- a/fs/bcachefs/alloc_background.c -+++ b/fs/bcachefs/alloc_background.c -@@ -198,7 +198,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) - } - - int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - int ret = 0; -@@ -213,7 +213,7 @@ int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_alloc_unpacked u; - int ret = 0; -@@ -226,7 +226,7 @@ int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_alloc_unpacked u; - int ret = 0; -@@ -239,7 +239,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bch_alloc_v4 a; - int ret = 0; -@@ -322,9 +322,9 @@ int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, - void bch2_alloc_v4_swab(struct bkey_s k) - { - struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; -- struct bch_backpointer *bp, *bps; - -- a->journal_seq = swab64(a->journal_seq); -+ a->journal_seq_nonempty = swab64(a->journal_seq_nonempty); -+ a->journal_seq_empty = swab64(a->journal_seq_empty); - a->flags = swab32(a->flags); - a->dirty_sectors = swab32(a->dirty_sectors); - a->cached_sectors = swab32(a->cached_sectors); -@@ -333,13 +333,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) - a->stripe = swab32(a->stripe); - a->nr_external_backpointers = swab32(a->nr_external_backpointers); - a->stripe_sectors = swab32(a->stripe_sectors); -- -- bps = alloc_v4_backpointers(a); -- for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { -- bp->bucket_offset = swab40(bp->bucket_offset); -- bp->bucket_len = swab32(bp->bucket_len); -- bch2_bpos_swab(&bp->pos); -- } - } - - void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) -@@ -354,16 +347,17 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c - prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); - bch2_prt_data_type(out, a->data_type); - prt_newline(out); -- prt_printf(out, "journal_seq %llu\n", a->journal_seq); -- prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); -- prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); -- prt_printf(out, 
"dirty_sectors %u\n", a->dirty_sectors); -- prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); -- prt_printf(out, "cached_sectors %u\n", a->cached_sectors); -- prt_printf(out, "stripe %u\n", a->stripe); -- prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); -- prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); -- prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); -+ prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty); -+ prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); -+ prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); -+ prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); -+ prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); -+ prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); -+ prt_printf(out, "cached_sectors %u\n", a->cached_sectors); -+ prt_printf(out, "stripe %u\n", a->stripe); -+ prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); -+ prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); -+ prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); - - if (ca) - prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); -@@ -392,7 +386,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) - struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); - - *out = (struct bch_alloc_v4) { -- .journal_seq = u.journal_seq, -+ .journal_seq_nonempty = u.journal_seq, - .flags = u.need_discard, - .gen = u.gen, - .oldest_gen = u.oldest_gen, -@@ -517,7 +511,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) - } - - int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -664,74 +658,80 @@ int bch2_alloc_read(struct bch_fs *c) - - /* Free space/discard btree: */ - -+static int __need_discard_or_freespace_err(struct btree_trans *trans, -+ struct bkey_s_c alloc_k, -+ bool set, bool discard, bool repair) -+{ -+ struct bch_fs *c = trans->c; -+ enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); -+ enum bch_sb_error_id err_id = discard -+ ? BCH_FSCK_ERR_need_discard_key_wrong -+ : BCH_FSCK_ERR_freespace_key_wrong; -+ enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, alloc_k); -+ -+ int ret = __bch2_fsck_err(NULL, trans, flags, err_id, -+ "bucket incorrectly %sset in %s btree\n" -+ " %s", -+ set ? "" : "un", -+ bch2_btree_id_str(btree), -+ buf.buf); -+ if (ret == -BCH_ERR_fsck_ignore || -+ ret == -BCH_ERR_fsck_errors_not_fixed) -+ ret = 0; -+ -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+#define need_discard_or_freespace_err(...) \ -+ fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__)) -+ -+#define need_discard_or_freespace_err_on(cond, ...) \ -+ (unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false) -+ - static int bch2_bucket_do_index(struct btree_trans *trans, - struct bch_dev *ca, - struct bkey_s_c alloc_k, - const struct bch_alloc_v4 *a, - bool set) - { -- struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_s_c old; -- struct bkey_i *k; - enum btree_id btree; -- enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; -- enum bch_bkey_type new_type = set ? 
KEY_TYPE_set : KEY_TYPE_deleted; -- struct printbuf buf = PRINTBUF; -- int ret; -+ struct bpos pos; - - if (a->data_type != BCH_DATA_free && - a->data_type != BCH_DATA_need_discard) - return 0; - -- k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); -- if (IS_ERR(k)) -- return PTR_ERR(k); -- -- bkey_init(&k->k); -- k->k.type = new_type; -- - switch (a->data_type) { - case BCH_DATA_free: - btree = BTREE_ID_freespace; -- k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); -- bch2_key_resize(&k->k, 1); -+ pos = alloc_freespace_pos(alloc_k.k->p, *a); - break; - case BCH_DATA_need_discard: - btree = BTREE_ID_need_discard; -- k->k.p = alloc_k.k->p; -+ pos = alloc_k.k->p; - break; - default: - return 0; - } - -- old = bch2_bkey_get_iter(trans, &iter, btree, -- bkey_start_pos(&k->k), -- BTREE_ITER_intent); -- ret = bkey_err(old); -+ struct btree_iter iter; -+ struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent); -+ int ret = bkey_err(old); - if (ret) - return ret; - -- if (ca->mi.freespace_initialized && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info && -- bch2_trans_inconsistent_on(old.k->type != old_type, trans, -- "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" -- " for %s", -- set ? "setting" : "clearing", -- bch2_btree_id_str(btree), -- iter.pos.inode, -- iter.pos.offset, -- bch2_bkey_types[old.k->type], -- bch2_bkey_types[old_type], -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- ret = -EIO; -- goto err; -- } -+ need_discard_or_freespace_err_on(ca->mi.freespace_initialized && -+ !old.k->type != set, -+ trans, alloc_k, set, -+ btree == BTREE_ID_need_discard, false); - -- ret = bch2_trans_update(trans, &iter, k, 0); --err: -+ ret = bch2_btree_bit_mod_iter(trans, &iter, set); -+fsck_err: - bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - return ret; - } - -@@ -858,7 +858,10 @@ int bch2_trigger_alloc(struct btree_trans *trans, - if (flags & BTREE_TRIGGER_transactional) { - alloc_data_type_set(new_a, new_a->data_type); - -- if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { -+ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - -+ (int) data_type_is_empty(old_a->data_type); -+ -+ if (is_empty_delta < 0) { - new_a->io_time[READ] = bch2_current_io_time(c, READ); - new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); -@@ -928,37 +931,55 @@ int bch2_trigger_alloc(struct btree_trans *trans, - } - - if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { -- u64 journal_seq = trans->journal_res.seq; -- u64 bucket_journal_seq = new_a->journal_seq; -+ u64 transaction_seq = trans->journal_res.seq; -+ BUG_ON(!transaction_seq); - -- if ((flags & BTREE_TRIGGER_insert) && -- data_type_is_empty(old_a->data_type) != -- data_type_is_empty(new_a->data_type) && -- new.k->type == KEY_TYPE_alloc_v4) { -- struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v; -+ if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq, -+ trans, alloc_key_journal_seq_in_future, -+ "bucket journal seq in future (currently at %llu)\n%s", -+ journal_cur_seq(&c->journal), -+ (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf))) -+ new_a->journal_seq_nonempty = transaction_seq; - -- /* -- * If the btree updates referring to a bucket weren't flushed -- * before the bucket became empty again, then the we don't have -- * to wait on a journal flush before we can reuse the bucket: -- */ -- v->journal_seq = bucket_journal_seq = -- 
data_type_is_empty(new_a->data_type) && -- (journal_seq == v->journal_seq || -- bch2_journal_noflush_seq(&c->journal, v->journal_seq)) -- ? 0 : journal_seq; -+ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - -+ (int) data_type_is_empty(old_a->data_type); -+ -+ /* -+ * Record journal sequence number of empty -> nonempty transition: -+ * Note that there may be multiple empty -> nonempty -+ * transitions, data in a bucket may be overwritten while we're -+ * still writing to it - so be careful to only record the first: -+ * */ -+ if (is_empty_delta < 0 && -+ new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) { -+ new_a->journal_seq_nonempty = transaction_seq; -+ new_a->journal_seq_empty = 0; - } - -- if (!data_type_is_empty(old_a->data_type) && -- data_type_is_empty(new_a->data_type) && -- bucket_journal_seq) { -- ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -- c->journal.flushed_seq_ondisk, -- new.k->p.inode, new.k->p.offset, -- bucket_journal_seq); -- if (bch2_fs_fatal_err_on(ret, c, -- "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) -- goto err; -+ /* -+ * Bucket becomes empty: mark it as waiting for a journal flush, -+ * unless updates since empty -> nonempty transition were never -+ * flushed - we may need to ask the journal not to flush -+ * intermediate sequence numbers: -+ */ -+ if (is_empty_delta > 0) { -+ if (new_a->journal_seq_nonempty == transaction_seq || -+ bch2_journal_noflush_seq(&c->journal, -+ new_a->journal_seq_nonempty, -+ transaction_seq)) { -+ new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0; -+ } else { -+ new_a->journal_seq_empty = transaction_seq; -+ -+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, -+ new.k->p.inode, new.k->p.offset, -+ transaction_seq); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "setting bucket_needs_journal_commit: %s", -+ bch2_err_str(ret))) -+ goto err; -+ } - } - - if (new_a->gen != old_a->gen) { -@@ -974,7 +995,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - - #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; }) - #define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr) --#define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk) -+#define bucket_flushed(a) (a->journal_seq_empty <= c->journal.flushed_seq_ondisk) - - if (statechange(a->data_type == BCH_DATA_free) && - bucket_flushed(new_a)) -@@ -1006,6 +1027,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, - rcu_read_unlock(); - } - err: -+fsck_err: - printbuf_exit(&buf); - bch2_dev_put(ca); - return ret; -@@ -1045,7 +1067,7 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos - * btree node min/max is a closed interval, upto takes a half - * open interval: - */ -- k = bch2_btree_iter_peek_upto(&iter2, end); -+ k = bch2_btree_iter_peek_max(&iter2, end); - next = iter2.pos; - bch2_trans_iter_exit(iter->trans, &iter2); - -@@ -1129,7 +1151,6 @@ int bch2_check_alloc_key(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; -- unsigned discard_key_type, freespace_key_type; - unsigned gens_offset; - struct bkey_s_c k; - struct printbuf buf = PRINTBUF; -@@ -1149,64 +1170,30 @@ int bch2_check_alloc_key(struct btree_trans *trans, - - a = bch2_alloc_to_v4(alloc_k, &a_convert); - -- discard_key_type = a->data_type == BCH_DATA_need_discard ? 
KEY_TYPE_set : 0; - bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); - k = bch2_btree_iter_peek_slot(discard_iter); - ret = bkey_err(k); - if (ret) - goto err; - -- if (fsck_err_on(k.k->type != discard_key_type, -- trans, need_discard_key_wrong, -- "incorrect key in need_discard btree (got %s should be %s)\n" -- " %s", -- bch2_bkey_types[k.k->type], -- bch2_bkey_types[discard_key_type], -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- struct bkey_i *update = -- bch2_trans_kmalloc(trans, sizeof(*update)); -- -- ret = PTR_ERR_OR_ZERO(update); -- if (ret) -- goto err; -- -- bkey_init(&update->k); -- update->k.type = discard_key_type; -- update->k.p = discard_iter->pos; -- -- ret = bch2_trans_update(trans, discard_iter, update, 0); -+ bool is_discarded = a->data_type == BCH_DATA_need_discard; -+ if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, -+ trans, alloc_k, !is_discarded, true, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); - if (ret) - goto err; - } - -- freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0; - bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); - k = bch2_btree_iter_peek_slot(freespace_iter); - ret = bkey_err(k); - if (ret) - goto err; - -- if (fsck_err_on(k.k->type != freespace_key_type, -- trans, freespace_key_wrong, -- "incorrect key in freespace btree (got %s should be %s)\n" -- " %s", -- bch2_bkey_types[k.k->type], -- bch2_bkey_types[freespace_key_type], -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -- struct bkey_i *update = -- bch2_trans_kmalloc(trans, sizeof(*update)); -- -- ret = PTR_ERR_OR_ZERO(update); -- if (ret) -- goto err; -- -- bkey_init(&update->k); -- update->k.type = freespace_key_type; -- update->k.p = freespace_iter->pos; -- bch2_key_resize(&update->k, 1); -- -- ret = bch2_trans_update(trans, freespace_iter, update, 0); -+ bool is_free = a->data_type == BCH_DATA_free; -+ if (need_discard_or_freespace_err_on(!!k.k->type != is_free, -+ trans, alloc_k, !is_free, false, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); - if (ret) - goto err; - } -@@ -1368,51 +1355,88 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, - return ret; - } - --static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans, -- struct btree_iter *iter) -+struct check_discard_freespace_key_async { -+ struct work_struct work; -+ struct bch_fs *c; -+ struct bbpos pos; -+}; -+ -+static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ u8 gen; -+ ret = k.k->type != KEY_TYPE_set -+ ? 
bch2_check_discard_freespace_key(trans, &iter, &gen, false) -+ : 0; -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static void check_discard_freespace_key_work(struct work_struct *work) -+{ -+ struct check_discard_freespace_key_async *w = -+ container_of(work, struct check_discard_freespace_key_async, work); -+ -+ bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); -+ bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); -+ kfree(w); -+} -+ -+int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, -+ bool async_repair) - { - struct bch_fs *c = trans->c; -- struct btree_iter alloc_iter; -- struct bkey_s_c alloc_k; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; -- u64 genbits; -- struct bpos pos; - enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard - ? BCH_DATA_need_discard - : BCH_DATA_free; - struct printbuf buf = PRINTBUF; -- int ret; - -- pos = iter->pos; -- pos.offset &= ~(~0ULL << 56); -- genbits = iter->pos.offset & (~0ULL << 56); -+ struct bpos bucket = iter->pos; -+ bucket.offset &= ~(~0ULL << 56); -+ u64 genbits = iter->pos.offset & (~0ULL << 56); - -- alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0); -- ret = bkey_err(alloc_k); -+ struct btree_iter alloc_iter; -+ struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, -+ BTREE_ID_alloc, bucket, -+ async_repair ? BTREE_ITER_cached : 0); -+ int ret = bkey_err(alloc_k); - if (ret) - return ret; - -- if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), -- trans, need_discard_freespace_key_to_invalid_dev_bucket, -- "entry in %s btree for nonexistant dev:bucket %llu:%llu", -- bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) -- goto delete; -+ if (!bch2_dev_bucket_exists(c, bucket)) { -+ if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, -+ "entry in %s btree for nonexistant dev:bucket %llu:%llu", -+ bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) -+ goto delete; -+ ret = 1; -+ goto out; -+ } - -- a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ -+ if (a->data_type != state || -+ (state == BCH_DATA_free && -+ genbits != alloc_freespace_genbits(*a))) { -+ if (fsck_err(trans, need_discard_freespace_key_bad, -+ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), -+ bch2_btree_id_str(iter->btree_id), -+ iter->pos.inode, -+ iter->pos.offset, -+ a->data_type == state, -+ genbits >> 56, alloc_freespace_genbits(*a) >> 56)) -+ goto delete; -+ ret = 1; -+ goto out; -+ } - -- if (fsck_err_on(a->data_type != state || -- (state == BCH_DATA_free && -- genbits != alloc_freespace_genbits(*a)), -- trans, need_discard_freespace_key_bad, -- "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", -- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), -- bch2_btree_id_str(iter->btree_id), -- iter->pos.inode, -- iter->pos.offset, -- a->data_type == state, -- genbits >> 56, alloc_freespace_genbits(*a) >> 56)) -- goto delete; -+ *gen = a->gen; - out: - fsck_err: - bch2_set_btree_iter_dontneed(&alloc_iter); -@@ -1420,11 +1444,40 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran - printbuf_exit(&buf); - return ret; - delete: -- ret = bch2_btree_delete_extent_at(trans, iter, -- iter->btree_id == BTREE_ID_freespace ? 
1 : 0, 0) ?: -- bch2_trans_commit(trans, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc); -- goto out; -+ if (!async_repair) { -+ ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_commit; -+ goto out; -+ } else { -+ /* -+ * We can't repair here when called from the allocator path: the -+ * commit will recurse back into the allocator -+ */ -+ struct check_discard_freespace_key_async *w = -+ kzalloc(sizeof(*w), GFP_KERNEL); -+ if (!w) -+ goto out; -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) { -+ kfree(w); -+ goto out; -+ } -+ -+ INIT_WORK(&w->work, check_discard_freespace_key_work); -+ w->c = c; -+ w->pos = BBPOS(iter->btree_id, iter->pos); -+ queue_work(c->write_ref_wq, &w->work); -+ goto out; -+ } -+} -+ -+static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) -+{ -+ u8 gen; -+ int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); -+ return ret < 0 ? ret : 0; - } - - /* -@@ -1581,7 +1634,7 @@ int bch2_check_alloc_info(struct bch_fs *c) - ret = for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, - BTREE_ITER_prefetch, k, -- bch2_check_discard_freespace_key(trans, &iter)); -+ bch2_check_discard_freespace_key_fsck(trans, &iter)); - if (ret) - goto err; - -@@ -1594,7 +1647,7 @@ int bch2_check_alloc_info(struct bch_fs *c) - break; - - ret = bkey_err(k) ?: -- bch2_check_discard_freespace_key(trans, &iter); -+ bch2_check_discard_freespace_key_fsck(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - ret = 0; - continue; -@@ -1757,7 +1810,8 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - struct bch_dev *ca, - struct btree_iter *need_discard_iter, - struct bpos *discard_pos_done, -- struct discard_buckets_state *s) -+ struct discard_buckets_state *s, -+ bool fastpath) - { - struct bch_fs *c = trans->c; - struct bpos pos = need_discard_iter->pos; -@@ -1793,44 +1847,23 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - if (ret) - goto out; - -- if (bch2_bucket_sectors_total(a->v)) { -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "attempting to discard bucket with dirty data\n%s", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; -- goto out; -- } -- - if (a->v.data_type != BCH_DATA_need_discard) { -- if (data_type_is_empty(a->v.data_type) && -- BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { -- a->v.gen++; -- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); -- goto write; -+ if (need_discard_or_freespace_err(trans, k, true, true, true)) { -+ ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); -+ if (ret) -+ goto out; -+ goto commit; - } - -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "bucket incorrectly set in need_discard btree\n" -- "%s", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; - goto out; - } - -- if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { -- if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, -- trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", -- a->v.journal_seq, -- c->journal.flushed_seq_ondisk, -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = -EIO; -- goto out; -- } -- -- if (discard_in_flight_add(ca, iter.pos.offset, true)) -- goto out; -+ if (!fastpath) { -+ if 
(discard_in_flight_add(ca, iter.pos.offset, true)) -+ goto out; - -- discard_locked = true; -+ discard_locked = true; -+ } - - if (!bkey_eq(*discard_pos_done, iter.pos) && - ca->mi.discard && !c->opts.nochanges) { -@@ -1844,6 +1877,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - ca->mi.bucket_size, - GFP_KERNEL); - *discard_pos_done = iter.pos; -+ s->discarded++; - - ret = bch2_trans_relock_notrace(trans); - if (ret) -@@ -1851,22 +1885,25 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, - } - - SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); --write: - alloc_data_type_set(&a->v, a->v.data_type); - -- ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: -- bch2_trans_commit(trans, NULL, NULL, -- BCH_WATERMARK_btree| -- BCH_TRANS_COMMIT_no_enospc); -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0); -+ if (ret) -+ goto out; -+commit: -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BCH_WATERMARK_btree| -+ BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto out; - - count_event(c, bucket_discard); -- s->discarded++; - out: -+fsck_err: - if (discard_locked) - discard_in_flight_remove(ca, iter.pos.offset); -- s->seen++; -+ if (!ret) -+ s->seen++; - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); - return ret; -@@ -1886,11 +1923,11 @@ static void bch2_do_discards_work(struct work_struct *work) - * successful commit: - */ - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, -+ for_each_btree_key_max(trans, iter, - BTREE_ID_need_discard, - POS(ca->dev_idx, 0), - POS(ca->dev_idx, U64_MAX), 0, k, -- bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s))); -+ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); - - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, - bch2_err_str(ret)); -@@ -1923,27 +1960,29 @@ void bch2_do_discards(struct bch_fs *c) - bch2_dev_do_discards(ca); - } - --static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket) -+static int bch2_do_discards_fast_one(struct btree_trans *trans, -+ struct bch_dev *ca, -+ u64 bucket, -+ struct bpos *discard_pos_done, -+ struct discard_buckets_state *s) - { -- struct btree_iter iter; -- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_intent); -- struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); -- int ret = bkey_err(k); -+ struct btree_iter need_discard_iter; -+ struct bkey_s_c discard_k = bch2_bkey_get_iter(trans, &need_discard_iter, -+ BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); -+ int ret = bkey_err(discard_k); - if (ret) -- goto err; -- -- struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k); -- ret = PTR_ERR_OR_ZERO(a); -- if (ret) -- goto err; -+ return ret; - -- BUG_ON(a->v.dirty_sectors); -- SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); -- alloc_data_type_set(&a->v, a->v.data_type); -+ if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, -+ trans, discarding_bucket_not_in_need_discard_btree, -+ "attempting to discard bucket %u:%llu not in need_discard btree", -+ ca->dev_idx, bucket)) -+ goto out; - -- ret = bch2_trans_update(trans, &iter, &a->k_i, 0); --err: -- bch2_trans_iter_exit(trans, &iter); -+ ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); -+out: -+fsck_err: -+ bch2_trans_iter_exit(trans, &need_discard_iter); - return ret; - } - -@@ -1951,6 +1990,10 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - { - struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work); - 
struct bch_fs *c = ca->fs; -+ struct discard_buckets_state s = {}; -+ struct bpos discard_pos_done = POS_MAX; -+ struct btree_trans *trans = bch2_trans_get(c); -+ int ret = 0; - - while (1) { - bool got_bucket = false; -@@ -1971,16 +2014,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - if (!got_bucket) - break; - -- if (ca->mi.discard && !c->opts.nochanges) -- blkdev_issue_discard(ca->disk_sb.bdev, -- bucket_to_sector(ca, bucket), -- ca->mi.bucket_size, -- GFP_KERNEL); -- -- int ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_WATERMARK_btree| -- BCH_TRANS_COMMIT_no_enospc, -- bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); -+ ret = lockrestart_do(trans, -+ bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s)); - bch_err_fn(c, ret); - - discard_in_flight_remove(ca, bucket); -@@ -1989,6 +2024,9 @@ static void bch2_do_discards_fast_work(struct work_struct *work) - break; - } - -+ trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); -+ -+ bch2_trans_put(trans); - percpu_ref_put(&ca->io_ref); - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); - } -@@ -2030,8 +2068,11 @@ static int invalidate_one_bucket(struct btree_trans *trans, - return 1; - - if (!bch2_dev_bucket_exists(c, bucket)) { -- prt_str(&buf, "lru entry points to invalid bucket"); -- goto err; -+ if (fsck_err(trans, lru_entry_to_invalid_bucket, -+ "lru key points to nonexistent device:bucket %llu:%llu", -+ bucket.inode, bucket.offset)) -+ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); -+ goto out; - } - - if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) -@@ -2072,28 +2113,9 @@ static int invalidate_one_bucket(struct btree_trans *trans, - trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); - --*nr_to_invalidate; - out: -+fsck_err: - printbuf_exit(&buf); - return ret; --err: -- prt_str(&buf, "\n lru key: "); -- bch2_bkey_val_to_text(&buf, c, lru_k); -- -- prt_str(&buf, "\n lru entry: "); -- bch2_lru_pos_to_text(&buf, lru_iter->pos); -- -- prt_str(&buf, "\n alloc key: "); -- if (!a) -- bch2_bpos_to_text(&buf, bucket); -- else -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); -- -- bch_err(c, "%s", buf.buf); -- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { -- bch2_inconsistent_error(c); -- ret = -EINVAL; -- } -- -- goto out; - } - - static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter, -@@ -2101,7 +2123,7 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter - { - struct bkey_s_c k; - again: -- k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); -+ k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); - if (!k.k && !*wrapped) { - bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); - *wrapped = true; -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -index 163a67b97a40..de25ba4ee94b 100644 ---- a/fs/bcachefs/alloc_background.h -+++ b/fs/bcachefs/alloc_background.h -@@ -8,8 +8,6 @@ - #include "debug.h" - #include "super.h" - --enum bch_validate_flags; -- - /* How out of date a pointer gen is allowed to be: */ - #define BUCKET_GC_GEN_MAX 96U - -@@ -245,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s - - int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); - --int bch2_alloc_v1_validate(struct bch_fs *, struct 
bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_alloc_v4_swab(struct bkey_s); - void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - -@@ -282,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - }) - - int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ -@@ -307,6 +309,8 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *, - int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); -+ -+int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool); - int bch2_check_alloc_info(struct bch_fs *); - int bch2_check_alloc_to_lru_refs(struct bch_fs *); - void bch2_dev_do_discards(struct bch_dev *); -diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h -index befdaa95c515..740238369a5a 100644 ---- a/fs/bcachefs/alloc_background_format.h -+++ b/fs/bcachefs/alloc_background_format.h -@@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) - - struct bch_alloc_v4 { - struct bch_val v; -- __u64 journal_seq; -+ __u64 journal_seq_nonempty; - __u32 flags; - __u8 gen; - __u8 oldest_gen; -@@ -70,7 +70,7 @@ struct bch_alloc_v4 { - __u32 stripe; - __u32 nr_external_backpointers; - /* end of fields in original version of alloc_v4 */ -- __u64 _fragmentation_lru; /* obsolete */ -+ __u64 journal_seq_empty; - __u32 stripe_sectors; - __u32 pad; - } __packed __aligned(8); -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -index 372178c8d416..ecd14962ab01 100644 ---- a/fs/bcachefs/alloc_foreground.c -+++ b/fs/bcachefs/alloc_foreground.c -@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) - return; - } - -- percpu_down_read(&c->mark_lock); - spin_lock(&ob->lock); -- - ob->valid = false; - ob->data_type = 0; -- - spin_unlock(&ob->lock); -- percpu_up_read(&c->mark_lock); - - spin_lock(&c->freelist_lock); - bch2_open_bucket_hash_remove(c, ob); -@@ -156,6 +152,14 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) - return ob; - } - -+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b) -+{ -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs) -+ return false; -+ -+ return bch2_is_superblock_bucket(ca, b); -+} -+ - static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) - { - BUG_ON(c->open_buckets_partial_nr >= -@@ -175,70 +179,46 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) - 
closure_wake_up(&c->freelist_wait); - } - --/* _only_ for allocating the journal on a new device: */ --long bch2_bucket_alloc_new_fs(struct bch_dev *ca) -+static inline bool may_alloc_bucket(struct bch_fs *c, -+ struct bpos bucket, -+ struct bucket_alloc_state *s) - { -- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) { -- u64 b = ca->new_fs_bucket_idx++; -- -- if (!is_superblock_bucket(ca, b) && -- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse))) -- return b; -+ if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) { -+ s->skipped_open++; -+ return false; - } - -- return -1; --} -+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { -+ s->skipped_need_journal_commit++; -+ return false; -+ } - --static inline unsigned open_buckets_reserved(enum bch_watermark watermark) --{ -- switch (watermark) { -- case BCH_WATERMARK_interior_updates: -- return 0; -- case BCH_WATERMARK_reclaim: -- return OPEN_BUCKETS_COUNT / 6; -- case BCH_WATERMARK_btree: -- case BCH_WATERMARK_btree_copygc: -- return OPEN_BUCKETS_COUNT / 4; -- case BCH_WATERMARK_copygc: -- return OPEN_BUCKETS_COUNT / 3; -- default: -- return OPEN_BUCKETS_COUNT / 2; -+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) { -+ s->skipped_nocow++; -+ return false; - } -+ -+ return true; - } - - static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -- u64 bucket, -+ u64 bucket, u8 gen, - enum bch_watermark watermark, -- const struct bch_alloc_v4 *a, - struct bucket_alloc_state *s, - struct closure *cl) - { -- struct open_bucket *ob; -+ if (unlikely(is_superblock_bucket(c, ca, bucket))) -+ return NULL; - - if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { - s->skipped_nouse++; - return NULL; - } - -- if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { -- s->skipped_open++; -- return NULL; -- } -- -- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -- c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) { -- s->skipped_need_journal_commit++; -- return NULL; -- } -- -- if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) { -- s->skipped_nocow++; -- return NULL; -- } -- - spin_lock(&c->freelist_lock); - -- if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) { -+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) { - if (cl) - closure_wait(&c->open_buckets_wait, cl); - -@@ -254,14 +234,13 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - return NULL; - } - -- ob = bch2_open_bucket_alloc(c); -+ struct open_bucket *ob = bch2_open_bucket_alloc(c); - - spin_lock(&ob->lock); -- - ob->valid = true; - ob->sectors_free = ca->mi.bucket_size; - ob->dev = ca->dev_idx; -- ob->gen = a->gen; -+ ob->gen = gen; - ob->bucket = bucket; - spin_unlock(&ob->lock); - -@@ -276,111 +255,29 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * - } - - static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, -- enum bch_watermark watermark, u64 free_entry, -+ enum bch_watermark watermark, - struct bucket_alloc_state *s, -- struct bkey_s_c freespace_k, -+ struct btree_iter *freespace_iter, - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter = { NULL }; -- struct bkey_s_c k; -- struct open_bucket *ob; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; -- u64 b = free_entry & ~(~0ULL << 
56); -- unsigned genbits = free_entry >> 56; -- struct printbuf buf = PRINTBUF; -- int ret; -- -- if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) { -- prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n" -- " freespace key ", -- ca->mi.first_bucket, ca->mi.nbuckets); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -- -- k = bch2_bkey_get_iter(trans, &iter, -- BTREE_ID_alloc, POS(ca->dev_idx, b), -- BTREE_ITER_cached); -- ret = bkey_err(k); -- if (ret) { -- ob = ERR_PTR(ret); -- goto err; -- } -- -- a = bch2_alloc_to_v4(k, &a_convert); -- -- if (a->data_type != BCH_DATA_free) { -- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { -- ob = NULL; -- goto err; -- } -- -- prt_printf(&buf, "non free bucket in freespace btree\n" -- " freespace key "); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -- prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -- -- if (genbits != (alloc_freespace_genbits(*a) >> 56) && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { -- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" -- " freespace key ", -- genbits, alloc_freespace_genbits(*a) >> 56); -- bch2_bkey_val_to_text(&buf, c, freespace_k); -- prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -- bch2_trans_inconsistent(trans, "%s", buf.buf); -- ob = ERR_PTR(-EIO); -- goto err; -- } -- -- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { -- struct bch_backpointer bp; -- struct bpos bp_pos = POS_MIN; -+ u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); - -- ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1, -- &bp_pos, &bp, -- BTREE_ITER_nopreserve); -- if (ret) { -- ob = ERR_PTR(ret); -- goto err; -- } -+ if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s)) -+ return NULL; - -- if (!bkey_eq(bp_pos, POS_MAX)) { -- /* -- * Bucket may have data in it - we don't call -- * bc2h_trans_inconnsistent() because fsck hasn't -- * finished yet -- */ -- ob = NULL; -- goto err; -- } -- } -+ u8 gen; -+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); -+ if (ret < 0) -+ return ERR_PTR(ret); -+ if (ret) -+ return NULL; - -- ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); -- if (!ob) -- bch2_set_btree_iter_dontneed(&iter); --err: -- if (iter.path) -- bch2_set_btree_iter_dontneed(&iter); -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -- return ob; -+ return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl); - } - - /* - * This path is for before the freespace btree is initialized: -- * -- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & -- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx - */ - static noinline struct open_bucket * - bch2_bucket_alloc_early(struct btree_trans *trans, -@@ -389,10 +286,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - struct bucket_alloc_state *s, - struct closure *cl) - { -+ struct bch_fs *c = trans->c; - struct btree_iter iter, citer; - struct bkey_s_c k, ck; - struct open_bucket *ob = NULL; -- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); -+ u64 first_bucket = ca->mi.first_bucket; - u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap]; - u64 alloc_start = max(first_bucket, *dev_alloc_cursor); - u64 
alloc_cursor = alloc_start; -@@ -415,10 +313,6 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) - break; - -- if (ca->new_fs_bucket_idx && -- is_superblock_bucket(ca, k.k->p.offset)) -- continue; -- - if (s->btree_bitmap != BTREE_BITMAP_ANY && - s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, - bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { -@@ -452,7 +346,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans, - - s->buckets_seen++; - -- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); -+ ob = may_alloc_bucket(c, k.k->p, s) -+ ? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen, -+ watermark, s, cl) -+ : NULL; - next: - bch2_set_btree_iter_dontneed(&citer); - bch2_trans_iter_exit(trans, &citer); -@@ -489,20 +386,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor)); - u64 alloc_cursor = alloc_start; - int ret; -- -- BUG_ON(ca->new_fs_bucket_idx); - again: -- for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, -- POS(ca->dev_idx, alloc_cursor), 0, k, ret) { -- if (k.k->p.inode != ca->dev_idx) -- break; -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace, -+ POS(ca->dev_idx, alloc_cursor), -+ POS(ca->dev_idx, U64_MAX), -+ 0, k, ret) { -+ /* -+ * peek normally dosen't trim extents - they can span iter.pos, -+ * which is not what we want here: -+ */ -+ iter.k.size = iter.k.p.offset - iter.pos.offset; - -- for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); -- alloc_cursor < k.k->p.offset; -- alloc_cursor++) { -+ while (iter.k.size) { - s->buckets_seen++; - -- u64 bucket = alloc_cursor & ~(~0ULL << 56); -+ u64 bucket = iter.pos.offset & ~(~0ULL << 56); - if (s->btree_bitmap != BTREE_BITMAP_ANY && - s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca, - bucket_to_sector(ca, bucket), ca->mi.bucket_size)) { -@@ -511,32 +409,36 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - goto fail; - - bucket = sector_to_bucket(ca, -- round_up(bucket_to_sector(ca, bucket) + 1, -+ round_up(bucket_to_sector(ca, bucket + 1), - 1ULL << ca->mi.btree_bitmap_shift)); -- u64 genbits = alloc_cursor >> 56; -- alloc_cursor = bucket | (genbits << 56); -+ alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56)); - -- if (alloc_cursor > k.k->p.offset) -- bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); -+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor)); - s->skipped_mi_btree_bitmap++; -- continue; -+ goto next; - } - -- ob = try_alloc_bucket(trans, ca, watermark, -- alloc_cursor, s, k, cl); -+ ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl); - if (ob) { -+ if (!IS_ERR(ob)) -+ *dev_alloc_cursor = iter.pos.offset; - bch2_set_btree_iter_dontneed(&iter); - break; - } -- } - -+ iter.k.size--; -+ iter.pos.offset++; -+ } -+next: - if (ob || ret) - break; - } - fail: - bch2_trans_iter_exit(trans, &iter); - -- if (!ob && ret) -+ BUG_ON(ob && ret); -+ -+ if (ret) - ob = ERR_PTR(ret); - - if (!ob && alloc_start > ca->mi.first_bucket) { -@@ -544,8 +446,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - goto again; - } - -- *dev_alloc_cursor = alloc_cursor; -- - return ob; - } - -@@ -595,6 +495,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, - * @watermark: how important is this allocation? 
- * @data_type: BCH_DATA_journal, btree, user... - * @cl: if not NULL, closure to be used to wait if buckets not available -+ * @nowait: if true, do not wait for buckets to become available - * @usage: for secondarily also returning the current device usage - * - * Returns: an open_bucket on success, or an ERR_PTR() on failure. -@@ -629,6 +530,10 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, - bch2_dev_do_invalidates(ca); - - if (!avail) { -+ if (watermark > BCH_WATERMARK_normal && -+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) -+ goto alloc; -+ - if (cl && !waiting) { - closure_wait(&c->freelist_wait, cl); - waiting = true; -@@ -711,9 +616,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, - unsigned i; - - for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) -- ret.devs[ret.nr++] = i; -+ ret.data[ret.nr++] = i; - -- bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ bubble_sort(ret.data, ret.nr, dev_stripe_cmp); - return ret; - } - -@@ -785,18 +690,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct dev_alloc_list devs_sorted = -- bch2_dev_alloc_list(c, stripe, devs_may_alloc); - int ret = -BCH_ERR_insufficient_devices; - - BUG_ON(*nr_effective >= nr_replicas); - -- for (unsigned i = 0; i < devs_sorted.nr; i++) { -- struct bch_dev_usage usage; -- struct open_bucket *ob; -- -- unsigned dev = devs_sorted.devs[i]; -- struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); -+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ darray_for_each(devs_sorted, i) { -+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i); - if (!ca) - continue; - -@@ -805,8 +705,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, - continue; - } - -- ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, -- cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); -+ struct bch_dev_usage usage; -+ struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, -+ cl, flags & BCH_WRITE_alloc_nowait, &usage); - if (!IS_ERR(ob)) - bch2_dev_stripe_increment_inlined(ca, stripe, &usage); - bch2_dev_put(ca); -@@ -850,10 +751,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, - struct closure *cl) - { - struct bch_fs *c = trans->c; -- struct dev_alloc_list devs_sorted; -- struct ec_stripe_head *h; -- struct open_bucket *ob; -- unsigned i, ec_idx; - int ret = 0; - - if (nr_replicas < 2) -@@ -862,34 +759,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, - if (ec_open_bucket(c, ptrs)) - return 0; - -- h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); -+ struct ec_stripe_head *h = -+ bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); - if (IS_ERR(h)) - return PTR_ERR(h); - if (!h) - return 0; - -- devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -- -- for (i = 0; i < devs_sorted.nr; i++) -- for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { -+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ darray_for_each(devs_sorted, i) -+ for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { - if (!h->s->blocks[ec_idx]) - continue; - -- ob = c->open_buckets + h->s->blocks[ec_idx]; -- if (ob->dev == devs_sorted.devs[i] && -- !test_and_set_bit(ec_idx, h->s->blocks_allocated)) -- goto got_bucket; -+ struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx]; -+ if (ob->dev == *i && 
!test_and_set_bit(ec_idx, h->s->blocks_allocated)) { -+ ob->ec_idx = ec_idx; -+ ob->ec = h->s; -+ ec_stripe_new_get(h->s, STRIPE_REF_io); -+ -+ ret = add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, ob); -+ goto out; -+ } - } -- goto out_put_head; --got_bucket: -- ob->ec_idx = ec_idx; -- ob->ec = h->s; -- ec_stripe_new_get(h->s, STRIPE_REF_io); -- -- ret = add_new_bucket(c, ptrs, devs_may_alloc, -- nr_replicas, nr_effective, -- have_cache, ob); --out_put_head: -+out: - bch2_ec_stripe_head_put(c, h); - return ret; - } -@@ -1420,7 +1315,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, - if (wp->data_type != BCH_DATA_user) - have_cache = true; - -- if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ if (target && !(flags & BCH_WRITE_only_specified_devs)) { - ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, - target, erasure_code, - nr_replicas, &nr_effective, -@@ -1510,7 +1405,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, - if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) - ret = -BCH_ERR_bucket_alloc_blocked; - -- if (cl && !(flags & BCH_WRITE_ALLOC_NOWAIT) && -+ if (cl && !(flags & BCH_WRITE_alloc_nowait) && - bch2_err_matches(ret, BCH_ERR_freelist_empty)) - ret = -BCH_ERR_bucket_alloc_blocked; - -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -index 1a16fd5bd4f8..baf5dc163c8a 100644 ---- a/fs/bcachefs/alloc_foreground.h -+++ b/fs/bcachefs/alloc_foreground.h -@@ -20,7 +20,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *); - - struct dev_alloc_list { - unsigned nr; -- u8 devs[BCH_SB_MEMBERS_MAX]; -+ u8 data[BCH_SB_MEMBERS_MAX]; - }; - - struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -@@ -28,13 +28,28 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, - struct bch_devs_mask *); - void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); - --long bch2_bucket_alloc_new_fs(struct bch_dev *); -- - static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob) - { - return bch2_dev_have_ref(c, ob->dev); - } - -+static inline unsigned bch2_open_buckets_reserved(enum bch_watermark watermark) -+{ -+ switch (watermark) { -+ case BCH_WATERMARK_interior_updates: -+ return 0; -+ case BCH_WATERMARK_reclaim: -+ return OPEN_BUCKETS_COUNT / 6; -+ case BCH_WATERMARK_btree: -+ case BCH_WATERMARK_btree_copygc: -+ return OPEN_BUCKETS_COUNT / 4; -+ case BCH_WATERMARK_copygc: -+ return OPEN_BUCKETS_COUNT / 3; -+ default: -+ return OPEN_BUCKETS_COUNT / 2; -+ } -+} -+ - struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, - enum bch_watermark, enum bch_data_type, - struct closure *); -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -index 654a58132a4d..ebeb6a5ff9d2 100644 ---- a/fs/bcachefs/backpointers.c -+++ b/fs/bcachefs/backpointers.c -@@ -14,42 +14,8 @@ - - #include - --static bool extent_matches_bp(struct bch_fs *c, -- enum btree_id btree_id, unsigned level, -- struct bkey_s_c k, -- struct bpos bucket, -- struct bch_backpointer bp) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- -- rcu_read_lock(); -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket2; -- struct bch_backpointer bp2; -- -- if (p.ptr.cached) -- continue; -- -- struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); -- if (!ca) -- continue; -- -- bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); 
-- if (bpos_eq(bucket, bucket2) && -- !memcmp(&bp, &bp2, sizeof(bp))) { -- rcu_read_unlock(); -- return true; -- } -- } -- rcu_read_unlock(); -- -- return false; --} -- - int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - int ret = 0; -@@ -59,67 +25,70 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, - "backpointer level bad: %u >= %u", - bp.v->level, BTREE_MAX_DEPTH); - -- rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); -- if (!ca) { -- /* these will be caught by fsck */ -- rcu_read_unlock(); -- return 0; -- } -- -- struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); -- struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); -- rcu_read_unlock(); -- -- bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || -- !bpos_eq(bp.k->p, bp_pos), -- c, backpointer_bucket_offset_wrong, -- "backpointer bucket_offset wrong"); -+ bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, -+ c, backpointer_dev_bad, -+ "backpointer for BCH_SB_MEMBER_INVALID"); - fsck_err: - return ret; - } - --void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) -+void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) - { -- prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", -- bch2_btree_id_str(bp->btree_id), -- bp->level, -- (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -- bp->bucket_len); -- bch2_bpos_to_text(out, bp->pos); --} -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - --void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) --{ - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); - if (ca) { -- struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); -+ u32 bucket_offset; -+ struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); - rcu_read_unlock(); -- prt_str(out, "bucket="); -- bch2_bpos_to_text(out, bucket); -- prt_str(out, " "); -+ prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset); - } else { - rcu_read_unlock(); -+ prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); - } - -- bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); -+ bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); -+ prt_printf(out, " suboffset=%u len=%u gen=%u pos=", -+ (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ bp.v->bucket_len, -+ bp.v->bucket_gen); -+ bch2_bpos_to_text(out, bp.v->pos); - } - - void bch2_backpointer_swab(struct bkey_s k) - { - struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); - -- bp.v->bucket_offset = swab40(bp.v->bucket_offset); - bp.v->bucket_len = swab32(bp.v->bucket_len); - bch2_bpos_swab(&bp.v->pos); - } - -+static bool extent_matches_bp(struct bch_fs *c, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, -+ struct bkey_s_c_backpointer bp) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ struct bkey_i_backpointer bp2; -+ 
bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2); -+ -+ if (bpos_eq(bp.k->p, bp2.k.p) && -+ !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) -+ return true; -+ } -+ -+ return false; -+} -+ - static noinline int backpointer_mod_err(struct btree_trans *trans, -- struct bch_backpointer bp, -- struct bkey_s_c bp_k, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *new_bp, -+ struct bkey_s_c found_bp, - bool insert) - { - struct bch_fs *c = trans->c; -@@ -127,12 +96,12 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - - if (insert) { - prt_printf(&buf, "existing backpointer found when inserting "); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - prt_printf(&buf, "found "); -- bch2_bkey_val_to_text(&buf, c, bp_k); -+ bch2_bkey_val_to_text(&buf, c, found_bp); - prt_newline(&buf); - - prt_printf(&buf, "for "); -@@ -144,11 +113,11 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - printbuf_indent_add(&buf, 2); - - prt_printf(&buf, "searching for "); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); - prt_newline(&buf); - - prt_printf(&buf, "got "); -- bch2_bkey_val_to_text(&buf, c, bp_k); -+ bch2_bkey_val_to_text(&buf, c, found_bp); - prt_newline(&buf); - - prt_printf(&buf, "for "); -@@ -167,161 +136,118 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, - } - - int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, -- struct bch_backpointer bp, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *bp, - bool insert) - { - struct btree_iter bp_iter; -- struct bkey_s_c k; -- struct bkey_i_backpointer *bp_k; -- int ret; -- -- bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); -- ret = PTR_ERR_OR_ZERO(bp_k); -- if (ret) -- return ret; -- -- bkey_backpointer_init(&bp_k->k_i); -- bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- bp_k->v = bp; -- -- if (!insert) { -- bp_k->k.type = KEY_TYPE_deleted; -- set_bkey_val_u64s(&bp_k->k, 0); -- } -- -- k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -- bp_k->k.p, -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -+ bp->k.p, - BTREE_ITER_intent| - BTREE_ITER_slots| - BTREE_ITER_with_updates); -- ret = bkey_err(k); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - - if (insert - ? 
k.k->type - : (k.k->type != KEY_TYPE_backpointer || -- memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { -- ret = backpointer_mod_err(trans, bp, k, orig_k, insert); -+ memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { -+ ret = backpointer_mod_err(trans, orig_k, bp, k, insert); - if (ret) - goto err; - } - -- ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); -+ if (!insert) { -+ bp->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp->k, 0); -+ } -+ -+ ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); - err: - bch2_trans_iter_exit(trans, &bp_iter); - return ret; - } - --/* -- * Find the next backpointer >= *bp_offset: -- */ --int bch2_get_next_backpointer(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, int gen, -- struct bpos *bp_pos, -- struct bch_backpointer *bp, -- unsigned iter_flags) -+static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) - { -- struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); -- struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; -- struct bkey_s_c k; -- int ret = 0; -- -- if (bpos_ge(*bp_pos, bp_end_pos)) -- goto done; -- -- if (gen >= 0) { -- k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, -- bucket, BTREE_ITER_cached|iter_flags); -- ret = bkey_err(k); -- if (ret) -- goto out; -- -- if (k.k->type != KEY_TYPE_alloc_v4 || -- bkey_s_c_to_alloc_v4(k).v->gen != gen) -- goto done; -- } -- -- *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); -- -- for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, -- *bp_pos, iter_flags, k, ret) { -- if (bpos_ge(k.k->p, bp_end_pos)) -- break; -+ return (likely(!bch2_backpointers_no_use_write_buffer) -+ ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) -+ : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); -+} - -- *bp_pos = k.k->p; -- *bp = *bkey_s_c_to_backpointer(k).v; -- goto out; -- } --done: -- *bp_pos = SPOS_MAX; --out: -- bch2_trans_iter_exit(trans, &bp_iter); -- bch2_trans_iter_exit(trans, &alloc_iter); -- return ret; -+static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, -+ struct bkey_s_c visiting_k, -+ struct bkey_buf *last_flushed) -+{ -+ return likely(!bch2_backpointers_no_use_write_buffer) -+ ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) -+ : 0; - } - --static void backpointer_not_found(struct btree_trans *trans, -- struct bpos bp_pos, -- struct bch_backpointer bp, -- struct bkey_s_c k) -+static int backpointer_target_not_found(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, -+ struct bkey_s_c target_k, -+ struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; -+ int ret = 0; - - /* - * If we're using the btree write buffer, the backpointer we were - * looking at may have already been deleted - failure to find what it - * pointed to is not an error: - */ -- if (likely(!bch2_backpointers_no_use_write_buffer)) -- return; -- -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -- return; -+ ret = last_flushed -+ ? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) -+ : 0; -+ if (ret) -+ return ret; - - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", -- bp.level ? "btree node" : "extent"); -- prt_printf(&buf, "bucket: "); -- bch2_bpos_to_text(&buf, bucket); -- prt_printf(&buf, "\n "); -+ bp.v->level ? 
"btree node" : "extent"); -+ bch2_bkey_val_to_text(&buf, c, bp.s_c); - -- prt_printf(&buf, "backpointer pos: "); -- bch2_bpos_to_text(&buf, bp_pos); - prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, target_k); - -- bch2_backpointer_to_text(&buf, &bp); -- prt_printf(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, k); -- if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) -- bch_err_ratelimited(c, "%s", buf.buf); -- else -- bch2_trans_inconsistent(trans, "%s", buf.buf); -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry) -+ if (p.ptr.dev == bp.k->p.inode) { -+ prt_printf(&buf, "\n "); -+ struct bkey_i_backpointer bp2; -+ bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i)); -+ } - -+ if (fsck_err(trans, backpointer_to_missing_ptr, -+ "%s", buf.buf)) -+ ret = bch2_backpointer_del(trans, bp.k->p); -+fsck_err: - printbuf_exit(&buf); -+ return ret; - } - - struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, - struct btree_iter *iter, -- struct bpos bp_pos, -- struct bch_backpointer bp, -- unsigned iter_flags) -+ unsigned iter_flags, -+ struct bkey_buf *last_flushed) - { -- if (likely(!bp.level)) { -- struct bch_fs *c = trans->c; -+ struct bch_fs *c = trans->c; - -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -- return bkey_s_c_err(-EIO); -+ if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) -+ return bkey_s_c_null; - -+ if (likely(!bp.v->level)) { - bch2_trans_node_iter_init(trans, iter, -- bp.btree_id, -- bp.pos, -+ bp.v->btree_id, -+ bp.v->pos, - 0, 0, - iter_flags); - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -@@ -330,67 +256,64 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - return k; - } - -- if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) -+ if (k.k && -+ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); -- backpointer_not_found(trans, bp_pos, bp, k); -- return bkey_s_c_null; -+ int ret = backpointer_target_not_found(trans, bp, k, last_flushed); -+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null; - } else { -- struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); -+ struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); -+ if (IS_ERR_OR_NULL(b)) -+ return ((struct bkey_s_c) { .k = ERR_CAST(b) }); - -- if (IS_ERR_OR_NULL(b)) { -- bch2_trans_iter_exit(trans, iter); -- return IS_ERR(b) ? 
bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; -- } - return bkey_i_to_s_c(&b->key); - } - } - - struct btree *bch2_backpointer_get_node(struct btree_trans *trans, -+ struct bkey_s_c_backpointer bp, - struct btree_iter *iter, -- struct bpos bp_pos, -- struct bch_backpointer bp) -+ struct bkey_buf *last_flushed) - { - struct bch_fs *c = trans->c; - -- BUG_ON(!bp.level); -- -- struct bpos bucket; -- if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) -- return ERR_PTR(-EIO); -+ BUG_ON(!bp.v->level); - - bch2_trans_node_iter_init(trans, iter, -- bp.btree_id, -- bp.pos, -+ bp.v->btree_id, -+ bp.v->pos, - 0, -- bp.level - 1, -+ bp.v->level - 1, - 0); - struct btree *b = bch2_btree_iter_peek_node(iter); - if (IS_ERR_OR_NULL(b)) - goto err; - -- BUG_ON(b->c.level != bp.level - 1); -+ BUG_ON(b->c.level != bp.v->level - 1); - -- if (extent_matches_bp(c, bp.btree_id, bp.level, -- bkey_i_to_s_c(&b->key), -- bucket, bp)) -+ if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, -+ bkey_i_to_s_c(&b->key), bp)) - return b; - - if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); - } else { -- backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); -- b = NULL; -+ int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); -+ b = ret ? ERR_PTR(ret) : NULL; - } - err: - bch2_trans_iter_exit(trans, iter); - return b; - } - --static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, -- struct bkey_s_c k) -+static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, -+ struct bkey_buf *last_flushed) - { -+ if (k.k->type != KEY_TYPE_backpointer) -+ return 0; -+ - struct bch_fs *c = trans->c; - struct btree_iter alloc_iter = { NULL }; - struct bkey_s_c alloc_k; -@@ -399,10 +322,14 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - - struct bpos bucket; - if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { -+ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); -+ if (ret) -+ goto out; -+ - if (fsck_err(trans, backpointer_to_missing_device, - "backpointer for missing device:\n%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -- ret = bch2_btree_delete_at(trans, bp_iter, 0); -+ ret = bch2_backpointer_del(trans, k.k->p); - goto out; - } - -@@ -411,13 +338,16 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - if (ret) - goto out; - -- if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, -- trans, backpointer_to_missing_alloc, -- "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -- alloc_iter.pos.inode, alloc_iter.pos.offset, -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -- ret = bch2_btree_delete_at(trans, bp_iter, 0); -- goto out; -+ if (alloc_k.k->type != KEY_TYPE_alloc_v4) { -+ ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); -+ if (ret) -+ goto out; -+ -+ if (fsck_err(trans, backpointer_to_missing_alloc, -+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -+ alloc_iter.pos.inode, alloc_iter.pos.offset, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ ret = bch2_backpointer_del(trans, k.k->p); - } - out: - fsck_err: -@@ -429,18 +359,24 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ - /* verify that every backpointer has a corresponding alloc key */ - int bch2_check_btree_backpointers(struct bch_fs *c) - { -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ 
bkey_init(&last_flushed.k->k); -+ - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, - BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_check_btree_backpointer(trans, &iter, k))); -+ bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); -+ -+ bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); - return ret; - } - - struct extents_to_bp_state { -- struct bpos bucket_start; -- struct bpos bucket_end; -+ struct bpos bp_start; -+ struct bpos bp_end; - struct bkey_buf last_flushed; - }; - -@@ -501,9 +437,13 @@ static int check_extent_checksum(struct btree_trans *trans, - goto err; - - prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); -- prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); -+ prt_printf(&buf, "\n "); -+ bch2_btree_id_to_text(&buf, btree); -+ prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, extent); -- prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); -+ prt_printf(&buf, "\n "); -+ bch2_btree_id_to_text(&buf, o_btree); -+ prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, extent2); - - struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); -@@ -524,41 +464,25 @@ static int check_extent_checksum(struct btree_trans *trans, - - static int check_bp_exists(struct btree_trans *trans, - struct extents_to_bp_state *s, -- struct bpos bucket, -- struct bch_backpointer bp, -+ struct bkey_i_backpointer *bp, - struct bkey_s_c orig_k) - { - struct bch_fs *c = trans->c; -- struct btree_iter bp_iter = {}; - struct btree_iter other_extent_iter = {}; - struct printbuf buf = PRINTBUF; -- struct bkey_s_c bp_k; -- int ret = 0; - -- struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); -- if (!ca) { -- prt_str(&buf, "extent for nonexistent device:bucket "); -- bch2_bpos_to_text(&buf, bucket); -- prt_str(&buf, "\n "); -- bch2_bkey_val_to_text(&buf, c, orig_k); -- bch_err(c, "%s", buf.buf); -- ret = -BCH_ERR_fsck_repair_unimplemented; -- goto err; -- } -- -- if (bpos_lt(bucket, s->bucket_start) || -- bpos_gt(bucket, s->bucket_end)) -- goto out; -+ if (bpos_lt(bp->k.p, s->bp_start) || -+ bpos_gt(bp->k.p, s->bp_end)) -+ return 0; - -- bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -- bucket_pos_to_bp(ca, bucket, bp.bucket_offset), -- 0); -- ret = bkey_err(bp_k); -+ struct btree_iter bp_iter; -+ struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); -+ int ret = bkey_err(bp_k); - if (ret) - goto err; - - if (bp_k.k->type != KEY_TYPE_backpointer || -- memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { -+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { - ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); - if (ret) - goto err; -@@ -570,7 +494,6 @@ static int check_bp_exists(struct btree_trans *trans, - fsck_err: - bch2_trans_iter_exit(trans, &other_extent_iter); - bch2_trans_iter_exit(trans, &bp_iter); -- bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; - check_existing_bp: -@@ -578,10 +501,10 @@ static int check_bp_exists(struct btree_trans *trans, - if (bp_k.k->type != KEY_TYPE_backpointer) - goto missing; - -- struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; -+ struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); - - struct bkey_s_c other_extent = -- bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); -+ bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); - ret = 
bkey_err(other_extent); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - ret = 0; -@@ -600,19 +523,23 @@ static int check_bp_exists(struct btree_trans *trans, - bch_err(c, "%s", buf.buf); - - if (other_extent.k->size <= orig_k.k->size) { -- ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); -+ ret = drop_dev_and_update(trans, other_bp.v->btree_id, -+ other_extent, bp->k.p.inode); - if (ret) - goto err; - goto out; - } else { -- ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); -+ ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); - if (ret) - goto err; - goto missing; - } - } - -- ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); -+ ret = check_extent_checksum(trans, -+ other_bp.v->btree_id, other_extent, -+ bp->v.btree_id, orig_k, -+ bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -620,7 +547,8 @@ static int check_bp_exists(struct btree_trans *trans, - goto missing; - } - -- ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); -+ ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, -+ other_bp.v->btree_id, other_extent, bp->k.p.inode); - if (ret < 0) - goto err; - if (ret) { -@@ -629,7 +557,7 @@ static int check_bp_exists(struct btree_trans *trans, - } - - printbuf_reset(&buf); -- prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); -+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); - bch2_bkey_val_to_text(&buf, c, orig_k); - prt_str(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, other_extent); -@@ -638,21 +566,15 @@ static int check_bp_exists(struct btree_trans *trans, - goto err; - missing: - printbuf_reset(&buf); -- prt_printf(&buf, "missing backpointer for btree=%s l=%u ", -- bch2_btree_id_str(bp.btree_id), bp.level); -+ prt_str(&buf, "missing backpointer\n for: "); - bch2_bkey_val_to_text(&buf, c, orig_k); -- prt_printf(&buf, "\n got: "); -+ prt_printf(&buf, "\n want: "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); -+ prt_printf(&buf, "\n got: "); - bch2_bkey_val_to_text(&buf, c, bp_k); - -- struct bkey_i_backpointer n_bp_k; -- bkey_backpointer_init(&n_bp_k.k_i); -- n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- n_bp_k.v = bp; -- prt_printf(&buf, "\n want: "); -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); -- - if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) -- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); -+ ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); - - goto out; - } -@@ -663,31 +585,33 @@ static int check_extent_to_backpointers(struct btree_trans *trans, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -- struct bkey_ptrs_c ptrs; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- int ret; - -- ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- struct bpos bucket_pos = POS_MIN; -- struct bch_backpointer bp; -- - if (p.ptr.cached) - continue; - -+ if (p.ptr.dev == BCH_SB_MEMBER_INVALID) -+ continue; -+ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); -- if (ca) -- bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); -+ bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); -+ bool empty = ca && 
test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty); - rcu_read_unlock(); - -- if (!ca) -- continue; -+ if (check || empty) { -+ struct bkey_i_backpointer bp; -+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); - -- ret = check_bp_exists(trans, s, bucket_pos, bp, k); -- if (ret) -- return ret; -+ int ret = check -+ ? check_bp_exists(trans, s, &bp, k) -+ : bch2_bucket_backpointer_mod(trans, k, &bp, true); -+ if (ret) -+ return ret; -+ } - } - - return 0; -@@ -896,54 +820,330 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, - return 0; - } - -+enum alloc_sector_counter { -+ ALLOC_dirty, -+ ALLOC_cached, -+ ALLOC_stripe, -+ ALLOC_SECTORS_NR -+}; -+ -+static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t) -+{ -+ switch (t) { -+ case BCH_DATA_btree: -+ case BCH_DATA_user: -+ return ALLOC_dirty; -+ case BCH_DATA_cached: -+ return ALLOC_cached; -+ case BCH_DATA_stripe: -+ return ALLOC_stripe; -+ default: -+ BUG(); -+ } -+} -+ -+static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); -+ -+static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, -+ struct bkey_buf *last_flushed) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ bool need_commit = false; -+ -+ if (a->data_type == BCH_DATA_sb || -+ a->data_type == BCH_DATA_journal || -+ a->data_type == BCH_DATA_parity) -+ return 0; -+ -+ u32 sectors[ALLOC_SECTORS_NR]; -+ memset(sectors, 0, sizeof(sectors)); -+ -+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); -+ if (!ca) -+ return 0; -+ -+ struct btree_iter iter; -+ struct bkey_s_c bp_k; -+ int ret = 0; -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, alloc_k.k->p), -+ bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { -+ if (bp_k.k->type != KEY_TYPE_backpointer) -+ continue; -+ -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); -+ -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && -+ (bp.v->bucket_gen != a->gen || -+ bp.v->pad)) { -+ ret = bch2_backpointer_del(trans, bp_k.k->p); -+ if (ret) -+ break; -+ -+ need_commit = true; -+ continue; -+ } -+ -+ if (bp.v->bucket_gen != a->gen) -+ continue; -+ -+ sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len; -+ }; -+ bch2_trans_iter_exit(trans, &iter); -+ if (ret) -+ goto err; -+ -+ if (need_commit) { -+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); -+ if (ret) -+ goto err; -+ } -+ -+ /* Cached pointers don't have backpointers: */ -+ -+ if (sectors[ALLOC_dirty] != a->dirty_sectors || -+ sectors[ALLOC_stripe] != a->stripe_sectors) { -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { -+ ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); -+ if (ret) -+ goto err; -+ } -+ -+ if (sectors[ALLOC_dirty] > a->dirty_sectors || -+ sectors[ALLOC_stripe] > a->stripe_sectors) { -+ ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto err; -+ } -+ -+ if (!sectors[ALLOC_dirty] && -+ !sectors[ALLOC_stripe]) -+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty); -+ else -+ __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); -+ } -+err: -+ bch2_dev_put(ca); -+ return ret; -+} -+ 
-+static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr_v2: { -+ bool ret = false; -+ -+ rcu_read_lock(); -+ struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; -+ while (pos.inode <= k.k->p.inode) { -+ if (pos.inode >= c->sb.nr_devices) -+ break; -+ -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode); -+ if (!ca) -+ goto next; -+ -+ struct bpos bucket = bp_pos_to_bucket(ca, pos); -+ bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, -+ ca->mi.nbuckets, bucket.offset); -+ if (bucket.offset == ca->mi.nbuckets) -+ goto next; -+ -+ ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p); -+ if (ret) -+ break; -+next: -+ pos = SPOS(pos.inode + 1, 0, 0); -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+ } -+ case KEY_TYPE_btree_ptr: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, -+ enum btree_id btree, unsigned level) -+{ -+ struct btree_iter iter; -+ bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); -+ int ret = PTR_ERR_OR_ZERO(b); -+ if (ret) -+ goto err; -+ -+ if (b) -+ bch2_node_pin(trans->c, b); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, -+ struct bpos start, struct bpos *end) -+{ -+ struct bch_fs *c = trans->c; -+ int ret = 0; -+ -+ struct bkey_buf tmp; -+ bch2_bkey_buf_init(&tmp); -+ -+ bch2_btree_cache_unpin(c); -+ -+ *end = SPOS_MAX; -+ -+ s64 mem_may_pin = mem_may_pin_bytes(c); -+ struct btree_iter iter; -+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, -+ 0, 1, BTREE_ITER_prefetch); -+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ -+ if (!backpointer_node_has_missing(c, k)) -+ continue; -+ -+ mem_may_pin -= c->opts.btree_node_size; -+ if (mem_may_pin <= 0) -+ break; -+ -+ bch2_bkey_buf_reassemble(&tmp, c, k); -+ struct btree_path *path = btree_iter_path(trans, &iter); -+ -+ BUG_ON(path->level != 1); -+ -+ bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); -+ })); -+ if (ret) -+ return ret; -+ -+ struct bpos pinned = SPOS_MAX; -+ mem_may_pin = mem_may_pin_bytes(c); -+ bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, -+ 0, 1, BTREE_ITER_prefetch); -+ ret = for_each_btree_key_continue(trans, iter, 0, k, ({ -+ if (!backpointer_node_has_missing(c, k)) -+ continue; -+ -+ mem_may_pin -= c->opts.btree_node_size; -+ if (mem_may_pin <= 0) { -+ *end = pinned; -+ break; -+ } -+ -+ bch2_bkey_buf_reassemble(&tmp, c, k); -+ struct btree_path *path = btree_iter_path(trans, &iter); -+ -+ BUG_ON(path->level != 1); -+ -+ int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1); -+ -+ if (!ret2) -+ pinned = tmp.k->k.p; -+ -+ ret; -+ })); -+ if (ret) -+ return ret; -+ -+ return ret; -+} -+ - int bch2_check_extents_to_backpointers(struct bch_fs *c) - { -+ int ret = 0; -+ -+ /* -+ * Can't allow devices to come/go/resize while we have bucket bitmaps -+ * allocated -+ */ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_member_device(c, ca) { -+ BUG_ON(ca->bucket_backpointer_mismatches); -+ ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), -+ sizeof(unsigned long), -+ GFP_KERNEL); -+ ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), -+ sizeof(unsigned long), -+ GFP_KERNEL); -+ if 
(!ca->bucket_backpointer_mismatches || -+ !ca->bucket_backpointer_empty) { -+ bch2_dev_put(ca); -+ ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; -+ goto err_free_bitmaps; -+ } -+ } -+ - struct btree_trans *trans = bch2_trans_get(c); -- struct extents_to_bp_state s = { .bucket_start = POS_MIN }; -- int ret; -+ struct extents_to_bp_state s = { .bp_start = POS_MIN }; - - bch2_bkey_buf_init(&s.last_flushed); - bkey_init(&s.last_flushed.k->k); - -+ ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, -+ POS_MIN, BTREE_ITER_prefetch, k, ({ -+ check_bucket_backpointer_mismatch(trans, k, &s.last_flushed); -+ })); -+ if (ret) -+ goto err; -+ -+ u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; -+ for_each_member_device(c, ca) { -+ nr_buckets += ca->mi.nbuckets; -+ nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); -+ nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets); -+ } -+ -+ if (!nr_mismatches && !nr_empty) -+ goto err; -+ -+ bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", -+ nr_mismatches + nr_empty, nr_buckets); -+ - while (1) { -- struct bbpos end; -- ret = bch2_get_btree_in_memory_pos(trans, -- BIT_ULL(BTREE_ID_backpointers), -- BIT_ULL(BTREE_ID_backpointers), -- BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); -+ ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); - if (ret) - break; - -- s.bucket_end = end.pos; -- -- if ( bpos_eq(s.bucket_start, POS_MIN) && -- !bpos_eq(s.bucket_end, SPOS_MAX)) -+ if ( bpos_eq(s.bp_start, POS_MIN) && -+ !bpos_eq(s.bp_end, SPOS_MAX)) - bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", - __func__, btree_nodes_fit_in_ram(c)); - -- if (!bpos_eq(s.bucket_start, POS_MIN) || -- !bpos_eq(s.bucket_end, SPOS_MAX)) { -+ if (!bpos_eq(s.bp_start, POS_MIN) || -+ !bpos_eq(s.bp_end, SPOS_MAX)) { - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "check_extents_to_backpointers(): "); -- bch2_bpos_to_text(&buf, s.bucket_start); -+ bch2_bpos_to_text(&buf, s.bp_start); - prt_str(&buf, "-"); -- bch2_bpos_to_text(&buf, s.bucket_end); -+ bch2_bpos_to_text(&buf, s.bp_end); - - bch_verbose(c, "%s", buf.buf); - printbuf_exit(&buf); - } - - ret = bch2_check_extents_to_backpointers_pass(trans, &s); -- if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) -+ if (ret || bpos_eq(s.bp_end, SPOS_MAX)) - break; - -- s.bucket_start = bpos_successor(s.bucket_end); -+ s.bp_start = bpos_successor(s.bp_end); - } -+err: - bch2_trans_put(trans); - bch2_bkey_buf_exit(&s.last_flushed, c); -- - bch2_btree_cache_unpin(c); -+err_free_bitmaps: -+ for_each_member_device(c, ca) { -+ kvfree(ca->bucket_backpointer_empty); -+ ca->bucket_backpointer_empty = NULL; -+ kvfree(ca->bucket_backpointer_mismatches); -+ ca->bucket_backpointer_mismatches = NULL; -+ } - - bch_err_fn(c, ret); - return ret; -@@ -959,44 +1159,43 @@ static int check_one_backpointer(struct btree_trans *trans, - return 0; - - struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); -- struct bch_fs *c = trans->c; -- struct btree_iter iter; - struct bbpos pos = bp_to_bbpos(*bp.v); -- struct bkey_s_c k; -- struct printbuf buf = PRINTBUF; -- int ret; - - if (bbpos_cmp(pos, start) < 0 || - bbpos_cmp(pos, end) > 0) - return 0; - -- k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); -- ret = bkey_err(k); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); -+ int ret = bkey_err(k); - if (ret == 
-BCH_ERR_backpointer_to_overwritten_btree_node) - return 0; - if (ret) - return ret; - -- if (!k.k) { -- ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); -- if (ret) -- goto out; -- -- if (fsck_err(trans, backpointer_to_missing_ptr, -- "backpointer for missing %s\n %s", -- bp.v->level ? "btree node" : "extent", -- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { -- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); -- goto out; -- } -- } --out: --fsck_err: - bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); - return ret; - } - -+static int check_bucket_backpointers_to_extents(struct btree_trans *trans, -+ struct bch_dev *ca, struct bpos bucket) -+{ -+ u32 restart_count = trans->restart_count; -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); -+ -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, bucket), -+ bucket_pos_to_bp_end(ca, bucket), -+ 0, k, -+ check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) -+ ); -+ -+ bch2_bkey_buf_exit(&last_flushed, trans->c); -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ - static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, - struct bbpos start, - struct bbpos end) -@@ -1009,9 +1208,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, - bkey_init(&last_flushed.k->k); - progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); - -- int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, -- POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -+ int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers, -+ POS_MIN, BTREE_ITER_prefetch, k, ({ - progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); - check_one_backpointer(trans, start, end, k, &last_flushed); - })); -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -index 3b29fdf519dd..060dad1521ee 100644 ---- a/fs/bcachefs/backpointers.h -+++ b/fs/bcachefs/backpointers.h -@@ -18,14 +18,14 @@ static inline u64 swab40(u64 x) - ((x & 0xff00000000ULL) >> 32)); - } - --int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); --void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); --void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, -+ struct bkey_validate_context); -+void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_backpointer_swab(struct bkey_s); - - #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_validate = bch2_backpointer_validate, \ -- .val_to_text = bch2_backpointer_k_to_text, \ -+ .val_to_text = bch2_backpointer_to_text, \ - .swab = bch2_backpointer_swab, \ - .min_val_size = 32, \ - }) -@@ -43,22 +43,24 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos - return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); - } - -+static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca, struct bpos bp_pos, -+ u32 *bucket_offset) -+{ -+ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; -+ -+ return POS(bp_pos.inode, sector_to_bucket_and_offset(ca, bucket_sector, bucket_offset)); -+} -+ - static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct 
bpos bp_pos, struct bpos *bucket) - { - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, bp_pos.inode); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode); - if (ca) - *bucket = bp_pos_to_bucket(ca, bp_pos); - rcu_read_unlock(); - return ca != NULL; - } - --static inline bool bp_pos_to_bucket_nodev(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket) --{ -- return !bch2_fs_inconsistent_on(!bp_pos_to_bucket_nodev_noerror(c, bp_pos, bucket), -- c, "backpointer for missing device %llu", bp_pos.inode); --} -- - static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca, - struct bpos bucket, - u64 bucket_offset) -@@ -80,31 +82,35 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_dev *ca, - return ret; - } - --int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *, -- struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool); -+static inline struct bpos bucket_pos_to_bp_start(const struct bch_dev *ca, struct bpos bucket) -+{ -+ return bucket_pos_to_bp(ca, bucket, 0); -+} -+ -+static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct bpos bucket) -+{ -+ return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0)); -+} -+ -+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, -+ struct bkey_s_c, -+ struct bkey_i_backpointer *, -+ bool); - - static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, -- struct bch_dev *ca, -- struct bpos bucket, -- struct bch_backpointer bp, - struct bkey_s_c orig_k, -+ struct bkey_i_backpointer *bp, - bool insert) - { - if (unlikely(bch2_backpointers_no_use_write_buffer)) -- return bch2_bucket_backpointer_mod_nowritebuffer(trans, ca, bucket, bp, orig_k, insert); -- -- struct bkey_i_backpointer bp_k; -- -- bkey_backpointer_init(&bp_k.k_i); -- bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); -- bp_k.v = bp; -+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert); - - if (!insert) { -- bp_k.k.type = KEY_TYPE_deleted; -- set_bkey_val_u64s(&bp_k.k, 0); -+ bp->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp->k, 0); - } - -- return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i); -+ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp->k_i); - } - - static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, -@@ -134,44 +140,29 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, - } - } - --static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, -+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, -- struct bpos *bucket_pos, struct bch_backpointer *bp, -- u64 sectors) -+ struct bkey_i_backpointer *bp) - { -- u32 bucket_offset; -- *bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); -- *bp = (struct bch_backpointer) { -+ bkey_backpointer_init(&bp->k_i); -+ bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset); -+ bp->v = (struct bch_backpointer) { - .btree_id = btree_id, - .level = level, - .data_type = bch2_bkey_ptr_data_type(k, p, entry), -- .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + -- p.crc.offset, -- .bucket_len = sectors, -+ .bucket_gen = p.ptr.gen, -+ .bucket_len = ptr_disk_sectors(level ? 
btree_sectors(c) : k.k->size, p), - .pos = k.k->p, - }; - } - --static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, -- enum btree_id btree_id, unsigned level, -- struct bkey_s_c k, struct extent_ptr_decoded p, -- const union bch_extent_entry *entry, -- struct bpos *bucket_pos, struct bch_backpointer *bp) --{ -- u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); -- -- __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors); --} -- --int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int, -- struct bpos *, struct bch_backpointer *, unsigned); --struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *, -- struct bpos, struct bch_backpointer, -- unsigned); --struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *, -- struct bpos, struct bch_backpointer); -+struct bkey_buf; -+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, -+ struct btree_iter *, unsigned, struct bkey_buf *); -+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, -+ struct btree_iter *, struct bkey_buf *); - - int bch2_check_btree_backpointers(struct bch_fs *); - int bch2_check_extents_to_backpointers(struct bch_fs *); -diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h -index be2edced5213..63abe17f35ea 100644 ---- a/fs/bcachefs/bbpos.h -+++ b/fs/bcachefs/bbpos.h -@@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) - - static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) - { -- prt_str(out, bch2_btree_id_str(pos.btree)); -+ bch2_btree_id_to_text(out, pos.btree); - prt_char(out, ':'); - bch2_bpos_to_text(out, pos.pos); - } -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -index e94a83b8113e..161cf2f05d2a 100644 ---- a/fs/bcachefs/bcachefs.h -+++ b/fs/bcachefs/bcachefs.h -@@ -205,6 +205,7 @@ - #include - - #include "bcachefs_format.h" -+#include "btree_journal_iter_types.h" - #include "disk_accounting_types.h" - #include "errcode.h" - #include "fifo.h" -@@ -293,6 +294,8 @@ do { \ - - #define bch_info(c, fmt, ...) \ - bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_info_ratelimited(c, fmt, ...) \ -+ bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) - #define bch_notice(c, fmt, ...) \ - bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) - #define bch_warn(c, fmt, ...) \ -@@ -352,6 +355,12 @@ do { \ - bch_info(c, fmt, ##__VA_ARGS__); \ - } while (0) - -+#define bch_verbose_ratelimited(c, fmt, ...) \ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ - #define pr_verbose_init(opts, fmt, ...) \ - do { \ - if (opt_get(opts, verbose)) \ -@@ -538,20 +547,20 @@ struct bch_dev { - - /* - * Buckets: -- * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -- * gc_gens_lock, for device resize - holding any is sufficient for -- * access: Or rcu_read_lock(), but only for dev_ptr_stale(): -+ * Per-bucket arrays are protected by either rcu_read_lock or -+ * state_lock, for device resize. 
- */ - GENRADIX(struct bucket) buckets_gc; - struct bucket_gens __rcu *bucket_gens; - u8 *oldest_gen; - unsigned long *buckets_nouse; -- struct rw_semaphore bucket_lock; -+ -+ unsigned long *bucket_backpointer_mismatches; -+ unsigned long *bucket_backpointer_empty; - - struct bch_dev_usage __percpu *usage; - - /* Allocator: */ -- u64 new_fs_bucket_idx; - u64 alloc_cursor[3]; - - unsigned nr_open_buckets; -@@ -606,6 +615,7 @@ struct bch_dev { - x(going_ro) \ - x(write_disable_complete) \ - x(clean_shutdown) \ -+ x(recovery_running) \ - x(fsck_running) \ - x(initial_gc_unfixed) \ - x(need_delete_dead_snapshots) \ -@@ -650,28 +660,6 @@ struct journal_seq_blacklist_table { - } entries[]; - }; - --struct journal_keys { -- /* must match layout in darray_types.h */ -- size_t nr, size; -- struct journal_key { -- u64 journal_seq; -- u32 journal_offset; -- enum btree_id btree_id:8; -- unsigned level:8; -- bool allocated; -- bool overwritten; -- struct bkey_i *k; -- } *data; -- /* -- * Gap buffer: instead of all the empty space in the array being at the -- * end of the buffer - from @nr to @size - the empty space is at @gap. -- * This means that sequential insertions are O(n) instead of O(n^2). -- */ -- size_t gap; -- atomic_t ref; -- bool initial_ref_held; --}; -- - struct btree_trans_buf { - struct btree_trans *trans; - }; -@@ -680,6 +668,7 @@ struct btree_trans_buf { - ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) - - #define BCH_WRITE_REFS() \ -+ x(journal) \ - x(trans) \ - x(write) \ - x(promote) \ -@@ -692,6 +681,7 @@ struct btree_trans_buf { - x(dio_write) \ - x(discard) \ - x(discard_fast) \ -+ x(check_discard_freespace_key) \ - x(invalidate) \ - x(delete_dead_snapshots) \ - x(gc_gens) \ -@@ -734,6 +724,12 @@ struct bch_fs { - #else - struct percpu_ref writes; - #endif -+ /* -+ * Certain operations are only allowed in single threaded mode, during -+ * recovery, and we want to assert that this is the case: -+ */ -+ struct task_struct *recovery_task; -+ - /* - * Analagous to c->writes, for asynchronous ops that don't necessarily - * need fs to be read-write -@@ -764,6 +760,8 @@ struct bch_fs { - __uuid_t user_uuid; - - u16 version; -+ u16 version_incompat; -+ u16 version_incompat_allowed; - u16 version_min; - u16 version_upgrade_complete; - -@@ -834,9 +832,10 @@ struct bch_fs { - struct work_struct btree_interior_update_work; - - struct workqueue_struct *btree_node_rewrite_worker; -- -- struct list_head pending_node_rewrites; -- struct mutex pending_node_rewrites_lock; -+ struct list_head btree_node_rewrites; -+ struct list_head btree_node_rewrites_pending; -+ spinlock_t btree_node_rewrites_lock; -+ struct closure_waitlist btree_node_rewrites_wait; - - /* btree_io.c: */ - spinlock_t btree_write_error_lock; -@@ -967,8 +966,7 @@ struct bch_fs { - struct rhashtable promote_table; - - mempool_t compression_bounce[2]; -- mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -- mempool_t decompress_workspace; -+ mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; - size_t zstd_workspace_size; - - struct crypto_shash *sha256; -@@ -1027,6 +1025,7 @@ struct bch_fs { - struct list_head vfs_inodes_list; - struct mutex vfs_inodes_lock; - struct rhashtable vfs_inodes_table; -+ struct rhltable vfs_inodes_by_inum_table; - - /* VFS IO PATH - fs-io.c */ - struct bio_set writepage_bioset; -@@ -1048,10 +1047,12 @@ struct bch_fs { - * for signaling to the toplevel code which pass we want to run now. 
- */ - enum bch_recovery_pass curr_recovery_pass; -+ enum bch_recovery_pass next_recovery_pass; - /* bitmask of recovery passes that we actually ran */ - u64 recovery_passes_complete; - /* never rewinds version of curr_recovery_pass */ - enum bch_recovery_pass recovery_pass_done; -+ spinlock_t recovery_pass_lock; - struct semaphore online_fsck_mutex; - - /* DEBUG JUNK */ -@@ -1062,9 +1063,6 @@ struct bch_fs { - struct btree_node *verify_ondisk; - struct mutex verify_lock; - -- u64 *unused_inode_hints; -- unsigned inode_shard_bits; -- - /* - * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - have to dynamically allocate them -@@ -1086,8 +1084,6 @@ struct bch_fs { - u64 counters_on_mount[BCH_COUNTER_NR]; - u64 __percpu *counters; - -- unsigned copy_gc_enabled:1; -- - struct bch2_time_stats times[BCH_TIME_STAT_NR]; - - struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -index 5004f6ba997c..f70f0108401f 100644 ---- a/fs/bcachefs/bcachefs_format.h -+++ b/fs/bcachefs/bcachefs_format.h -@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k) - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ -- x(accounting, 34) -+ x(accounting, 34) \ -+ x(inode_alloc_cursor, 35) - - enum bch_bkey_type { - #define x(name, nr) KEY_TYPE_##name = nr, -@@ -463,7 +464,8 @@ struct bch_backpointer { - __u8 btree_id; - __u8 level; - __u8 data_type; -- __u64 bucket_offset:40; -+ __u8 bucket_gen; -+ __u32 pad; - __u32 bucket_len; - struct bpos pos; - } __packed __aligned(8); -@@ -499,8 +501,6 @@ struct bch_sb_field { - #include "disk_groups_format.h" - #include "extents_format.h" - #include "ec_format.h" --#include "dirent_format.h" --#include "disk_groups_format.h" - #include "inode_format.h" - #include "journal_seq_blacklist_format.h" - #include "logged_ops_format.h" -@@ -679,7 +679,14 @@ struct bch_sb_field_ext { - x(disk_accounting_v3, BCH_VERSION(1, 10)) \ - x(disk_accounting_inum, BCH_VERSION(1, 11)) \ - x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ -- x(inode_has_child_snapshots, BCH_VERSION(1, 13)) -+ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ -+ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ -+ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ -+ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ -+ x(inode_depth, BCH_VERSION(1, 17)) \ -+ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ -+ x(autofix_errors, BCH_VERSION(1, 19)) \ -+ x(directory_size, BCH_VERSION(1, 20)) - - enum bcachefs_metadata_version { - bcachefs_metadata_version_min = 9, -@@ -844,6 +851,10 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, - struct bch_sb, flags[5], 0, 16); - LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, - struct bch_sb, flags[5], 16, 32); -+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); -+LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, -+ struct bch_sb, flags[5], 48, 64); -+LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); - - static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) - { -@@ -896,21 +907,22 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u - x(new_varint, 15) \ - x(journal_no_flush, 16) \ - x(alloc_v2, 17) \ -- x(extents_across_btree_nodes, 18) -+ x(extents_across_btree_nodes, 18) \ -+ x(incompat_version_field, 19) - - #define BCH_SB_FEATURES_ALWAYS \ -- ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ -- (1ULL << 
BCH_FEATURE_extents_above_btree_updates)|\ -- (1ULL << BCH_FEATURE_btree_updates_journalled)|\ -- (1ULL << BCH_FEATURE_alloc_v2)|\ -- (1ULL << BCH_FEATURE_extents_across_btree_nodes)) -+ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ -+ BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ -+ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ -+ BIT_ULL(BCH_FEATURE_alloc_v2)|\ -+ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) - - #define BCH_SB_FEATURES_ALL \ - (BCH_SB_FEATURES_ALWAYS| \ -- (1ULL << BCH_FEATURE_new_siphash)| \ -- (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -- (1ULL << BCH_FEATURE_new_varint)| \ -- (1ULL << BCH_FEATURE_journal_no_flush)) -+ BIT_ULL(BCH_FEATURE_new_siphash)| \ -+ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ -+ BIT_ULL(BCH_FEATURE_new_varint)| \ -+ BIT_ULL(BCH_FEATURE_journal_no_flush)) - - enum bch_sb_feature { - #define x(f, n) BCH_FEATURE_##f, -@@ -1032,7 +1044,7 @@ static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) - x(crc64, 2) \ - x(xxhash, 3) - --enum bch_csum_opts { -+enum bch_csum_opt { - #define x(t, n) BCH_CSUM_OPT_##t = n, - BCH_CSUM_OPTS() - #undef x -@@ -1221,6 +1233,15 @@ struct jset_entry_log { - u8 d[]; - } __packed __aligned(8); - -+static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) -+{ -+ unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); -+ -+ while (b && !l->d[b - 1]) -+ --b; -+ return b; -+} -+ - struct jset_entry_datetime { - struct jset_entry entry; - __le64 seconds; -@@ -1268,14 +1289,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); - /* Btree: */ - - enum btree_id_flags { -- BTREE_ID_EXTENTS = BIT(0), -- BTREE_ID_SNAPSHOTS = BIT(1), -- BTREE_ID_SNAPSHOT_FIELD = BIT(2), -- BTREE_ID_DATA = BIT(3), -+ BTREE_IS_extents = BIT(0), -+ BTREE_IS_snapshots = BIT(1), -+ BTREE_IS_snapshot_field = BIT(2), -+ BTREE_IS_data = BIT(3), -+ BTREE_IS_write_buffer = BIT(4), - }; - - #define BCH_BTREE_IDS() \ -- x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\ -+ x(extents, 0, \ -+ BTREE_IS_extents| \ -+ BTREE_IS_snapshots| \ -+ BTREE_IS_data, \ - BIT_ULL(KEY_TYPE_whiteout)| \ - BIT_ULL(KEY_TYPE_error)| \ - BIT_ULL(KEY_TYPE_cookie)| \ -@@ -1283,17 +1308,20 @@ enum btree_id_flags { - BIT_ULL(KEY_TYPE_reservation)| \ - BIT_ULL(KEY_TYPE_reflink_p)| \ - BIT_ULL(KEY_TYPE_inline_data)) \ -- x(inodes, 1, BTREE_ID_SNAPSHOTS, \ -+ x(inodes, 1, \ -+ BTREE_IS_snapshots, \ - BIT_ULL(KEY_TYPE_whiteout)| \ - BIT_ULL(KEY_TYPE_inode)| \ - BIT_ULL(KEY_TYPE_inode_v2)| \ - BIT_ULL(KEY_TYPE_inode_v3)| \ - BIT_ULL(KEY_TYPE_inode_generation)) \ -- x(dirents, 2, BTREE_ID_SNAPSHOTS, \ -+ x(dirents, 2, \ -+ BTREE_IS_snapshots, \ - BIT_ULL(KEY_TYPE_whiteout)| \ - BIT_ULL(KEY_TYPE_hash_whiteout)| \ - BIT_ULL(KEY_TYPE_dirent)) \ -- x(xattrs, 3, BTREE_ID_SNAPSHOTS, \ -+ x(xattrs, 3, \ -+ BTREE_IS_snapshots, \ - BIT_ULL(KEY_TYPE_whiteout)| \ - BIT_ULL(KEY_TYPE_cookie)| \ - BIT_ULL(KEY_TYPE_hash_whiteout)| \ -@@ -1307,7 +1335,9 @@ enum btree_id_flags { - BIT_ULL(KEY_TYPE_quota)) \ - x(stripes, 6, 0, \ - BIT_ULL(KEY_TYPE_stripe)) \ -- x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ -+ x(reflink, 7, \ -+ BTREE_IS_extents| \ -+ BTREE_IS_data, \ - BIT_ULL(KEY_TYPE_reflink_v)| \ - BIT_ULL(KEY_TYPE_indirect_inline_data)| \ - BIT_ULL(KEY_TYPE_error)) \ -@@ -1315,28 +1345,38 @@ enum btree_id_flags { - BIT_ULL(KEY_TYPE_subvolume)) \ - x(snapshots, 9, 0, \ - BIT_ULL(KEY_TYPE_snapshot)) \ -- x(lru, 10, 0, \ -+ x(lru, 10, \ -+ BTREE_IS_write_buffer, \ - BIT_ULL(KEY_TYPE_set)) \ -- x(freespace, 11, BTREE_ID_EXTENTS, \ 
-+ x(freespace, 11, \ -+ BTREE_IS_extents, \ - BIT_ULL(KEY_TYPE_set)) \ - x(need_discard, 12, 0, \ - BIT_ULL(KEY_TYPE_set)) \ -- x(backpointers, 13, 0, \ -+ x(backpointers, 13, \ -+ BTREE_IS_write_buffer, \ - BIT_ULL(KEY_TYPE_backpointer)) \ - x(bucket_gens, 14, 0, \ - BIT_ULL(KEY_TYPE_bucket_gens)) \ - x(snapshot_trees, 15, 0, \ - BIT_ULL(KEY_TYPE_snapshot_tree)) \ -- x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ -+ x(deleted_inodes, 16, \ -+ BTREE_IS_snapshot_field| \ -+ BTREE_IS_write_buffer, \ - BIT_ULL(KEY_TYPE_set)) \ - x(logged_ops, 17, 0, \ - BIT_ULL(KEY_TYPE_logged_op_truncate)| \ -- BIT_ULL(KEY_TYPE_logged_op_finsert)) \ -- x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ -+ BIT_ULL(KEY_TYPE_logged_op_finsert)| \ -+ BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \ -+ x(rebalance_work, 18, \ -+ BTREE_IS_snapshot_field| \ -+ BTREE_IS_write_buffer, \ - BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \ - x(subvolume_children, 19, 0, \ - BIT_ULL(KEY_TYPE_set)) \ -- x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \ -+ x(accounting, 20, \ -+ BTREE_IS_snapshot_field| \ -+ BTREE_IS_write_buffer, \ - BIT_ULL(KEY_TYPE_accounting)) \ - - enum btree_id { -@@ -1361,6 +1401,8 @@ static inline bool btree_id_is_alloc(enum btree_id id) - case BTREE_ID_need_discard: - case BTREE_ID_freespace: - case BTREE_ID_bucket_gens: -+ case BTREE_ID_lru: -+ case BTREE_ID_accounting: - return true; - default: - return false; -diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h -index 41df24a53d97..054e2d5e8448 100644 ---- a/fs/bcachefs/bkey.h -+++ b/fs/bcachefs/bkey.h -@@ -9,13 +9,6 @@ - #include "util.h" - #include "vstructs.h" - --enum bch_validate_flags { -- BCH_VALIDATE_write = BIT(0), -- BCH_VALIDATE_commit = BIT(1), -- BCH_VALIDATE_journal = BIT(2), -- BCH_VALIDATE_silent = BIT(3), --}; -- - #if 0 - - /* -diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c -index e7ac227ba7e8..15c93576b5c2 100644 ---- a/fs/bcachefs/bkey_methods.c -+++ b/fs/bcachefs/bkey_methods.c -@@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = { - }; - - static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, - }) - - static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -59,7 +59,7 @@ static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, - }) - - static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, - }) - - static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -124,7 +124,7 @@ const struct bkey_ops bch2_bkey_null_ops = { - }; - - int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) - return 0; -@@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, - if (!ops->key_validate) - return 0; - -- ret = ops->key_validate(c, k, flags); -+ ret = ops->key_validate(c, k, from); - fsck_err: - return ret; - } -@@ -161,9 +161,10 
@@ const char *bch2_btree_node_type_str(enum btree_node_type type) - } - - int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, -- enum btree_node_type type, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -+ enum btree_node_type type = __btree_node_type(from.level, from.btree); -+ - if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) - return 0; - -@@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, - return 0; - - bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && -- (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && -+ (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), - c, bkey_invalid_type_for_btree, - "invalid key type for btree %s (%s)", -@@ -228,15 +229,15 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, -- enum btree_node_type type, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -- return __bch2_bkey_validate(c, k, type, flags) ?: -- bch2_bkey_val_validate(c, k, flags); -+ return __bch2_bkey_validate(c, k, from) ?: -+ bch2_bkey_val_validate(c, k, from); - } - - int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, -- struct bkey_s_c k, enum bch_validate_flags flags) -+ struct bkey_s_c k, -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h -index 018fb72e32d3..bf34111cdf00 100644 ---- a/fs/bcachefs/bkey_methods.h -+++ b/fs/bcachefs/bkey_methods.h -@@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; - */ - struct bkey_ops { - int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags); -+ struct bkey_validate_context from); - void (*val_to_text)(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - void (*swab)(struct bkey_s); -@@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) - : &bch2_bkey_null_ops; - } - --int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); --int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, -- enum bch_validate_flags); --int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, -- enum bch_validate_flags); -+int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context from); - - void bch2_bpos_to_text(struct printbuf *, struct bpos); - void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h -index c9ae9e42b385..b4f328f9853c 100644 ---- a/fs/bcachefs/bkey_types.h -+++ b/fs/bcachefs/bkey_types.h -@@ -210,4 +210,32 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ - BCH_BKEY_TYPES(); - #undef x - -+enum bch_validate_flags { -+ BCH_VALIDATE_write = BIT(0), -+ BCH_VALIDATE_commit = BIT(1), -+ BCH_VALIDATE_silent = BIT(2), -+}; -+ -+#define BKEY_VALIDATE_CONTEXTS() \ -+ x(unknown) \ -+ x(superblock) \ -+ x(journal) \ -+ x(btree_root) \ -+ x(btree_node) \ -+ x(commit) -+ -+struct 
bkey_validate_context { -+ enum { -+#define x(n) BKEY_VALIDATE_##n, -+ BKEY_VALIDATE_CONTEXTS() -+#undef x -+ } from:8; -+ enum bch_validate_flags flags:8; -+ u8 level; -+ enum btree_id btree; -+ bool root:1; -+ unsigned journal_offset; -+ u64 journal_seq; -+}; -+ - #endif /* _BCACHEFS_BKEY_TYPES_H */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -index 7123019ab3bc..ca755e8d1a37 100644 ---- a/fs/bcachefs/btree_cache.c -+++ b/fs/bcachefs/btree_cache.c -@@ -24,7 +24,10 @@ do { \ - } while (0) - - const char * const bch2_btree_node_flags[] = { --#define x(f) #f, -+ "typebit", -+ "typebit", -+ "typebit", -+#define x(f) [BTREE_NODE_##f] = #f, - BTREE_FLAGS() - #undef x - NULL -@@ -222,7 +225,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) - struct btree_cache *bc = &c->btree_cache; - - mutex_lock(&bc->lock); -- BUG_ON(!__btree_node_pinned(bc, b)); - if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { - set_btree_node_pinned(b); - list_move(&b->list, &bc->live[1].list); -@@ -326,7 +328,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, - if (!IS_ERR_OR_NULL(b)) { - mutex_lock(&c->btree_cache.lock); - -- bch2_btree_node_hash_remove(&c->btree_cache, b); -+ __bch2_btree_node_hash_remove(&c->btree_cache, b); - - bkey_copy(&b->key, new); - ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -@@ -1004,16 +1006,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) - return; - - prt_printf(&buf, -- "btree node header doesn't match ptr\n" -- "btree %s level %u\n" -- "ptr: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ "btree node header doesn't match ptr: "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, "\nptr: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - -- prt_printf(&buf, "\nheader: btree %s level %llu\n" -- "min ", -- bch2_btree_id_str(BTREE_NODE_ID(b->data)), -- BTREE_NODE_LEVEL(b->data)); -+ prt_str(&buf, "\nheader: "); -+ bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data)); -+ prt_str(&buf, "\nmin "); - bch2_bpos_to_text(&buf, b->data->min_key); - - prt_printf(&buf, "\nmax "); -@@ -1133,7 +1133,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr - - if (unlikely(btree_node_read_error(b))) { - six_unlock_type(&b->c.lock, lock_type); -- return ERR_PTR(-BCH_ERR_btree_node_read_error); -+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - } - - EBUG_ON(b->c.btree_id != path->btree_id); -@@ -1223,7 +1223,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * - - if (unlikely(btree_node_read_error(b))) { - six_unlock_type(&b->c.lock, lock_type); -- return ERR_PTR(-BCH_ERR_btree_node_read_error); -+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - } - - EBUG_ON(b->c.btree_id != path->btree_id); -@@ -1305,7 +1305,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, - - if (unlikely(btree_node_read_error(b))) { - six_unlock_read(&b->c.lock); -- b = ERR_PTR(-BCH_ERR_btree_node_read_error); -+ b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); - goto out; - } - -@@ -1398,13 +1398,31 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) - prt_printf(out, "(unknown btree %u)", btree); - } - -+void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level) -+{ -+ prt_str(out, "btree="); -+ bch2_btree_id_to_text(out, btree); -+ prt_printf(out, " level=%u", level); -+} -+ -+void 
__bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, -+ enum btree_id btree, unsigned level, struct bkey_s_c k) -+{ -+ bch2_btree_id_to_text(out, btree); -+ prt_printf(out, " level %u/", level); -+ struct btree_root *r = bch2_btree_id_root(c, btree); -+ if (r) -+ prt_printf(out, "%u", r->level); -+ else -+ prt_printf(out, "(unknown)"); -+ prt_printf(out, "\n "); -+ -+ bch2_bkey_val_to_text(out, c, k); -+} -+ - void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) - { -- prt_printf(out, "%s level %u/%u\n ", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level, -- bch2_btree_id_root(c, b->c.btree_id)->level); -- bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ __bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key)); - } - - void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) -@@ -1478,8 +1496,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc - prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); - prt_newline(out); - -- for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) -- prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); -+ for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { -+ bch2_btree_id_to_text(out, i); -+ prt_printf(out, "\t"); -+ prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size); -+ prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]); -+ } - - prt_newline(out); - prt_printf(out, "freed:\t%zu\n", bc->nr_freed); -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -index 66e86d1a178d..ca3c1b145330 100644 ---- a/fs/bcachefs/btree_cache.h -+++ b/fs/bcachefs/btree_cache.h -@@ -128,19 +128,27 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i - } else { - unsigned idx = id - BTREE_ID_NR; - -- EBUG_ON(idx >= c->btree_roots_extra.nr); -+ /* This can happen when we're called from btree_node_scan */ -+ if (idx >= c->btree_roots_extra.nr) -+ return NULL; -+ - return &c->btree_roots_extra.data[idx]; - } - } - - static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) - { -- return bch2_btree_id_root(c, b->c.btree_id)->b; -+ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id); -+ -+ return r ? 
r->b : NULL; - } - --const char *bch2_btree_id_str(enum btree_id); -+const char *bch2_btree_id_str(enum btree_id); /* avoid */ - void bch2_btree_id_to_text(struct printbuf *, enum btree_id); -+void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); - -+void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, -+ enum btree_id, unsigned, struct bkey_s_c); - void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); - void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); - void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *); -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -index 81dcf9e512c0..dd1d9b74076e 100644 ---- a/fs/bcachefs/btree_gc.c -+++ b/fs/bcachefs/btree_gc.c -@@ -29,6 +29,7 @@ - #include "move.h" - #include "recovery_passes.h" - #include "reflink.h" -+#include "recovery.h" - #include "replicas.h" - #include "super-io.h" - #include "trace.h" -@@ -56,8 +57,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p) - { - prt_str(out, bch2_gc_phase_strs[p->phase]); - prt_char(out, ' '); -- bch2_btree_id_to_text(out, p->btree); -- prt_printf(out, " l=%u ", p->level); -+ bch2_btree_id_level_to_text(out, p->btree, p->level); -+ prt_char(out, ' '); - bch2_bpos_to_text(out, p->pos); - } - -@@ -209,8 +210,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * - if (bpos_eq(expected_start, cur->data->min_key)) - return 0; - -- prt_printf(&buf, " at btree %s level %u:\n parent: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(&buf, " at "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_printf(&buf, ":\n parent: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - if (prev) { -@@ -277,8 +279,9 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, - if (bpos_eq(child->key.k.p, b->key.k.p)) - return 0; - -- prt_printf(&buf, "at btree %s level %u:\n parent: ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(&buf, " at "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_printf(&buf, ":\n parent: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - prt_str(&buf, "\n child: "); -@@ -341,14 +344,14 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - ret = PTR_ERR_OR_ZERO(cur); - - printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1); -+ prt_char(&buf, ' '); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); - - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), -- trans, btree_node_unreadable, -- "Topology repair: unreadable btree node at btree %s level %u:\n" -+ trans, btree_node_read_error, -+ "Topology repair: unreadable btree node at\n" - " %s", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level - 1, - buf.buf)) { - bch2_btree_node_evict(trans, cur_k.k); - cur = NULL; -@@ -357,11 +360,9 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - if (ret) - break; - -- if (!btree_id_is_alloc(b->c.btree_id)) { -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); -- if (ret) -- break; -- } -+ ret = bch2_btree_lost_data(c, b->c.btree_id); -+ if (ret) -+ break; - continue; - } - -@@ -370,7 +371,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - break; - - if (bch2_btree_node_is_stale(c, cur)) { -- bch_info(c, "btree node %s 
older than nodes found by scanning", buf.buf); -+ bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf); - six_unlock_read(&cur->c.lock); - bch2_btree_node_evict(trans, cur_k.k); - ret = bch2_journal_key_delete(c, b->c.btree_id, -@@ -478,14 +479,13 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct - } - - printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - - if (mustfix_fsck_err_on(!have_child, - trans, btree_node_topology_interior_node_empty, -- "empty interior btree node at btree %s level %u\n" -- " %s", -- bch2_btree_id_str(b->c.btree_id), -- b->c.level, buf.buf)) -+ "empty interior btree node at %s", buf.buf)) - ret = DROP_THIS_NODE; - err: - fsck_err: -@@ -511,6 +511,7 @@ int bch2_check_topology(struct bch_fs *c) - { - struct btree_trans *trans = bch2_trans_get(c); - struct bpos pulled_from_scan = POS_MIN; -+ struct printbuf buf = PRINTBUF; - int ret = 0; - - bch2_trans_srcu_unlock(trans); -@@ -519,19 +520,22 @@ int bch2_check_topology(struct bch_fs *c) - struct btree_root *r = bch2_btree_id_root(c, i); - bool reconstructed_root = false; - -+ printbuf_reset(&buf); -+ bch2_btree_id_to_text(&buf, i); -+ - if (r->error) { -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); -+ ret = bch2_btree_lost_data(c, i); - if (ret) - break; - reconstruct_root: -- bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); -+ bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - - r->alive = false; - r->error = 0; - - if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, -- "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); -+ "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); -@@ -560,13 +564,14 @@ int bch2_check_topology(struct bch_fs *c) - if (!reconstructed_root) - goto reconstruct_root; - -- bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); -+ bch_err(c, "empty btree root %s", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - r->alive = false; - ret = 0; - } - } - fsck_err: -+ printbuf_exit(&buf); - bch2_trans_put(trans); - return ret; - } -@@ -713,6 +718,7 @@ static int bch2_gc_btrees(struct bch_fs *c) - { - struct btree_trans *trans = bch2_trans_get(c); - enum btree_id ids[BTREE_ID_NR]; -+ struct printbuf buf = PRINTBUF; - unsigned i; - int ret = 0; - -@@ -727,14 +733,9 @@ static int bch2_gc_btrees(struct bch_fs *c) - continue; - - ret = bch2_gc_btree(trans, btree, true); -- -- if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), -- trans, btree_node_read_error, -- "btree node read error for %s", -- bch2_btree_id_str(btree))) -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); - } --fsck_err: -+ -+ printbuf_exit(&buf); - bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; -@@ -802,7 +803,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - old = bch2_alloc_to_v4(k, &old_convert); - gc = new = *old; - -- percpu_down_read(&c->mark_lock); - __bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset)); - - old_gc = gc; -@@ -813,7 +813,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - gc.data_type = old->data_type; - gc.dirty_sectors = old->dirty_sectors; - } -- percpu_up_read(&c->mark_lock); 
- - /* - * gc.data_type doesn't yet include need_discard & need_gc_gen states - -@@ -831,11 +830,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, - * safe w.r.t. transaction restarts, so fixup the gc_bucket so - * we don't run it twice: - */ -- percpu_down_read(&c->mark_lock); - struct bucket *gc_m = gc_bucket(ca, iter->pos.offset); - gc_m->data_type = gc.data_type; - gc_m->dirty_sectors = gc.dirty_sectors; -- percpu_up_read(&c->mark_lock); - } - - if (fsck_err_on(new.data_type != gc.data_type, -@@ -895,11 +892,11 @@ static int bch2_gc_alloc_done(struct bch_fs *c) - - for_each_member_device(c, ca) { - ret = bch2_trans_run(c, -- for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc, -+ for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - POS(ca->dev_idx, ca->mi.nbuckets - 1), - BTREE_ITER_slots|BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_alloc_write_key(trans, &iter, ca, k))); - if (ret) { - bch2_dev_put(ca); -@@ -928,98 +925,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c) - return ret; - } - --static int bch2_gc_write_reflink_key(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k, -- size_t *idx) --{ -- struct bch_fs *c = trans->c; -- const __le64 *refcount = bkey_refcount_c(k); -- struct printbuf buf = PRINTBUF; -- struct reflink_gc *r; -- int ret = 0; -- -- if (!refcount) -- return 0; -- -- while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && -- r->offset < k.k->p.offset) -- ++*idx; -- -- if (!r || -- r->offset != k.k->p.offset || -- r->size != k.k->size) { -- bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); -- return -EINVAL; -- } -- -- if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), -- trans, reflink_v_refcount_wrong, -- "reflink key has wrong refcount:\n" -- " %s\n" -- " should be %u", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf), -- r->refcount)) { -- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -- ret = PTR_ERR_OR_ZERO(new); -- if (ret) -- goto out; -- -- if (!r->refcount) -- new->k.type = KEY_TYPE_deleted; -- else -- *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); -- ret = bch2_trans_update(trans, iter, new, 0); -- } --out: --fsck_err: -- printbuf_exit(&buf); -- return ret; --} -- --static int bch2_gc_reflink_done(struct bch_fs *c) --{ -- size_t idx = 0; -- -- int ret = bch2_trans_run(c, -- for_each_btree_key_commit(trans, iter, -- BTREE_ID_reflink, POS_MIN, -- BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_gc_write_reflink_key(trans, &iter, k, &idx))); -- c->reflink_gc_nr = 0; -- return ret; --} -- --static int bch2_gc_reflink_start(struct bch_fs *c) --{ -- c->reflink_gc_nr = 0; -- -- int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -- BTREE_ITER_prefetch, k, ({ -- const __le64 *refcount = bkey_refcount_c(k); -- -- if (!refcount) -- continue; -- -- struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, -- c->reflink_gc_nr++, GFP_KERNEL); -- if (!r) { -- ret = -BCH_ERR_ENOMEM_gc_reflink_start; -- break; -- } -- -- r->offset = k.k->p.offset; -- r->size = k.k->size; -- r->refcount = 0; -- 0; -- }))); -- -- bch_err_fn(c, ret); -- return ret; --} -- - static int bch2_gc_write_stripes_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -@@ -1171,7 +1076,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - if 
(unlikely(test_bit(BCH_FS_going_ro, &c->flags))) - return -EROFS; - -- percpu_down_read(&c->mark_lock); - rcu_read_lock(); - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); -@@ -1180,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - - if (dev_ptr_stale(ca, ptr) > 16) { - rcu_read_unlock(); -- percpu_up_read(&c->mark_lock); - goto update; - } - } -@@ -1195,7 +1098,6 @@ static int gc_btree_gens_key(struct btree_trans *trans, - *gen = ptr->gen; - } - rcu_read_unlock(); -- percpu_up_read(&c->mark_lock); - return 0; - update: - u = bch2_bkey_make_mut(trans, iter, &k, 0); -@@ -1224,7 +1126,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev - return ret; - - a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; -- alloc_data_type_set(&a_mut->v, a_mut->v.data_type); - - return bch2_trans_update(trans, iter, &a_mut->k_i, 0); - } -@@ -1337,9 +1238,16 @@ void bch2_gc_gens_async(struct bch_fs *c) - bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens); - } - --void bch2_fs_gc_init(struct bch_fs *c) -+void bch2_fs_btree_gc_exit(struct bch_fs *c) - { -- seqcount_init(&c->gc_pos_lock); -+} - -+int bch2_fs_btree_gc_init(struct bch_fs *c) -+{ -+ seqcount_init(&c->gc_pos_lock); - INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work); -+ -+ init_rwsem(&c->gc_lock); -+ mutex_init(&c->gc_gens_lock); -+ return 0; - } -diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h -index 8a47e8bd0791..9693a90a48a2 100644 ---- a/fs/bcachefs/btree_gc.h -+++ b/fs/bcachefs/btree_gc.h -@@ -82,6 +82,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *); - - int bch2_gc_gens(struct bch_fs *); - void bch2_gc_gens_async(struct bch_fs *); --void bch2_fs_gc_init(struct bch_fs *); -+ -+void bch2_fs_btree_gc_exit(struct bch_fs *); -+int bch2_fs_btree_gc_init(struct bch_fs *); - - #endif /* _BCACHEFS_BTREE_GC_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -index 839d68802e42..e371e60e3133 100644 ---- a/fs/bcachefs/btree_io.c -+++ b/fs/bcachefs/btree_io.c -@@ -25,9 +25,8 @@ - - static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) - { -- prt_printf(out, "btree=%s l=%u seq %llux\n", -- bch2_btree_id_str(BTREE_NODE_ID(bn)), -- (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); -+ bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); -+ prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn)); - prt_str(out, "min: "); - bch2_bpos_to_text(out, bn->min_key); - prt_newline(out); -@@ -490,8 +489,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) - if (b->nsets == MAX_BSETS && - !btree_node_write_in_flight(b) && - should_compact_all(c, b)) { -- bch2_btree_node_write(c, b, SIX_LOCK_write, -- BTREE_WRITE_init_next_bset); -+ bch2_btree_node_write_trans(trans, b, SIX_LOCK_write, -+ BTREE_WRITE_init_next_bset); - reinit_iter = true; - } - -@@ -832,13 +831,32 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, - return ret; - } - -+static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b, -+ struct bkey_s_c k, -+ enum bch_validate_flags flags) -+{ -+ return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = flags -+ }); -+} -+ - static int bset_key_validate(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, -- bool updated_range, int rw) -+ bool updated_range, -+ enum bch_validate_flags flags) - { -- 
return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: -- (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: -- (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); -+ struct bkey_validate_context from = (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = flags, -+ }; -+ return __bch2_bkey_validate(c, k, from) ?: -+ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?: -+ (flags & BCH_VALIDATE_write ? btree_node_bkey_val_validate(c, b, k, flags) : 0); - } - - static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, -@@ -855,7 +873,21 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, - - struct bkey tmp; - struct bkey_s u = __bkey_disassemble(b, k, &tmp); -- return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); -+ return !__bch2_bkey_validate(c, u.s_c, -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_silent -+ }); -+} -+ -+static inline int btree_node_read_bkey_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bch2_bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l); - } - - static int validate_bset_keys(struct bch_fs *c, struct btree *b, -@@ -918,7 +950,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, - BSET_BIG_ENDIAN(i), write, - &b->format, k); - -- if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) { - struct bkey up = bkey_unpack_key(b, prev); - - printbuf_reset(&buf); -@@ -965,6 +997,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, - got_good_key: - le16_add_cpu(&i->u64s, -next_good_key); - memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); -+ set_btree_node_need_rewrite(b); - } - fsck_err: - printbuf_exit(&buf); -@@ -1038,39 +1071,51 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - - while (b->written < (ptr_written ?: btree_sectors(c))) { - unsigned sectors; -- struct nonce nonce; - bool first = !b->written; -- bool csum_bad; - -- if (!b->written) { -+ if (first) { -+ bne = NULL; - i = &b->data->keys; -+ } else { -+ bne = write_block(b); -+ i = &bne->keys; - -- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_unknown_csum, -- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -- -- nonce = btree_nonce(i, b->written << 9); -+ if (i->seq != b->data->keys.seq) -+ break; -+ } - -- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -- csum_bad = bch2_crc_cmp(b->data->csum, csum); -- if (csum_bad) -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ struct nonce nonce = btree_nonce(i, b->written << 9); -+ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); - -- btree_err_on(csum_bad, -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_bad_csum, -- "%s", -- (printbuf_reset(&buf), -- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), -- buf.buf)); -- -- ret = bset_encrypt(c, i, b->written << 9); -- if (bch2_fs_fatal_err_on(ret, c, -- "decrypting btree node: %s", bch2_err_str(ret))) -- goto fsck_err; -+ btree_err_on(!good_csum_type, -+ bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) -+ ? 
-BCH_ERR_btree_node_read_err_must_retry -+ : -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_unknown_csum, -+ "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -+ -+ if (first) { -+ if (good_csum_type) { -+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); -+ bool csum_bad = bch2_crc_cmp(b->data->csum, csum); -+ if (csum_bad) -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ -+ btree_err_on(csum_bad, -+ -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_bad_csum, -+ "%s", -+ (printbuf_reset(&buf), -+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), -+ buf.buf)); -+ -+ ret = bset_encrypt(c, i, b->written << 9); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "decrypting btree node: %s", bch2_err_str(ret))) -+ goto fsck_err; -+ } - - btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && - !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), -@@ -1081,37 +1126,26 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - - sectors = vstruct_sectors(b->data, c->block_bits); - } else { -- bne = write_block(b); -- i = &bne->keys; -- -- if (i->seq != b->data->keys.seq) -- break; -- -- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_unknown_csum, -- "unknown checksum type %llu", BSET_CSUM_TYPE(i)); -- -- nonce = btree_nonce(i, b->written << 9); -- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -- csum_bad = bch2_crc_cmp(bne->csum, csum); -- if (ca && csum_bad) -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -- -- btree_err_on(csum_bad, -- -BCH_ERR_btree_node_read_err_want_retry, -- c, ca, b, i, NULL, -- bset_bad_csum, -- "%s", -- (printbuf_reset(&buf), -- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), -- buf.buf)); -- -- ret = bset_encrypt(c, i, b->written << 9); -- if (bch2_fs_fatal_err_on(ret, c, -- "decrypting btree node: %s", bch2_err_str(ret))) -- goto fsck_err; -+ if (good_csum_type) { -+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ bool csum_bad = bch2_crc_cmp(bne->csum, csum); -+ if (ca && csum_bad) -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ -+ btree_err_on(csum_bad, -+ -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, NULL, -+ bset_bad_csum, -+ "%s", -+ (printbuf_reset(&buf), -+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), -+ buf.buf)); -+ -+ ret = bset_encrypt(c, i, b->written << 9); -+ if (bch2_fs_fatal_err_on(ret, c, -+ "decrypting btree node: %s", bch2_err_str(ret))) -+ goto fsck_err; -+ } - - sectors = vstruct_sectors(bne, c->block_bits); - } -@@ -1216,7 +1250,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - struct bkey tmp; - struct bkey_s u = __bkey_disassemble(b, k, &tmp); - -- ret = bch2_bkey_val_validate(c, u.s_c, READ); -+ ret = btree_node_bkey_val_validate(c, b, u.s_c, READ); - if (ret == -BCH_ERR_fsck_delete_bkey || - (bch2_inject_invalid_keys && - !bversion_cmp(u.k->bversion, MAX_VERSION))) { -@@ -1226,6 +1260,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - memmove_u64s_down(k, bkey_p_next(k), - (u64 *) vstruct_end(i) - (u64 *) k); - set_btree_bset_end(b, b->set); -+ set_btree_node_need_rewrite(b); - continue; - } - if (ret) -@@ -1339,13 +1374,18 @@ static void btree_node_read_work(struct work_struct *work) - rb->start_time); - bio_put(&rb->bio); - -- if (saw_error && -+ if ((saw_error || -+ btree_node_need_rewrite(b)) && - !btree_node_read_error(b) 
&& - c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { -- printbuf_reset(&buf); -- bch2_bpos_to_text(&buf, b->key.k.p); -- bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", -- __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); -+ if (saw_error) { -+ printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", -+ __func__, buf.buf); -+ } - - bch2_btree_node_rewrite_async(c, b); - } -@@ -1933,7 +1973,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, - bool saw_error; - - int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), -- BKEY_TYPE_btree, WRITE); -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level + 1, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_write, -+ }); - if (ret) { - bch2_fs_inconsistent(c, "invalid btree node key before write"); - return ret; -@@ -2300,6 +2345,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, - } - } - -+void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b, -+ enum six_lock_type lock_type_held, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ -+ if (lock_type_held == SIX_LOCK_intent || -+ (lock_type_held == SIX_LOCK_read && -+ six_lock_tryupgrade(&b->c.lock))) { -+ __bch2_btree_node_write(c, b, flags); -+ -+ /* don't cycle lock unnecessarily: */ -+ if (btree_node_just_written(b) && -+ six_trylock_write(&b->c.lock)) { -+ bch2_btree_post_write_cleanup(c, b); -+ __bch2_btree_node_unlock_write(trans, b); -+ } -+ -+ if (lock_type_held == SIX_LOCK_read) -+ six_lock_downgrade(&b->c.lock); -+ } else { -+ __bch2_btree_node_write(c, b, flags); -+ if (lock_type_held == SIX_LOCK_write && -+ btree_node_just_written(b)) -+ bch2_btree_post_write_cleanup(c, b); -+ } -+} -+ - static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) - { - struct bucket_table *tbl; -diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h -index 9b01ca3de907..6f9e4a6dacf7 100644 ---- a/fs/bcachefs/btree_io.h -+++ b/fs/bcachefs/btree_io.h -@@ -144,11 +144,13 @@ enum btree_write_flags { - void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned); - void bch2_btree_node_write(struct bch_fs *, struct btree *, - enum six_lock_type, unsigned); -+void bch2_btree_node_write_trans(struct btree_trans *, struct btree *, -+ enum six_lock_type, unsigned); - --static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, -+static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b, - enum six_lock_type lock_held) - { -- bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); -+ bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); - } - - bool bch2_btree_flush_all_reads(struct bch_fs *); -diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c -index eef9b89c561d..5988219c6908 100644 ---- a/fs/bcachefs/btree_iter.c -+++ b/fs/bcachefs/btree_iter.c -@@ -270,8 +270,10 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) - BUG_ON(!(iter->flags & BTREE_ITER_all_snapshots) && - iter->pos.snapshot != iter->snapshot); - -- BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || -- bkey_gt(iter->pos, iter->k.p)); -+ BUG_ON(iter->flags & BTREE_ITER_all_snapshots ? 
!bpos_eq(iter->pos, iter->k.p) : -+ !(iter->flags & BTREE_ITER_is_extents) ? !bkey_eq(iter->pos, iter->k.p) : -+ (bkey_lt(iter->pos, bkey_start_pos(&iter->k)) || -+ bkey_gt(iter->pos, iter->k.p))); - } - - static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) -@@ -327,7 +329,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k - void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, - struct bpos pos) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - struct btree_path *path; - struct trans_for_each_path_inorder_iter iter; -@@ -697,6 +699,19 @@ void bch2_trans_node_add(struct btree_trans *trans, - bch2_trans_revalidate_updates_in_node(trans, b); - } - -+void bch2_trans_node_drop(struct btree_trans *trans, -+ struct btree *b) -+{ -+ struct btree_path *path; -+ unsigned i, level = b->c.level; -+ -+ trans_for_each_path(trans, path, i) -+ if (path->l[level].b == b) { -+ btree_node_unlock(trans, path, level); -+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); -+ } -+} -+ - /* - * A btree node has been modified in such a way as to invalidate iterators - fix - * them: -@@ -720,7 +735,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - unsigned long trace_ip) - { - struct bch_fs *c = trans->c; -- struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b; -+ struct btree_root *r = bch2_btree_id_root(c, path->btree_id); - enum six_lock_type lock_type; - unsigned i; - int ret; -@@ -728,7 +743,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - EBUG_ON(path->nodes_locked); - - while (1) { -- b = READ_ONCE(*rootp); -+ struct btree *b = READ_ONCE(r->b); -+ if (unlikely(!b)) { -+ BUG_ON(!r->error); -+ return r->error; -+ } -+ - path->level = READ_ONCE(b->c.level); - - if (unlikely(path->level < depth_want)) { -@@ -748,14 +768,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans, - ret = btree_node_lock(trans, path, &b->c, - path->level, lock_type, trace_ip); - if (unlikely(ret)) { -- if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) -- continue; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - BUG(); - } - -- if (likely(b == READ_ONCE(*rootp) && -+ if (likely(b == READ_ONCE(r->b) && - b->c.level == path->level && - !race_fault())) { - for (i = 0; i < path->level; i++) -@@ -825,6 +843,8 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p - - bch2_bkey_buf_init(&tmp); - -+ jiter->fail_if_too_many_whiteouts = true; -+ - while (nr-- && !ret) { - if (!bch2_btree_node_relock(trans, path, path->level)) - break; -@@ -1000,7 +1020,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) - - bch2_trans_unlock(trans); - cond_resched(); -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - if (unlikely(trans->memory_allocation_failure)) { - struct closure cl; -@@ -1267,7 +1287,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans, - { - int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos); - -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - EBUG_ON(!trans->paths[path_idx].ref); - - trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos); -@@ -1427,17 +1447,31 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ - (void *) trans->last_begin_ip); - } - --void __noreturn bch2_trans_in_restart_error(struct btree_trans 
*trans) -+static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) - { -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct printbuf buf = PRINTBUF; -+ bch2_prt_backtrace(&buf, &trans->last_restarted_trace); -+ panic("in transaction restart: %s, last restarted by\n%s", -+ bch2_err_str(trans->restarted), -+ buf.buf); -+#else - panic("in transaction restart: %s, last restarted by %pS\n", - bch2_err_str(trans->restarted), - (void *) trans->last_restarted_ip); -+#endif - } - --void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans) -+void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *trans) - { -- panic("trans should be locked, unlocked by %pS\n", -- (void *) trans->last_unlock_ip); -+ if (trans->restarted) -+ bch2_trans_in_restart_error(trans); -+ -+ if (!trans->locked) -+ panic("trans should be locked, unlocked by %pS\n", -+ (void *) trans->last_unlock_ip); -+ -+ BUG(); - } - - noinline __cold -@@ -1450,10 +1484,11 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) - trans_for_each_update(trans, i) { - struct bkey_s_c old = { &i->old_k, i->old_v }; - -- prt_printf(buf, "update: btree=%s cached=%u %pS\n", -- bch2_btree_id_str(i->btree_id), -- i->cached, -- (void *) i->ip_allocated); -+ prt_str(buf, "update: btree="); -+ bch2_btree_id_to_text(buf, i->btree_id); -+ prt_printf(buf, " cached=%u %pS\n", -+ i->cached, -+ (void *) i->ip_allocated); - - prt_printf(buf, " old "); - bch2_bkey_val_to_text(buf, trans->c, old); -@@ -1486,13 +1521,13 @@ static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_tra - { - struct btree_path *path = trans->paths + path_idx; - -- prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ", -+ prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", - path_idx, path->ref, path->intent_ref, - path->preserve ? 'P' : ' ', - path->should_be_locked ? 'S' : ' ', -- path->cached ? 'C' : 'B', -- bch2_btree_id_str(path->btree_id), -- path->level); -+ path->cached ? 
'C' : 'B'); -+ bch2_btree_id_level_to_text(out, path->btree_id, path->level); -+ prt_str(out, " pos "); - bch2_bpos_to_text(out, path->pos); - - if (!path->cached && btree_node_locked(path, path->level)) { -@@ -1717,8 +1752,7 @@ btree_path_idx_t bch2_path_get(struct btree_trans *trans, - struct trans_for_each_path_inorder_iter iter; - btree_path_idx_t path_pos = 0, path_idx; - -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_trans_verify_locks(trans); - - btree_trans_sort_paths(trans); -@@ -1833,7 +1867,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * - !bkey_eq(path->pos, ck->key.pos)); - - *u = ck->k->k; -- k = bkey_i_to_s_c(ck->k); -+ k = (struct bkey_s_c) { u, &ck->k->v }; - } - - return k; -@@ -1843,7 +1877,6 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey * - return (struct bkey_s_c) { u, NULL }; - } - -- - void bch2_set_btree_iter_dontneed(struct btree_iter *iter) - { - struct btree_trans *trans = iter->trans; -@@ -1870,7 +1903,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter) - struct btree_trans *trans = iter->trans; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - iter->path = bch2_btree_path_set_pos(trans, iter->path, - btree_iter_search_key(iter), -@@ -1945,7 +1978,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) - int ret; - - EBUG_ON(trans->paths[iter->path].cached); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); - - ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -@@ -2101,7 +2134,7 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, - { - struct btree_path *path = btree_iter_path(trans, iter); - -- return bch2_journal_keys_peek_upto(trans->c, iter->btree_id, -+ return bch2_journal_keys_peek_max(trans->c, iter->btree_id, - path->level, - path->pos, - end_pos, -@@ -2124,21 +2157,47 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, - } - - static noinline --struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k) -+void btree_trans_peek_journal(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c *k) - { - struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *next_journal = - bch2_btree_journal_peek(trans, iter, -- k.k ? k.k->p : path_l(path)->b->key.k.p); -- -+ k->k ? k->k->p : path_l(path)->b->key.k.p); - if (next_journal) { - iter->k = next_journal->k; -- k = bkey_i_to_s_c(next_journal); -+ *k = bkey_i_to_s_c(next_journal); - } -+} - -- return k; -+static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end_pos) -+{ -+ struct btree_path *path = btree_iter_path(trans, iter); -+ -+ return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, -+ path->level, -+ path->pos, -+ end_pos, -+ &iter->journal_idx); -+} -+ -+static noinline -+void btree_trans_peek_prev_journal(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c *k) -+{ -+ struct btree_path *path = btree_iter_path(trans, iter); -+ struct bkey_i *next_journal = -+ bch2_btree_journal_peek_prev(trans, iter, -+ k->k ? 
k->k->p : path_l(path)->b->key.k.p); -+ -+ if (next_journal) { -+ iter->k = next_journal->k; -+ *k = bkey_i_to_s_c(next_journal); -+ } - } - - /* -@@ -2154,8 +2213,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos - struct bkey_s_c k; - int ret; - -- bch2_trans_verify_not_in_restart(trans); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if ((iter->flags & BTREE_ITER_key_cache_fill) && - bpos_eq(iter->pos, pos)) -@@ -2181,13 +2239,17 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos - if (unlikely(ret)) - return bkey_s_c_err(ret); - -- btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); -- - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); -- if (k.k && !bkey_err(k)) { -- iter->k = u; -- k.k = &iter->k; -- } -+ if (!k.k) -+ return k; -+ -+ if ((iter->flags & BTREE_ITER_all_snapshots) && -+ !bpos_eq(pos, k.k->p)) -+ return bkey_s_c_null; -+ -+ iter->k = u; -+ k.k = &iter->k; -+ btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); - return k; - } - -@@ -2201,8 +2263,6 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - bch2_btree_iter_verify(iter); - - while (1) { -- struct btree_path_level *l; -- - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, - iter->flags & BTREE_ITER_intent, - btree_iter_ip_allocated(iter)); -@@ -2212,17 +2272,17 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); -- goto out; -+ break; - } - - struct btree_path *path = btree_iter_path(trans, iter); -- l = path_l(path); -+ struct btree_path_level *l = path_l(path); - - if (unlikely(!l->b)) { - /* No btree nodes at requested level: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; -- goto out; -+ break; - } - - btree_path_set_should_be_locked(trans, path); -@@ -2233,15 +2293,14 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - k.k && - (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - k = k2; -- ret = bkey_err(k); -- if (ret) { -+ if (bkey_err(k)) { - bch2_btree_iter_set_pos(iter, iter->pos); -- goto out; -+ break; - } - } - - if (unlikely(iter->flags & BTREE_ITER_with_journal)) -- k = btree_trans_peek_journal(trans, iter, k); -+ btree_trans_peek_journal(trans, iter, &k); - - if (unlikely((iter->flags & BTREE_ITER_with_updates) && - trans->nr_updates)) -@@ -2270,32 +2329,32 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp - /* End of btree: */ - bch2_btree_iter_set_pos(iter, SPOS_MAX); - k = bkey_s_c_null; -- goto out; -+ break; - } - } --out: -- bch2_btree_iter_verify(iter); - -+ bch2_btree_iter_verify(iter); - return k; - } - - /** -- * bch2_btree_iter_peek_upto() - returns first key greater than or equal to -+ * bch2_btree_iter_peek_max() - returns first key greater than or equal to - * iterator's current position - * @iter: iterator to peek from - * @end: search limit: returns keys less than or equal to @end - * - * Returns: key if found, or an error extractable with bkey_err(). 
- */ --struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) -+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end) - { - struct btree_trans *trans = iter->trans; - struct bpos search_key = btree_iter_search_key(iter); - struct bkey_s_c k; -- struct bpos iter_pos; -+ struct bpos iter_pos = iter->pos; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); -+ bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); - - if (iter->update_path) { -@@ -2304,8 +2363,6 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e - iter->update_path = 0; - } - -- bch2_btree_iter_verify_entry_exit(iter); -- - while (1) { - k = __bch2_btree_iter_peek(iter, search_key); - if (unlikely(!k.k)) -@@ -2313,75 +2370,75 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e - if (unlikely(bkey_err(k))) - goto out_no_locked; - -- /* -- * We need to check against @end before FILTER_SNAPSHOTS because -- * if we get to a different inode that requested we might be -- * seeing keys for a different snapshot tree that will all be -- * filtered out. -- * -- * But we can't do the full check here, because bkey_start_pos() -- * isn't monotonically increasing before FILTER_SNAPSHOTS, and -- * that's what we check against in extents mode: -- */ -- if (unlikely(!(iter->flags & BTREE_ITER_is_extents) -- ? bkey_gt(k.k->p, end) -- : k.k->p.inode > end.inode)) -- goto end; -+ if (iter->flags & BTREE_ITER_filter_snapshots) { -+ /* -+ * We need to check against @end before FILTER_SNAPSHOTS because -+ * if we get to a different inode that requested we might be -+ * seeing keys for a different snapshot tree that will all be -+ * filtered out. -+ * -+ * But we can't do the full check here, because bkey_start_pos() -+ * isn't monotonically increasing before FILTER_SNAPSHOTS, and -+ * that's what we check against in extents mode: -+ */ -+ if (unlikely(!(iter->flags & BTREE_ITER_is_extents) -+ ? 
bkey_gt(k.k->p, end) -+ : k.k->p.inode > end.inode)) -+ goto end; -+ -+ if (iter->update_path && -+ !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { -+ bch2_path_put_nokeep(trans, iter->update_path, -+ iter->flags & BTREE_ITER_intent); -+ iter->update_path = 0; -+ } - -- if (iter->update_path && -- !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) { -- bch2_path_put_nokeep(trans, iter->update_path, -- iter->flags & BTREE_ITER_intent); -- iter->update_path = 0; -- } -+ if ((iter->flags & BTREE_ITER_intent) && -+ !(iter->flags & BTREE_ITER_is_extents) && -+ !iter->update_path) { -+ struct bpos pos = k.k->p; - -- if ((iter->flags & BTREE_ITER_filter_snapshots) && -- (iter->flags & BTREE_ITER_intent) && -- !(iter->flags & BTREE_ITER_is_extents) && -- !iter->update_path) { -- struct bpos pos = k.k->p; -+ if (pos.snapshot < iter->snapshot) { -+ search_key = bpos_successor(k.k->p); -+ continue; -+ } - -- if (pos.snapshot < iter->snapshot) { -- search_key = bpos_successor(k.k->p); -- continue; -- } -+ pos.snapshot = iter->snapshot; - -- pos.snapshot = iter->snapshot; -+ /* -+ * advance, same as on exit for iter->path, but only up -+ * to snapshot -+ */ -+ __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); -+ iter->update_path = iter->path; -+ -+ iter->update_path = bch2_btree_path_set_pos(trans, -+ iter->update_path, pos, -+ iter->flags & BTREE_ITER_intent, -+ _THIS_IP_); -+ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); -+ if (unlikely(ret)) { -+ k = bkey_s_c_err(ret); -+ goto out_no_locked; -+ } -+ } - - /* -- * advance, same as on exit for iter->path, but only up -- * to snapshot -+ * We can never have a key in a leaf node at POS_MAX, so -+ * we don't have to check these successor() calls: - */ -- __btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent); -- iter->update_path = iter->path; -- -- iter->update_path = bch2_btree_path_set_pos(trans, -- iter->update_path, pos, -- iter->flags & BTREE_ITER_intent, -- _THIS_IP_); -- ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); -- if (unlikely(ret)) { -- k = bkey_s_c_err(ret); -- goto out_no_locked; -+ if (!bch2_snapshot_is_ancestor(trans->c, -+ iter->snapshot, -+ k.k->p.snapshot)) { -+ search_key = bpos_successor(k.k->p); -+ continue; - } -- } - -- /* -- * We can never have a key in a leaf node at POS_MAX, so -- * we don't have to check these successor() calls: -- */ -- if ((iter->flags & BTREE_ITER_filter_snapshots) && -- !bch2_snapshot_is_ancestor(trans->c, -- iter->snapshot, -- k.k->p.snapshot)) { -- search_key = bpos_successor(k.k->p); -- continue; -- } -- -- if (bkey_whiteout(k.k) && -- !(iter->flags & BTREE_ITER_all_snapshots)) { -- search_key = bkey_successor(iter, k.k->p); -- continue; -+ if (bkey_whiteout(k.k) && -+ !(iter->flags & BTREE_ITER_key_cache_fill)) { -+ search_key = bkey_successor(iter, k.k->p); -+ continue; -+ } - } - - /* -@@ -2451,127 +2508,204 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) - return bch2_btree_iter_peek(iter); - } - --/** -- * bch2_btree_iter_peek_prev() - returns first key less than or equal to -- * iterator's current position -- * @iter: iterator to peek from -- * -- * Returns: key if found, or an error extractable with bkey_err(). 
-- */ --struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key) - { - struct btree_trans *trans = iter->trans; -- struct bpos search_key = iter->pos; -- struct bkey_s_c k; -- struct bkey saved_k; -- const struct bch_val *saved_v; -- btree_path_idx_t saved_path = 0; -- int ret; -- -- bch2_trans_verify_not_unlocked(trans); -- EBUG_ON(btree_iter_path(trans, iter)->cached || -- btree_iter_path(trans, iter)->level); -- -- if (iter->flags & BTREE_ITER_with_journal) -- return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported); -+ struct bkey_s_c k, k2; - - bch2_btree_iter_verify(iter); -- bch2_btree_iter_verify_entry_exit(iter); -- -- if (iter->flags & BTREE_ITER_filter_snapshots) -- search_key.snapshot = U32_MAX; - - while (1) { - iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, -- iter->flags & BTREE_ITER_intent, -- btree_iter_ip_allocated(iter)); -+ iter->flags & BTREE_ITER_intent, -+ btree_iter_ip_allocated(iter)); - -- ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -+ int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) { - /* ensure that iter->k is consistent with iter->pos: */ - bch2_btree_iter_set_pos(iter, iter->pos); - k = bkey_s_c_err(ret); -- goto out_no_locked; -+ break; - } - - struct btree_path *path = btree_iter_path(trans, iter); -+ struct btree_path_level *l = path_l(path); -+ -+ if (unlikely(!l->b)) { -+ /* No btree nodes at requested level: */ -+ bch2_btree_iter_set_pos(iter, SPOS_MAX); -+ k = bkey_s_c_null; -+ break; -+ } -+ -+ btree_path_set_should_be_locked(trans, path); -+ -+ k = btree_path_level_peek_all(trans->c, l, &iter->k); -+ if (!k.k || bpos_gt(k.k->p, search_key)) { -+ k = btree_path_level_prev(trans, path, l, &iter->k); - -- k = btree_path_level_peek(trans, path, &path->l[0], &iter->k); -- if (!k.k || -- ((iter->flags & BTREE_ITER_is_extents) -- ? 
bpos_ge(bkey_start_pos(k.k), search_key) -- : bpos_gt(k.k->p, search_key))) -- k = btree_path_level_prev(trans, path, &path->l[0], &iter->k); -+ BUG_ON(k.k && bpos_gt(k.k->p, search_key)); -+ } -+ -+ if (unlikely(iter->flags & BTREE_ITER_with_key_cache) && -+ k.k && -+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { -+ k = k2; -+ if (bkey_err(k2)) { -+ bch2_btree_iter_set_pos(iter, iter->pos); -+ break; -+ } -+ } -+ -+ if (unlikely(iter->flags & BTREE_ITER_with_journal)) -+ btree_trans_peek_prev_journal(trans, iter, &k); - - if (unlikely((iter->flags & BTREE_ITER_with_updates) && - trans->nr_updates)) - bch2_btree_trans_peek_prev_updates(trans, iter, &k); - -- if (likely(k.k)) { -- if (iter->flags & BTREE_ITER_filter_snapshots) { -- if (k.k->p.snapshot == iter->snapshot) -- goto got_key; -+ if (likely(k.k && !bkey_deleted(k.k))) { -+ break; -+ } else if (k.k) { -+ search_key = bpos_predecessor(k.k->p); -+ } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { -+ /* Advance to previous leaf node: */ -+ search_key = bpos_predecessor(path->l[0].b->data->min_key); -+ } else { -+ /* Start of btree: */ -+ bch2_btree_iter_set_pos(iter, POS_MIN); -+ k = bkey_s_c_null; -+ break; -+ } -+ } -+ -+ bch2_btree_iter_verify(iter); -+ return k; -+} -+ -+/** -+ * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to -+ * iterator's current position -+ * @iter: iterator to peek from -+ * @end: search limit: returns keys greater than or equal to @end -+ * -+ * Returns: key if found, or an error extractable with bkey_err(). -+ */ -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end) -+{ -+ if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && -+ !bkey_eq(iter->pos, POS_MAX)) { -+ /* -+ * bkey_start_pos(), for extents, is not monotonically -+ * increasing until after filtering for snapshots: -+ * -+ * Thus, for extents we need to search forward until we find a -+ * real visible extents - easiest to just use peek_slot() (which -+ * internally uses peek() for extents) -+ */ -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) -+ return k; -+ -+ if (!bkey_deleted(k.k) && -+ (!(iter->flags & BTREE_ITER_is_extents) || -+ bkey_lt(bkey_start_pos(k.k), iter->pos))) -+ return k; -+ } -+ -+ struct btree_trans *trans = iter->trans; -+ struct bpos search_key = iter->pos; -+ struct bkey_s_c k; -+ btree_path_idx_t saved_path = 0; -+ -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); -+ bch2_btree_iter_verify_entry_exit(iter); -+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); -+ -+ while (1) { -+ k = __bch2_btree_iter_peek_prev(iter, search_key); -+ if (unlikely(!k.k)) -+ goto end; -+ if (unlikely(bkey_err(k))) -+ goto out_no_locked; -+ -+ if (iter->flags & BTREE_ITER_filter_snapshots) { -+ struct btree_path *s = saved_path ? 
trans->paths + saved_path : NULL; -+ if (s && bpos_lt(k.k->p, SPOS(s->pos.inode, s->pos.offset, iter->snapshot))) { -+ /* -+ * If we have a saved candidate, and we're past -+ * the last possible snapshot overwrite, return -+ * it: -+ */ -+ bch2_path_put_nokeep(trans, iter->path, -+ iter->flags & BTREE_ITER_intent); -+ iter->path = saved_path; -+ saved_path = 0; -+ k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); -+ break; -+ } -+ -+ /* -+ * We need to check against @end before FILTER_SNAPSHOTS because -+ * if we get to a different inode that requested we might be -+ * seeing keys for a different snapshot tree that will all be -+ * filtered out. -+ */ -+ if (unlikely(bkey_lt(k.k->p, end))) -+ goto end; -+ -+ if (!bch2_snapshot_is_ancestor(trans->c, iter->snapshot, k.k->p.snapshot)) { -+ search_key = bpos_predecessor(k.k->p); -+ continue; -+ } - -+ if (k.k->p.snapshot != iter->snapshot) { - /* -- * If we have a saved candidate, and we're no -- * longer at the same _key_ (not pos), return -- * that candidate -+ * Have a key visible in iter->snapshot, but -+ * might have overwrites: - save it and keep -+ * searching. Unless it's a whiteout - then drop -+ * our previous saved candidate: - */ -- if (saved_path && !bkey_eq(k.k->p, saved_k.p)) { -- bch2_path_put_nokeep(trans, iter->path, -- iter->flags & BTREE_ITER_intent); -- iter->path = saved_path; -+ if (saved_path) { -+ bch2_path_put_nokeep(trans, saved_path, -+ iter->flags & BTREE_ITER_intent); - saved_path = 0; -- iter->k = saved_k; -- k.v = saved_v; -- goto got_key; - } - -- if (bch2_snapshot_is_ancestor(trans->c, -- iter->snapshot, -- k.k->p.snapshot)) { -- if (saved_path) -- bch2_path_put_nokeep(trans, saved_path, -- iter->flags & BTREE_ITER_intent); -+ if (!bkey_whiteout(k.k)) { - saved_path = btree_path_clone(trans, iter->path, - iter->flags & BTREE_ITER_intent, - _THIS_IP_); -- path = btree_iter_path(trans, iter); -- trace_btree_path_save_pos(trans, path, trans->paths + saved_path); -- saved_k = *k.k; -- saved_v = k.v; -+ trace_btree_path_save_pos(trans, -+ trans->paths + iter->path, -+ trans->paths + saved_path); - } - - search_key = bpos_predecessor(k.k->p); - continue; - } --got_key: -- if (bkey_whiteout(k.k) && -- !(iter->flags & BTREE_ITER_all_snapshots)) { -+ -+ if (bkey_whiteout(k.k)) { - search_key = bkey_predecessor(iter, k.k->p); -- if (iter->flags & BTREE_ITER_filter_snapshots) -- search_key.snapshot = U32_MAX; -+ search_key.snapshot = U32_MAX; - continue; - } -- -- btree_path_set_should_be_locked(trans, path); -- break; -- } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) { -- /* Advance to previous leaf node: */ -- search_key = bpos_predecessor(path->l[0].b->data->min_key); -- } else { -- /* Start of btree: */ -- bch2_btree_iter_set_pos(iter, POS_MIN); -- k = bkey_s_c_null; -- goto out_no_locked; - } -- } - -- EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos)); -+ EBUG_ON(iter->flags & BTREE_ITER_all_snapshots ? bpos_gt(k.k->p, iter->pos) : -+ iter->flags & BTREE_ITER_is_extents ? bkey_ge(bkey_start_pos(k.k), iter->pos) : -+ bkey_gt(k.k->p, iter->pos)); -+ -+ if (unlikely(iter->flags & BTREE_ITER_all_snapshots ? bpos_lt(k.k->p, end) : -+ iter->flags & BTREE_ITER_is_extents ? 
bkey_le(k.k->p, end) : -+ bkey_lt(k.k->p, end))) -+ goto end; -+ -+ break; -+ } - - /* Extents can straddle iter->pos: */ -- if (bkey_lt(k.k->p, iter->pos)) -- iter->pos = k.k->p; -+ iter->pos = bpos_min(iter->pos, k.k->p);; - - if (iter->flags & BTREE_ITER_filter_snapshots) - iter->pos.snapshot = iter->snapshot; -@@ -2581,8 +2715,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) - - bch2_btree_iter_verify_entry_exit(iter); - bch2_btree_iter_verify(iter); -- - return k; -+end: -+ bch2_btree_iter_set_pos(iter, end); -+ k = bkey_s_c_null; -+ goto out_no_locked; - } - - /** -@@ -2607,7 +2744,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - struct bkey_s_c k; - int ret; - -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - bch2_btree_iter_verify(iter); - bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); -@@ -2632,6 +2769,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - goto out_no_locked; - } - -+ struct btree_path *path = btree_iter_path(trans, iter); -+ if (unlikely(!btree_path_node(path, path->level))) -+ return bkey_s_c_null; -+ - if ((iter->flags & BTREE_ITER_cached) || - !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) { - k = bkey_s_c_null; -@@ -2658,6 +2799,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); - if (unlikely(!k.k)) - goto out_no_locked; -+ -+ if (unlikely(k.k->type == KEY_TYPE_whiteout && -+ (iter->flags & BTREE_ITER_filter_snapshots) && -+ !(iter->flags & BTREE_ITER_key_cache_fill))) -+ iter->k.type = KEY_TYPE_deleted; - } else { - struct bpos next; - struct bpos end = iter->pos; -@@ -2671,7 +2817,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - struct btree_iter iter2; - - bch2_trans_copy_iter(&iter2, iter); -- k = bch2_btree_iter_peek_upto(&iter2, end); -+ k = bch2_btree_iter_peek_max(&iter2, end); - - if (k.k && !bkey_err(k)) { - swap(iter->key_cache_path, iter2.key_cache_path); -@@ -2682,7 +2828,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) - } else { - struct bpos pos = iter->pos; - -- k = bch2_btree_iter_peek_upto(iter, end); -+ k = bch2_btree_iter_peek_max(iter, end); - if (unlikely(bkey_err(k))) - bch2_btree_iter_set_pos(iter, pos); - else -@@ -2902,7 +3048,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans, - unsigned flags) - { - bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -- bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, 0, flags), - _RET_IP_); - } - -@@ -2918,8 +3064,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans, - flags |= BTREE_ITER_snapshot_field; - flags |= BTREE_ITER_all_snapshots; - -+ if (!depth && btree_id_cached(trans->c, btree_id)) -+ flags |= BTREE_ITER_with_key_cache; -+ - bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth, -- __bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, depth, flags), - _RET_IP_); - - iter->min_depth = depth; -@@ -3122,14 +3271,14 @@ u32 bch2_trans_begin(struct btree_trans *trans) - - trans->last_begin_ip = _RET_IP_; - -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - if (trans->restarted) { - bch2_btree_path_traverse_all(trans); - trans->notrace_relock_fail = false; - } - -- 
bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - return trans->restart_count; - } - -@@ -3228,7 +3377,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - trans->srcu_lock_time = jiffies; - trans->srcu_held = true; -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - - closure_init_stack_release(&trans->ref); - return trans; -@@ -3262,6 +3411,9 @@ void bch2_trans_put(struct btree_trans *trans) - { - struct bch_fs *c = trans->c; - -+ if (trans->restarted) -+ bch2_trans_in_restart_error(trans); -+ - bch2_trans_unlock(trans); - - trans_for_each_update(trans, i) -@@ -3285,6 +3437,10 @@ void bch2_trans_put(struct btree_trans *trans) - closure_return_sync(&trans->ref); - trans->locking_wait.task = NULL; - -+#ifdef CONFIG_BCACHEFS_DEBUG -+ darray_exit(&trans->last_restarted_trace); -+#endif -+ - unsigned long *paths_allocated = trans->paths_allocated; - trans->paths_allocated = NULL; - trans->paths = NULL; -@@ -3338,8 +3494,9 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, - pid = owner ? owner->pid : 0; - rcu_read_unlock(); - -- prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b', -- b->level, bch2_btree_id_str(b->btree_id)); -+ prt_printf(out, "\t%px %c ", b, b->cached ? 'c' : 'b'); -+ bch2_btree_id_to_text(out, b->btree_id); -+ prt_printf(out, " l=%u:", b->level); - bch2_bpos_to_text(out, btree_node_pos(b)); - - prt_printf(out, "\t locks %u:%u:%u held by pid %u", -@@ -3378,11 +3535,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) - if (!path->nodes_locked) - continue; - -- prt_printf(out, " path %u %c l=%u %s:", -- idx, -- path->cached ? 'c' : 'b', -- path->level, -- bch2_btree_id_str(path->btree_id)); -+ prt_printf(out, " path %u %c ", -+ idx, -+ path->cached ? 
'c' : 'b'); -+ bch2_btree_id_to_text(out, path->btree_id); -+ prt_printf(out, " l=%u:", path->level); - bch2_bpos_to_text(out, path->pos); - prt_newline(out); - -@@ -3488,7 +3645,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) - #ifdef CONFIG_LOCKDEP - fs_reclaim_acquire(GFP_KERNEL); - struct btree_trans *trans = bch2_trans_get(c); -- trans_set_locked(trans); -+ trans_set_locked(trans, false); - bch2_trans_put(trans); - fs_reclaim_release(GFP_KERNEL); - #endif -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -index 0bda054f80d7..b9538e6e6d65 100644 ---- a/fs/bcachefs/btree_iter.h -+++ b/fs/bcachefs/btree_iter.h -@@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path - { - unsigned idx = path - trans->paths; - -+ EBUG_ON(idx >= trans->nr_paths); - EBUG_ON(!test_bit(idx, trans->paths_allocated)); - if (unlikely(path->ref == U8_MAX)) { - bch2_dump_trans_paths_updates(trans); -@@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path - - static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) - { -+ EBUG_ON(path - trans->paths >= trans->nr_paths); - EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated)); - EBUG_ON(!path->ref); - EBUG_ON(!path->intent_ref && intent); -@@ -234,12 +236,12 @@ int __must_check bch2_btree_path_traverse_one(struct btree_trans *, - btree_path_idx_t, - unsigned, unsigned long); - --static inline void bch2_trans_verify_not_unlocked(struct btree_trans *); -+static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *); - - static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, - btree_path_idx_t path, unsigned flags) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK) - return 0; -@@ -324,38 +326,33 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans, - bch2_trans_restart_error(trans, restart_count); - } - --void __noreturn bch2_trans_in_restart_error(struct btree_trans *); -+void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *); - --static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans) -+static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *trans) - { -- if (trans->restarted) -- bch2_trans_in_restart_error(trans); --} -- --void __noreturn bch2_trans_unlocked_error(struct btree_trans *); -- --static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) --{ -- if (!trans->locked) -- bch2_trans_unlocked_error(trans); -+ if (trans->restarted || !trans->locked) -+ bch2_trans_unlocked_or_in_restart_error(trans); - } - - __always_inline --static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) -+static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip) - { - BUG_ON(err <= 0); - BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); - - trans->restarted = err; -- trans->last_restarted_ip = _THIS_IP_; -+ trans->last_restarted_ip = ip; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ darray_exit(&trans->last_restarted_trace); -+ bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT); -+#endif - return -err; - } - - __always_inline - static int btree_trans_restart(struct btree_trans *trans, int err) - { -- btree_trans_restart_nounlock(trans, err); -- return -err; -+ return 
btree_trans_restart_ip(trans, err, _THIS_IP_); - } - - bool bch2_btree_node_upgrade(struct btree_trans *, -@@ -375,6 +372,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans, - void bch2_trans_downgrade(struct btree_trans *); - - void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *); -+void bch2_trans_node_drop(struct btree_trans *trans, struct btree *); - void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); - - int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); -@@ -384,15 +382,21 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *); - struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); - struct btree *bch2_btree_iter_next_node(struct btree_iter *); - --struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); -+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos); - struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); - - static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) - { -- return bch2_btree_iter_peek_upto(iter, SPOS_MAX); -+ return bch2_btree_iter_peek_max(iter, SPOS_MAX); -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos); -+ -+static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) -+{ -+ return bch2_btree_iter_peek_prev_min(iter, POS_MIN); - } - --struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); - struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); - - struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -@@ -443,10 +447,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna - - void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); - --static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, -- unsigned btree_id, -- unsigned flags) -+static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -+ unsigned btree_id, -+ unsigned level, -+ unsigned flags) - { -+ if (level || !btree_id_cached(trans->c, btree_id)) { -+ flags &= ~BTREE_ITER_cached; -+ flags &= ~BTREE_ITER_with_key_cache; -+ } else if (!(flags & BTREE_ITER_cached)) -+ flags |= BTREE_ITER_with_key_cache; -+ - if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) && - btree_id_is_extents(btree_id)) - flags |= BTREE_ITER_is_extents; -@@ -465,19 +476,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, - return flags; - } - --static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -- unsigned btree_id, -- unsigned flags) --{ -- if (!btree_id_cached(trans->c, btree_id)) { -- flags &= ~BTREE_ITER_cached; -- flags &= ~BTREE_ITER_with_key_cache; -- } else if (!(flags & BTREE_ITER_cached)) -- flags |= BTREE_ITER_with_key_cache; -- -- return __bch2_btree_iter_flags(trans, btree_id, flags); --} -- - static inline void bch2_trans_iter_init_common(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -@@ -514,7 +512,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans, - if (__builtin_constant_p(btree_id) && - __builtin_constant_p(flags)) - bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -- bch2_btree_iter_flags(trans, btree_id, flags), -+ bch2_btree_iter_flags(trans, btree_id, 0, flags), - _THIS_IP_); - else - bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags); -@@ -593,13 +591,18 @@ static inline struct bkey_s_c 
bch2_bkey_get_iter(struct btree_trans *trans, - bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ - _btree_id, _pos, _flags, KEY_TYPE_##_type)) - -+static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) -+{ -+ unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); -+ memcpy(dst_v, src_k.v, b); -+ if (unlikely(b < dst_size)) -+ memset(dst_v + b, 0, dst_size - b); -+} -+ - #define bkey_val_copy(_dst_v, _src_k) \ - do { \ -- unsigned b = min_t(unsigned, sizeof(*_dst_v), \ -- bkey_val_bytes(_src_k.k)); \ -- memcpy(_dst_v, _src_k.v, b); \ -- if (b < sizeof(*_dst_v)) \ -- memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ -+ BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ -+ __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ - } while (0) - - static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, -@@ -608,17 +611,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, - unsigned val_size, void *val) - { - struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -- -- k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); -- ret = bkey_err(k); -+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); -+ int ret = bkey_err(k); - if (!ret) { -- unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); -- -- memcpy(val, k.v, b); -- if (unlikely(b < sizeof(*val))) -- memset((void *) val + b, 0, sizeof(*val) - b); -+ __bkey_val_copy(val, val_size, k); - bch2_trans_iter_exit(trans, &iter); - } - -@@ -677,12 +673,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, - bch2_btree_iter_peek(iter); - } - --static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, -+static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter, - struct bpos end, - unsigned flags) - { - if (!(flags & BTREE_ITER_slots)) -- return bch2_btree_iter_peek_upto(iter, end); -+ return bch2_btree_iter_peek_max(iter, end); - - if (bkey_gt(iter->pos, end)) - return bkey_s_c_null; -@@ -746,7 +742,7 @@ transaction_restart: \ - _ret2 ?: trans_was_restarted(_trans, _restart_count); \ - }) - --#define for_each_btree_key_upto_continue(_trans, _iter, \ -+#define for_each_btree_key_max_continue(_trans, _iter, \ - _end, _flags, _k, _do) \ - ({ \ - struct bkey_s_c _k; \ -@@ -754,7 +750,7 @@ transaction_restart: \ - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), \ - _end, (_flags)); \ - if (!(_k).k) \ - break; \ -@@ -768,9 +764,9 @@ transaction_restart: \ - }) - - #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \ -- for_each_btree_key_upto_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) -+ for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do) - --#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _do) \ - ({ \ - bch2_trans_begin(trans); \ -@@ -779,12 +775,12 @@ transaction_restart: \ - bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - \ -- for_each_btree_key_upto_continue(_trans, _iter, _end, _flags, _k, _do);\ -+ for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do);\ - }) - - #define for_each_btree_key(_trans, _iter, _btree_id, \ - _start, _flags, _k, _do) \ -- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \ -+ 
for_each_btree_key_max(_trans, _iter, _btree_id, _start, \ - SPOS_MAX, _flags, _k, _do) - - #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ -@@ -828,33 +824,33 @@ transaction_restart: \ - (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_commit_flags))) - --#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max_commit(_trans, _iter, _btree_id, \ - _start, _end, _iter_flags, _k, \ - _disk_res, _journal_seq, _commit_flags,\ - _do) \ -- for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ -+ for_each_btree_key_max(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ - (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ - (_journal_seq), (_commit_flags))) - - struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); - --#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \ - _start, _end, _flags, _k, _ret) \ - for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - --#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\ -+#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\ - for (; \ -- (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \ -+ (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \ - !((_ret) = bkey_err(_k)) && (_k).k; \ - bch2_btree_iter_advance(&(_iter))) - - #define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ - _start, _flags, _k, _ret) \ -- for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\ -+ for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\ - SPOS_MAX, _flags, _k, _ret) - - #define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ -@@ -866,7 +862,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); - bch2_btree_iter_rewind(&(_iter))) - - #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ -- for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) -+ for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) - - /* - * This should not be used in a fastpath, without first trying _do in -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -index c1657182c275..6d25e3f85ce8 100644 ---- a/fs/bcachefs/btree_journal_iter.c -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -16,6 +16,17 @@ - * operations for the regular btree iter code to use: - */ - -+static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos) -+{ -+ size_t gap_size = keys->size - keys->nr; -+ -+ BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size); -+ -+ if (pos >= keys->gap) -+ pos -= gap_size; -+ return pos; -+} -+ - static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx) - { - size_t gap_size = keys->size - keys->nr; -@@ -61,7 +72,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, - } - - /* Returns first non-overwritten key >= search key: */ --struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, -+struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, - unsigned level, struct bpos pos, - struct 
bpos end_pos, size_t *idx) - { -@@ -84,27 +95,92 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree - } - } - -+ struct bkey_i *ret = NULL; -+ rcu_read_lock(); /* for overwritten_ranges */ -+ - while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { - if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) -- return NULL; -+ break; - - if (k->overwritten) { -- (*idx)++; -+ if (k->overwritten_range) -+ *idx = rcu_dereference(k->overwritten_range)->end; -+ else -+ *idx += 1; - continue; - } - -- if (__journal_key_cmp(btree_id, level, pos, k) <= 0) -- return k->k; -+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0) { -+ ret = k->k; -+ break; -+ } - - (*idx)++; - iters++; - if (iters == 10) { - *idx = 0; -+ rcu_read_unlock(); - goto search; - } - } - -- return NULL; -+ rcu_read_unlock(); -+ return ret; -+} -+ -+struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id, -+ unsigned level, struct bpos pos, -+ struct bpos end_pos, size_t *idx) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ unsigned iters = 0; -+ struct journal_key *k; -+ -+ BUG_ON(*idx > keys->nr); -+search: -+ if (!*idx) -+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos); -+ -+ while (*idx && -+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { -+ (*idx)++; -+ iters++; -+ if (iters == 10) { -+ *idx = 0; -+ goto search; -+ } -+ } -+ -+ struct bkey_i *ret = NULL; -+ rcu_read_lock(); /* for overwritten_ranges */ -+ -+ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { -+ if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) -+ break; -+ -+ if (k->overwritten) { -+ if (k->overwritten_range) -+ *idx = rcu_dereference(k->overwritten_range)->start - 1; -+ else -+ *idx -= 1; -+ continue; -+ } -+ -+ if (__journal_key_cmp(btree_id, level, pos, k) >= 0) { -+ ret = k->k; -+ break; -+ } -+ -+ --(*idx); -+ iters++; -+ if (iters == 10) { -+ *idx = 0; -+ goto search; -+ } -+ } -+ -+ rcu_read_unlock(); -+ return ret; - } - - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, -@@ -112,11 +188,12 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree - { - size_t idx = 0; - -- return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); -+ return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx); - } - - static void journal_iter_verify(struct journal_iter *iter) - { -+#ifdef CONFIG_BCACHEFS_DEBUG - struct journal_keys *keys = iter->keys; - size_t gap_size = keys->size - keys->nr; - -@@ -126,10 +203,10 @@ static void journal_iter_verify(struct journal_iter *iter) - if (iter->idx < keys->size) { - struct journal_key *k = keys->data + iter->idx; - -- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: -- cmp_int(k->level, iter->level); -- BUG_ON(cmp < 0); -+ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); -+ BUG_ON(cmp > 0); - } -+#endif - } - - static void journal_iters_fix(struct bch_fs *c) -@@ -182,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, - * Ensure these keys are done last by journal replay, to unblock - * journal reclaim: - */ -- .journal_seq = U32_MAX, -+ .journal_seq = U64_MAX, - }; - struct journal_keys *keys = &c->journal_keys; - size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); -@@ -290,6 +367,68 @@ bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree, - bkey_deleted(&keys->data[idx].k->k)); - } - -+static void 
__bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos) -+{ -+ struct journal_key *k = keys->data + pos; -+ size_t idx = pos_to_idx(keys, pos); -+ -+ k->overwritten = true; -+ -+ struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL; -+ struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL; -+ -+ bool prev_overwritten = prev && prev->overwritten; -+ bool next_overwritten = next && next->overwritten; -+ -+ struct journal_key_range_overwritten *prev_range = -+ prev_overwritten ? prev->overwritten_range : NULL; -+ struct journal_key_range_overwritten *next_range = -+ next_overwritten ? next->overwritten_range : NULL; -+ -+ BUG_ON(prev_range && prev_range->end != idx); -+ BUG_ON(next_range && next_range->start != idx + 1); -+ -+ if (prev_range && next_range) { -+ prev_range->end = next_range->end; -+ -+ keys->data[pos].overwritten_range = prev_range; -+ for (size_t i = next_range->start; i < next_range->end; i++) { -+ struct journal_key *ip = keys->data + idx_to_pos(keys, i); -+ BUG_ON(ip->overwritten_range != next_range); -+ ip->overwritten_range = prev_range; -+ } -+ -+ kfree_rcu_mightsleep(next_range); -+ } else if (prev_range) { -+ prev_range->end++; -+ k->overwritten_range = prev_range; -+ if (next_overwritten) { -+ prev_range->end++; -+ next->overwritten_range = prev_range; -+ } -+ } else if (next_range) { -+ next_range->start--; -+ k->overwritten_range = next_range; -+ if (prev_overwritten) { -+ next_range->start--; -+ prev->overwritten_range = next_range; -+ } -+ } else if (prev_overwritten || next_overwritten) { -+ struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); -+ if (!r) -+ return; -+ -+ r->start = idx - (size_t) prev_overwritten; -+ r->end = idx + 1 + (size_t) next_overwritten; -+ -+ rcu_assign_pointer(k->overwritten_range, r); -+ if (prev_overwritten) -+ prev->overwritten_range = r; -+ if (next_overwritten) -+ next->overwritten_range = r; -+ } -+} -+ - void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, - unsigned level, struct bpos pos) - { -@@ -299,8 +438,12 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, - if (idx < keys->size && - keys->data[idx].btree_id == btree && - keys->data[idx].level == level && -- bpos_eq(keys->data[idx].k->k.p, pos)) -- keys->data[idx].overwritten = true; -+ bpos_eq(keys->data[idx].k->k.p, pos) && -+ !keys->data[idx].overwritten) { -+ mutex_lock(&keys->overwrite_lock); -+ __bch2_journal_key_overwritten(keys, idx); -+ mutex_unlock(&keys->overwrite_lock); -+ } - } - - static void bch2_journal_iter_advance(struct journal_iter *iter) -@@ -314,24 +457,32 @@ static void bch2_journal_iter_advance(struct journal_iter *iter) - - static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) - { -+ struct bkey_s_c ret = bkey_s_c_null; -+ - journal_iter_verify(iter); - -+ rcu_read_lock(); - while (iter->idx < iter->keys->size) { - struct journal_key *k = iter->keys->data + iter->idx; - -- int cmp = cmp_int(k->btree_id, iter->btree_id) ?: -- cmp_int(k->level, iter->level); -- if (cmp > 0) -+ int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k); -+ if (cmp < 0) - break; - BUG_ON(cmp); - -- if (!k->overwritten) -- return bkey_i_to_s_c(k->k); -+ if (!k->overwritten) { -+ ret = bkey_i_to_s_c(k->k); -+ break; -+ } - -- bch2_journal_iter_advance(iter); -+ if (k->overwritten_range) -+ iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); -+ else -+ 
bch2_journal_iter_advance(iter); - } -+ rcu_read_unlock(); - -- return bkey_s_c_null; -+ return ret; - } - - static void bch2_journal_iter_exit(struct journal_iter *iter) -@@ -382,6 +533,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter - : (level > 1 ? 1 : 16); - - iter.prefetch = false; -+ iter.fail_if_too_many_whiteouts = true; - bch2_bkey_buf_init(&tmp); - - while (nr--) { -@@ -400,6 +552,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter - struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) - { - struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret; -+ size_t iters = 0; - - if (iter->prefetch && iter->journal.level) - btree_and_journal_iter_prefetch(iter); -@@ -407,6 +560,11 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * - if (iter->at_end) - return bkey_s_c_null; - -+ iters++; -+ -+ if (iters > 20 && iter->fail_if_too_many_whiteouts) -+ return bkey_s_c_null; -+ - while ((btree_k = bch2_journal_iter_peek_btree(iter)).k && - bpos_lt(btree_k.k->p, iter->pos)) - bch2_journal_iter_advance_btree(iter); -@@ -481,16 +639,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, - - /* sort and dedup all keys in the journal: */ - --void bch2_journal_entries_free(struct bch_fs *c) --{ -- struct journal_replay **i; -- struct genradix_iter iter; -- -- genradix_for_each(&c->journal_entries, iter, i) -- kvfree(*i); -- genradix_free(&c->journal_entries); --} -- - /* - * When keys compare equal, oldest compares first: - */ -@@ -515,15 +663,26 @@ void bch2_journal_keys_put(struct bch_fs *c) - - move_gap(keys, keys->nr); - -- darray_for_each(*keys, i) -+ darray_for_each(*keys, i) { -+ if (i->overwritten_range && -+ (i == &darray_last(*keys) || -+ i->overwritten_range != i[1].overwritten_range)) -+ kfree(i->overwritten_range); -+ - if (i->allocated) - kfree(i->k); -+ } - - kvfree(keys->data); - keys->data = NULL; - keys->nr = keys->gap = keys->size = 0; - -- bch2_journal_entries_free(c); -+ struct journal_replay **i; -+ struct genradix_iter iter; -+ -+ genradix_for_each(&c->journal_entries, iter, i) -+ kvfree(*i); -+ genradix_free(&c->journal_entries); - } - - static void __journal_keys_sort(struct journal_keys *keys) -@@ -628,8 +787,20 @@ void bch2_journal_keys_dump(struct bch_fs *c) - - darray_for_each(*keys, i) { - printbuf_reset(&buf); -+ prt_printf(&buf, "btree="); -+ bch2_btree_id_to_text(&buf, i->btree_id); -+ prt_printf(&buf, " l=%u ", i->level); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); -- pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf); -+ pr_err("%s", buf.buf); - } - printbuf_exit(&buf); - } -+ -+void bch2_fs_journal_keys_init(struct bch_fs *c) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ -+ atomic_set(&keys->ref, 1); -+ keys->initial_ref_held = true; -+ mutex_init(&keys->overwrite_lock); -+} -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -index 1653de9d609b..2a3082919b8d 100644 ---- a/fs/bcachefs/btree_journal_iter.h -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -26,16 +26,24 @@ struct btree_and_journal_iter { - struct bpos pos; - bool at_end; - bool prefetch; -+ bool fail_if_too_many_whiteouts; - }; - -+static inline int __journal_key_btree_cmp(enum btree_id l_btree_id, -+ unsigned l_level, -+ const struct journal_key *r) -+{ -+ return -cmp_int(l_level, r->level) ?: -+ cmp_int(l_btree_id, r->btree_id); -+} -+ - static inline int 
__journal_key_cmp(enum btree_id l_btree_id, - unsigned l_level, - struct bpos l_pos, - const struct journal_key *r) - { -- return (cmp_int(l_btree_id, r->btree_id) ?: -- cmp_int(l_level, r->level) ?: -- bpos_cmp(l_pos, r->k->k.p)); -+ return __journal_key_btree_cmp(l_btree_id, l_level, r) ?: -+ bpos_cmp(l_pos, r->k->k.p); - } - - static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) -@@ -43,7 +51,9 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); - } - --struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id, -+struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id, -+ unsigned, struct bpos, struct bpos, size_t *); -+struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id, - unsigned, struct bpos, struct bpos, size_t *); - struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, - unsigned, struct bpos); -@@ -79,8 +89,6 @@ static inline void bch2_journal_keys_put_initial(struct bch_fs *c) - c->journal_keys.initial_ref_held = false; - } - --void bch2_journal_entries_free(struct bch_fs *); -- - int bch2_journal_keys_sort(struct bch_fs *); - - void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, -@@ -89,4 +97,6 @@ void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id, - - void bch2_journal_keys_dump(struct bch_fs *); - -+void bch2_fs_journal_keys_init(struct bch_fs *); -+ - #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ -diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h -new file mode 100644 -index 000000000000..8b773823704f ---- /dev/null -+++ b/fs/bcachefs/btree_journal_iter_types.h -@@ -0,0 +1,36 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H -+#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H -+ -+struct journal_key_range_overwritten { -+ size_t start, end; -+}; -+ -+struct journal_key { -+ u64 journal_seq; -+ u32 journal_offset; -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ bool allocated; -+ bool overwritten; -+ struct journal_key_range_overwritten __rcu * -+ overwritten_range; -+ struct bkey_i *k; -+}; -+ -+struct journal_keys { -+ /* must match layout in darray_types.h */ -+ size_t nr, size; -+ struct journal_key *data; -+ /* -+ * Gap buffer: instead of all the empty space in the array being at the -+ * end of the buffer - from @nr to @size - the empty space is at @gap. -+ * This means that sequential insertions are O(n) instead of O(n^2). 
-+ */ -+ size_t gap; -+ atomic_t ref; -+ bool initial_ref_held; -+ struct mutex overwrite_lock; -+}; -+ -+#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -index 244610b1d0b5..c378b97ebeca 100644 ---- a/fs/bcachefs/btree_key_cache.c -+++ b/fs/bcachefs/btree_key_cache.c -@@ -197,7 +197,9 @@ bkey_cached_reuse(struct btree_key_cache *c) - return ck; - } - --static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path, -+static int btree_key_cache_create(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_path *ck_path, - struct bkey_s_c k) - { - struct bch_fs *c = trans->c; -@@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - key_u64s = min(256U, (key_u64s * 3) / 2); - key_u64s = roundup_pow_of_two(key_u64s); - -- struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s); -+ struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s); - int ret = PTR_ERR_OR_ZERO(ck); - if (ret) - return ret; -@@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - ck = bkey_cached_reuse(bc); - if (unlikely(!ck)) { - bch_err(c, "error allocating memory for key cache item, btree %s", -- bch2_btree_id_str(path->btree_id)); -+ bch2_btree_id_str(ck_path->btree_id)); - return -BCH_ERR_ENOMEM_btree_key_cache_create; - } - } - - ck->c.level = 0; -- ck->c.btree_id = path->btree_id; -- ck->key.btree_id = path->btree_id; -- ck->key.pos = path->pos; -+ ck->c.btree_id = ck_path->btree_id; -+ ck->key.btree_id = ck_path->btree_id; -+ ck->key.pos = ck_path->pos; - ck->flags = 1U << BKEY_CACHED_ACCESSED; - - if (unlikely(key_u64s > ck->u64s)) { -- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); -+ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); - - struct bkey_i *new_k = allocate_dropping_locks(trans, ret, - kmalloc(key_u64s * sizeof(u64), _gfp)); -@@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * - - bkey_reassemble(ck->k, k); - -+ ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c); -+ if (unlikely(ret)) -+ goto err; -+ - ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params); -+ -+ bch2_btree_node_unlock_write(trans, path, path_l(path)->b); -+ - if (unlikely(ret)) /* raced with another fill? 
*/ - goto err; - - atomic_long_inc(&bc->nr_keys); - six_unlock_write(&ck->c.lock); - -- enum six_lock_type lock_want = __btree_lock_want(path, 0); -+ enum six_lock_type lock_want = __btree_lock_want(ck_path, 0); - if (lock_want == SIX_LOCK_read) - six_lock_downgrade(&ck->c.lock); -- btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want); -- path->uptodate = BTREE_ITER_UPTODATE; -+ btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want); -+ ck_path->uptodate = BTREE_ITER_UPTODATE; - return 0; - err: - bkey_cached_free(bc, ck); -- mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); -+ mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED); - - return ret; - } -@@ -283,7 +292,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - unsigned flags) - { - if (flags & BTREE_ITER_cached_nofill) { -- ck_path->uptodate = BTREE_ITER_UPTODATE; -+ ck_path->l[0].b = NULL; - return 0; - } - -@@ -293,6 +302,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - int ret; - - bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos, -+ BTREE_ITER_intent| - BTREE_ITER_key_cache_fill| - BTREE_ITER_cached_nofill); - iter.flags &= ~BTREE_ITER_with_journal; -@@ -306,9 +316,19 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, - if (unlikely(ret)) - goto out; - -- ret = btree_key_cache_create(trans, ck_path, k); -+ ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); - if (ret) - goto err; -+ -+ if (trace_key_cache_fill_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bpos_to_text(&buf, ck_path->pos); -+ prt_char(&buf, ' '); -+ bch2_bkey_val_to_text(&buf, trans->c, k); -+ trace_key_cache_fill(trans, buf.buf); -+ printbuf_exit(&buf); -+ } - out: - /* We're not likely to need this iterator again: */ - bch2_set_btree_iter_dontneed(&iter); -@@ -424,8 +444,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - !test_bit(JOURNAL_space_low, &c->journal.flags)) - commit_flags |= BCH_TRANS_COMMIT_no_journal_res; - -- ret = bch2_btree_iter_traverse(&b_iter) ?: -- bch2_trans_update(trans, &b_iter, ck->k, -+ struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); -+ ret = bkey_err(btree_k); -+ if (ret) -+ goto err; -+ -+ /* * Check that we're not violating cache coherency rules: */ -+ BUG_ON(bkey_deleted(btree_k.k)); -+ -+ ret = bch2_trans_update(trans, &b_iter, ck->k, - BTREE_UPDATE_key_cache_reclaim| - BTREE_UPDATE_internal_snapshot_node| - BTREE_TRIGGER_norun) ?: -@@ -433,7 +460,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - commit_flags); -- -+err: - bch2_fs_fatal_err_on(ret && - !bch2_err_matches(ret, BCH_ERR_transaction_restart) && - !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && -@@ -586,8 +613,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, - bkey_cached_free(bc, ck); - - mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); -- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -- path->should_be_locked = false; -+ -+ struct btree_path *path2; -+ unsigned i; -+ trans_for_each_path(trans, path2, i) -+ if (path2->l[0].b == (void *) ck) { -+ __bch2_btree_path_unlock(trans, path2); -+ path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop); -+ path2->should_be_locked = false; -+ btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE); -+ } -+ -+ bch2_trans_verify_locks(trans); - } - - static unsigned 
long bch2_btree_key_cache_scan(struct shrinker *shrink, -diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c -index efe2a007b482..10b805a60f52 100644 ---- a/fs/bcachefs/btree_locking.c -+++ b/fs/bcachefs/btree_locking.c -@@ -109,6 +109,12 @@ static noinline void lock_graph_pop_all(struct lock_graph *g) - lock_graph_up(g); - } - -+static noinline void lock_graph_pop_from(struct lock_graph *g, struct trans_waiting_for_lock *i) -+{ -+ while (g->g + g->nr > i) -+ lock_graph_up(g); -+} -+ - static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans) - { - g->g[g->nr++] = (struct trans_waiting_for_lock) { -@@ -124,15 +130,20 @@ static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans) - __lock_graph_down(g, trans); - } - --static bool lock_graph_remove_non_waiters(struct lock_graph *g) -+static bool lock_graph_remove_non_waiters(struct lock_graph *g, -+ struct trans_waiting_for_lock *from) - { - struct trans_waiting_for_lock *i; - -- for (i = g->g + 1; i < g->g + g->nr; i++) -+ if (from->trans->locking != from->node_want) { -+ lock_graph_pop_from(g, from); -+ return true; -+ } -+ -+ for (i = from + 1; i < g->g + g->nr; i++) - if (i->trans->locking != i->node_want || - i->trans->locking_wait.start_time != i[-1].lock_start_time) { -- while (g->g + g->nr > i) -- lock_graph_up(g); -+ lock_graph_pop_from(g, i); - return true; - } - -@@ -179,13 +190,14 @@ static int btree_trans_abort_preference(struct btree_trans *trans) - return 3; - } - --static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) -+static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle, -+ struct trans_waiting_for_lock *from) - { - struct trans_waiting_for_lock *i, *abort = NULL; - unsigned best = 0, pref; - int ret; - -- if (lock_graph_remove_non_waiters(g)) -+ if (lock_graph_remove_non_waiters(g, from)) - return 0; - - /* Only checking, for debugfs: */ -@@ -195,7 +207,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) - goto out; - } - -- for (i = g->g; i < g->g + g->nr; i++) { -+ for (i = from; i < g->g + g->nr; i++) { - pref = btree_trans_abort_preference(i->trans); - if (pref > best) { - abort = i; -@@ -229,8 +241,9 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) - ret = abort_lock(g, abort); - out: - if (ret) -- while (g->nr) -- lock_graph_up(g); -+ lock_graph_pop_all(g); -+ else -+ lock_graph_pop_from(g, abort); - return ret; - } - -@@ -243,7 +256,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, - for (i = g->g; i < g->g + g->nr; i++) - if (i->trans == trans) { - closure_put(&trans->ref); -- return break_cycle(g, cycle); -+ return break_cycle(g, cycle, i); - } - - if (g->nr == ARRAY_SIZE(g->g)) { -@@ -252,8 +265,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, - if (orig_trans->lock_may_not_fail) - return 0; - -- while (g->nr) -- lock_graph_up(g); -+ lock_graph_pop_all(g); - - if (cycle) - return 0; -@@ -281,7 +293,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) - - g.nr = 0; - -- if (trans->lock_must_abort) { -+ if (trans->lock_must_abort && !trans->lock_may_not_fail) { - if (cycle) - return -1; - -@@ -336,7 +348,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) - * structures - which means it can't be blocked - * waiting on a lock: - */ -- if (!lock_graph_remove_non_waiters(&g)) { -+ if (!lock_graph_remove_non_waiters(&g, 
g.g)) { - /* - * If lock_graph_remove_non_waiters() - * didn't do anything, it must be -@@ -512,7 +524,6 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, - struct btree_path *path, unsigned level) - { - struct btree *b = path->l[level].b; -- struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level); - - if (!is_btree_node(path, level)) - return false; -@@ -536,24 +547,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, - if (race_fault()) - return false; - -- if (btree_node_locked(path, level)) { -- bool ret; -- -- six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]); -- ret = six_lock_tryupgrade(&b->c.lock); -- six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]); -- -- if (ret) -- goto success; -- } else { -- if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) -- goto success; -- } -+ if (btree_node_locked(path, level) -+ ? six_lock_tryupgrade(&b->c.lock) -+ : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) -+ goto success; - -- /* -- * Do we already have an intent lock via another path? If so, just bump -- * lock count: -- */ - if (btree_node_lock_seq_matches(path, b, level) && - btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) { - btree_node_unlock(trans, path, level); -@@ -782,7 +780,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) - return bch2_trans_relock_fail(trans, path, &f, trace); - } - -- trans_set_locked(trans); -+ trans_set_locked(trans, true); - out: - bch2_trans_verify_locks(trans); - return 0; -@@ -818,6 +816,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans) - bch2_trans_srcu_unlock(trans); - } - -+void bch2_trans_unlock_write(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ unsigned i; -+ -+ trans_for_each_path(trans, path, i) -+ for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) -+ if (btree_node_write_locked(path, l)) -+ bch2_btree_node_unlock_write(trans, path, path->l[l].b); -+} -+ - int __bch2_trans_mutex_lock(struct btree_trans *trans, - struct mutex *lock) - { -@@ -856,6 +865,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path) - (want == BTREE_NODE_UNLOCKED || - have != BTREE_NODE_WRITE_LOCKED) && - want != have); -+ -+ BUG_ON(btree_node_locked(path, l) && -+ path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock)); - } - } - -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -index 7c07f9fa9add..b54ef48eb8cc 100644 ---- a/fs/bcachefs/btree_locking.h -+++ b/fs/bcachefs/btree_locking.h -@@ -16,6 +16,7 @@ - void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); - - void bch2_trans_unlock_noassert(struct btree_trans *); -+void bch2_trans_unlock_write(struct btree_trans *); - - static inline bool is_btree_node(struct btree_path *path, unsigned l) - { -@@ -75,13 +76,6 @@ static inline void mark_btree_node_locked_noreset(struct btree_path *path, - path->nodes_locked |= (type + 1) << (level << 1); - } - --static inline void mark_btree_node_unlocked(struct btree_path *path, -- unsigned level) --{ -- EBUG_ON(btree_node_write_locked(path, level)); -- mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); --} -- - static inline void mark_btree_node_locked(struct btree_trans *trans, - struct btree_path *path, - unsigned level, -@@ -124,19 +118,25 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans, - - /* unlock: */ - -+void bch2_btree_node_unlock_write(struct btree_trans *, -+ struct btree_path *, 
struct btree *); -+ - static inline void btree_node_unlock(struct btree_trans *trans, - struct btree_path *path, unsigned level) - { - int lock_type = btree_node_locked_type(path, level); - - EBUG_ON(level >= BTREE_MAX_DEPTH); -- EBUG_ON(lock_type == BTREE_NODE_WRITE_LOCKED); - - if (lock_type != BTREE_NODE_UNLOCKED) { -+ if (unlikely(lock_type == BTREE_NODE_WRITE_LOCKED)) { -+ bch2_btree_node_unlock_write(trans, path, path->l[level].b); -+ lock_type = BTREE_NODE_INTENT_LOCKED; -+ } - six_unlock_type(&path->l[level].b->c.lock, lock_type); - btree_trans_lock_hold_time_update(trans, path, level); -+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); - } -- mark_btree_node_unlocked(path, level); - } - - static inline int btree_path_lowest_level_locked(struct btree_path *path) -@@ -162,36 +162,40 @@ static inline void __bch2_btree_path_unlock(struct btree_trans *trans, - * Updates the saved lock sequence number, so that bch2_btree_node_relock() will - * succeed: - */ -+static inline void -+__bch2_btree_node_unlock_write(struct btree_trans *trans, struct btree *b) -+{ -+ if (!b->c.lock.write_lock_recurse) { -+ struct btree_path *linked; -+ unsigned i; -+ -+ trans_for_each_path_with_node(trans, b, linked, i) -+ linked->l[b->c.level].lock_seq++; -+ } -+ -+ six_unlock_write(&b->c.lock); -+} -+ - static inline void - bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path, - struct btree *b) - { -- struct btree_path *linked; -- unsigned i; -- - EBUG_ON(path->l[b->c.level].b != b); - EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock)); - EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write); - - mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); -- -- trans_for_each_path_with_node(trans, b, linked, i) -- linked->l[b->c.level].lock_seq++; -- -- six_unlock_write(&b->c.lock); -+ __bch2_btree_node_unlock_write(trans, b); - } - --void bch2_btree_node_unlock_write(struct btree_trans *, -- struct btree_path *, struct btree *); -- - int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); - - /* lock: */ - --static inline void trans_set_locked(struct btree_trans *trans) -+static inline void trans_set_locked(struct btree_trans *trans, bool try) - { - if (!trans->locked) { -- lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_); -+ lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_); - trans->locked = true; - trans->last_unlock_ip = 0; - -@@ -282,7 +286,7 @@ static inline int btree_node_lock(struct btree_trans *trans, - int ret = 0; - - EBUG_ON(level >= BTREE_MAX_DEPTH); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (likely(six_trylock_type(&b->lock, type)) || - btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || -diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c -index 30131c3bdd97..a7f06deee13c 100644 ---- a/fs/bcachefs/btree_node_scan.c -+++ b/fs/bcachefs/btree_node_scan.c -@@ -12,6 +12,7 @@ - #include "recovery_passes.h" - - #include -+#include - #include - - struct find_btree_nodes_worker { -@@ -22,17 +23,15 @@ struct find_btree_nodes_worker { - - static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) - { -- prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ", -- bch2_btree_id_str(n->btree_id), n->level, n->seq, -- n->journal_seq, n->cookie); -+ bch2_btree_id_level_to_text(out, n->btree_id, 
n->level); -+ prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ", -+ n->seq, n->journal_seq, n->cookie); - bch2_bpos_to_text(out, n->min_key); - prt_str(out, "-"); - bch2_bpos_to_text(out, n->max_key); - - if (n->range_updated) - prt_str(out, " range updated"); -- if (n->overwritten) -- prt_str(out, " overwritten"); - - for (unsigned i = 0; i < n->nr_ptrs; i++) { - prt_char(out, ' '); -@@ -140,6 +139,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r) - -found_btree_node_cmp_time(l, r); - } - -+static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg) -+{ -+ return found_btree_node_cmp_pos(l, r) < 0; -+} -+ -+static inline void found_btree_node_swap(void *_l, void *_r, void *arg) -+{ -+ struct found_btree_node *l = _l; -+ struct found_btree_node *r = _r; -+ -+ swap(*l, *r); -+} -+ -+static const struct min_heap_callbacks found_btree_node_heap_cbs = { -+ .less = found_btree_node_cmp_pos_less, -+ .swp = found_btree_node_swap, -+}; -+ - static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - struct bio *bio, struct btree_node *bn, u64 offset) - { -@@ -159,6 +176,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - return; - - if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { -+ if (!c->chacha20) -+ return; -+ - struct nonce nonce = btree_nonce(&bn->keys, 0); - unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; - -@@ -292,55 +312,48 @@ static int read_btree_nodes(struct find_btree_nodes *f) - return f->ret ?: ret; - } - --static void bubble_up(struct found_btree_node *n, struct found_btree_node *end) -+static bool nodes_overlap(const struct found_btree_node *l, -+ const struct found_btree_node *r) - { -- while (n + 1 < end && -- found_btree_node_cmp_pos(n, n + 1) > 0) { -- swap(n[0], n[1]); -- n++; -- } -+ return (l->btree_id == r->btree_id && -+ l->level == r->level && -+ bpos_gt(l->max_key, r->min_key)); - } - - static int handle_overwrites(struct bch_fs *c, -- struct found_btree_node *start, -- struct found_btree_node *end) -+ struct found_btree_node *l, -+ found_btree_nodes *nodes_heap) - { -- struct found_btree_node *n; --again: -- for (n = start + 1; -- n < end && -- n->btree_id == start->btree_id && -- n->level == start->level && -- bpos_lt(n->min_key, start->max_key); -- n++) { -- int cmp = found_btree_node_cmp_time(start, n); -+ struct found_btree_node *r; -+ -+ while ((r = min_heap_peek(nodes_heap)) && -+ nodes_overlap(l, r)) { -+ int cmp = found_btree_node_cmp_time(l, r); - - if (cmp > 0) { -- if (bpos_cmp(start->max_key, n->max_key) >= 0) -- n->overwritten = true; -+ if (bpos_cmp(l->max_key, r->max_key) >= 0) -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - else { -- n->range_updated = true; -- n->min_key = bpos_successor(start->max_key); -- n->range_updated = true; -- bubble_up(n, end); -- goto again; -+ r->range_updated = true; -+ r->min_key = bpos_successor(l->max_key); -+ r->range_updated = true; -+ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); - } - } else if (cmp < 0) { -- BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0); -+ BUG_ON(bpos_eq(l->min_key, r->min_key)); - -- start->max_key = bpos_predecessor(n->min_key); -- start->range_updated = true; -- } else if (n->level) { -- n->overwritten = true; -+ l->max_key = bpos_predecessor(r->min_key); -+ l->range_updated = true; -+ } else if (r->level) { -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - } else { -- if 
(bpos_cmp(start->max_key, n->max_key) >= 0) -- n->overwritten = true; -+ if (bpos_cmp(l->max_key, r->max_key) >= 0) -+ min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL); - else { -- n->range_updated = true; -- n->min_key = bpos_successor(start->max_key); -- n->range_updated = true; -- bubble_up(n, end); -- goto again; -+ r->range_updated = true; -+ r->min_key = bpos_successor(l->max_key); -+ r->range_updated = true; -+ min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL); - } - } - } -@@ -352,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) - { - struct find_btree_nodes *f = &c->found_btree_nodes; - struct printbuf buf = PRINTBUF; -+ found_btree_nodes nodes_heap = {}; - size_t dst; - int ret = 0; - -@@ -406,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) - bch2_print_string_as_lines(KERN_INFO, buf.buf); - } - -- dst = 0; -- darray_for_each(f->nodes, i) { -- if (i->overwritten) -- continue; -+ swap(nodes_heap, f->nodes); -+ -+ { -+ /* darray must have same layout as a heap */ -+ min_heap_char real_heap; -+ BUILD_BUG_ON(sizeof(nodes_heap.nr) != sizeof(real_heap.nr)); -+ BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size)); -+ BUILD_BUG_ON(offsetof(found_btree_nodes, nr) != offsetof(min_heap_char, nr)); -+ BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size)); -+ } -+ -+ min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL); - -- ret = handle_overwrites(c, i, &darray_top(f->nodes)); -+ if (nodes_heap.nr) { -+ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); - if (ret) - goto err; - -- BUG_ON(i->overwritten); -- f->nodes.data[dst++] = *i; -+ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); - } -- f->nodes.nr = dst; - -- if (c->opts.verbose) { -+ while (true) { -+ ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap); -+ if (ret) -+ goto err; -+ -+ if (!nodes_heap.nr) -+ break; -+ -+ ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap)); -+ if (ret) -+ goto err; -+ -+ min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL); -+ } -+ -+ for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++) -+ BUG_ON(nodes_overlap(n, n + 1)); -+ -+ if (0 && c->opts.verbose) { - printbuf_reset(&buf); - prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__); - found_btree_nodes_to_text(&buf, c, f->nodes); - bch2_print_string_as_lines(KERN_INFO, buf.buf); -+ } else { -+ bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr); - } - - eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); - err: -+ darray_exit(&nodes_heap); - printbuf_exit(&buf); - return ret; - } -@@ -499,7 +541,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; - -- prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); -+ prt_str(&buf, "recovery "); -+ bch2_btree_id_level_to_text(&buf, btree, level); -+ prt_str(&buf, " "); - bch2_bpos_to_text(&buf, node_min); - prt_str(&buf, " - "); - bch2_bpos_to_text(&buf, node_max); -@@ -533,7 +577,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, - bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); - -- BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); -+ BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), -+ (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = level + 1, 
-+ .btree = btree, -+ })); - - ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); - if (ret) -diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h -index b6c36c45d0be..2811b6857c97 100644 ---- a/fs/bcachefs/btree_node_scan_types.h -+++ b/fs/bcachefs/btree_node_scan_types.h -@@ -6,7 +6,6 @@ - - struct found_btree_node { - bool range_updated:1; -- bool overwritten:1; - u8 btree_id; - u8 level; - unsigned sectors_written; -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -index 9bf471fa4361..2760dd9569ed 100644 ---- a/fs/bcachefs/btree_trans_commit.c -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -133,7 +133,7 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans) - return 0; - } - --static inline void bch2_trans_unlock_write(struct btree_trans *trans) -+static inline void bch2_trans_unlock_updates_write(struct btree_trans *trans) - { - if (likely(trans->write_locked)) { - trans_for_each_update(trans, i) -@@ -249,7 +249,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, - new |= 1 << BTREE_NODE_need_write; - } while (!try_cmpxchg(&b->flags, &old, new)); - -- btree_node_write_if_need(c, b, SIX_LOCK_read); -+ btree_node_write_if_need(trans, b, SIX_LOCK_read); - six_unlock_read(&b->c.lock); - - bch2_trans_put(trans); -@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, - unsigned flags) - { - return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, -- trans->journal_u64s, flags); -+ trans->journal_u64s, flags, trans); - } - - #define JSET_ENTRY_LOG_U64s 4 -@@ -384,7 +384,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, - struct bkey_i *new_k; - int ret; - -- bch2_trans_unlock_write(trans); -+ bch2_trans_unlock_updates_write(trans); - bch2_trans_unlock(trans); - - new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); -@@ -479,8 +479,7 @@ static int run_one_mem_trigger(struct btree_trans *trans, - old, flags); - } - --static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, -- bool overwrite) -+static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i) - { - verify_update_old_key(trans, i); - -@@ -507,10 +506,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ - return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), - BTREE_TRIGGER_insert| - BTREE_TRIGGER_overwrite|flags) ?: 1; -- } else if (overwrite && !i->overwrite_trigger_run) { -+ } else if (!i->overwrite_trigger_run) { - i->overwrite_trigger_run = true; - return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1; -- } else if (!overwrite && !i->insert_trigger_run) { -+ } else if (!i->insert_trigger_run) { - i->insert_trigger_run = true; - return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1; - } else { -@@ -519,39 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ - } - - static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, -- unsigned btree_id_start) -+ unsigned *btree_id_updates_start) - { -- for (int overwrite = 1; overwrite >= 0; --overwrite) { -- bool trans_trigger_run; -+ bool trans_trigger_run; - -- /* -- * Running triggers will append more updates to the list of updates as -- * we're walking it: -- */ -- do { -- trans_trigger_run = false; -- -- for (unsigned i = btree_id_start; -- 
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; -- i++) { -- if (trans->updates[i].btree_id != btree_id) -- continue; -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; - -- int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); -- if (ret < 0) -- return ret; -- if (ret) -- trans_trigger_run = true; -+ for (unsigned i = *btree_id_updates_start; -+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; -+ i++) { -+ if (trans->updates[i].btree_id < btree_id) { -+ *btree_id_updates_start = i; -+ continue; - } -- } while (trans_trigger_run); -- } -+ -+ int ret = run_one_trans_trigger(trans, trans->updates + i); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ trans_trigger_run = true; -+ } -+ } while (trans_trigger_run); -+ -+ trans_for_each_update(trans, i) -+ BUG_ON(!(i->flags & BTREE_TRIGGER_norun) && -+ i->btree_id == btree_id && -+ btree_node_type_has_trans_triggers(i->bkey_type) && -+ (!i->insert_trigger_run || !i->overwrite_trigger_run)); - - return 0; - } - - static int bch2_trans_commit_run_triggers(struct btree_trans *trans) - { -- unsigned btree_id = 0, btree_id_start = 0; -+ unsigned btree_id = 0, btree_id_updates_start = 0; - int ret = 0; - - /* -@@ -565,27 +570,15 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) - if (btree_id == BTREE_ID_alloc) - continue; - -- while (btree_id_start < trans->nr_updates && -- trans->updates[btree_id_start].btree_id < btree_id) -- btree_id_start++; -- -- ret = run_btree_triggers(trans, btree_id, btree_id_start); -+ ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start); - if (ret) - return ret; - } - -- for (unsigned idx = 0; idx < trans->nr_updates; idx++) { -- struct btree_insert_entry *i = trans->updates + idx; -- -- if (i->btree_id > BTREE_ID_alloc) -- break; -- if (i->btree_id == BTREE_ID_alloc) { -- ret = run_btree_triggers(trans, BTREE_ID_alloc, idx); -- if (ret) -- return ret; -- break; -- } -- } -+ btree_id_updates_start = 0; -+ ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start); -+ if (ret) -+ return ret; - - #ifdef CONFIG_BCACHEFS_DEBUG - trans_for_each_update(trans, i) -@@ -609,14 +602,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) - return 0; - } - --static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) --{ -- return (struct bversion) { -- .hi = res->seq >> 32, -- .lo = (res->seq << 32) | (res->offset + offset), -- }; --} -- - static inline int - bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - struct btree_insert_entry **stopped_at, -@@ -627,12 +612,11 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - unsigned u64s = 0; - int ret = 0; - -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (race_fault()) { - trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); -- return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); - } - - /* -@@ -701,25 +685,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - struct jset_entry *entry = trans->journal_entries; - - percpu_down_read(&c->mark_lock); -- - for (entry = trans->journal_entries; - entry != (void *) ((u64 *) trans->journal_entries 
+ trans->journal_entries_u64s); - entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && - entry->start->k.type == KEY_TYPE_accounting) { -- BUG_ON(!trans->journal_res.ref); -- -- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); -- -- a->k.bversion = journal_pos_to_bversion(&trans->journal_res, -- (u64 *) entry - (u64 *) trans->journal_entries); -- BUG_ON(bversion_zero(a->k.bversion)); -- -- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { -- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); -- if (ret) -- goto revert_fs_usage; -- } -+ ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); -+ if (ret) -+ goto revert_fs_usage; - } - percpu_up_read(&c->mark_lock); - -@@ -739,33 +712,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - goto fatal_err; - } - -- trans_for_each_update(trans, i) { -- enum bch_validate_flags invalid_flags = 0; -+ struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit }; - -- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; -- -- ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), -- i->bkey_type, invalid_flags); -- if (unlikely(ret)){ -- bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", -- trans->fn, (void *) i->ip_allocated); -- goto fatal_err; -- } -- btree_insert_entry_checks(trans, i); -- } -+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -+ validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit; - - for (struct jset_entry *i = trans->journal_entries; - i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - i = vstruct_next(i)) { -- enum bch_validate_flags invalid_flags = 0; -- -- if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) -- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; -- - ret = bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, -- CPU_BIG_ENDIAN, invalid_flags); -+ CPU_BIG_ENDIAN, validate_context); - if (unlikely(ret)) { - bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", - trans->fn); -@@ -773,6 +730,19 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - } - } - -+ trans_for_each_update(trans, i) { -+ validate_context.level = i->level; -+ validate_context.btree = i->btree_id; -+ -+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), validate_context); -+ if (unlikely(ret)){ -+ bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", -+ trans->fn, (void *) i->ip_allocated); -+ goto fatal_err; -+ } -+ btree_insert_entry_checks(trans, i); -+ } -+ - if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) { - struct journal *j = &c->journal; - struct jset_entry *entry; -@@ -833,13 +803,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, - entry2 != entry; - entry2 = vstruct_next(entry2)) - if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && -- entry2->start->k.type == KEY_TYPE_accounting) { -- struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); -- -- bch2_accounting_neg(a); -- bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); -- bch2_accounting_neg(a); -- } -+ entry2->start->k.type == KEY_TYPE_accounting) -+ bch2_accounting_trans_commit_revert(trans, -+ bkey_i_to_accounting(entry2->start), flags); - percpu_up_read(&c->mark_lock); - return ret; - } -@@ -902,7 +868,7 @@ static inline 
int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags - if (!ret && unlikely(trans->journal_replay_not_finished)) - bch2_drop_overwrites_from_journal(trans); - -- bch2_trans_unlock_write(trans); -+ bch2_trans_unlock_updates_write(trans); - - if (!ret && trans->journal_pin) - bch2_journal_pin_add(&c->journal, trans->journal_res.seq, -@@ -994,24 +960,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, - return ret; - } - --static noinline int --bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) --{ -- struct bch_fs *c = trans->c; -- int ret; -- -- if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) || -- test_bit(BCH_FS_started, &c->flags)) -- return -BCH_ERR_erofs_trans_commit; -- -- ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); -- if (ret) -- return ret; -- -- bch2_write_ref_get(c, BCH_WRITE_REF_trans); -- return 0; --} -- - /* - * This is for updates done in the early part of fsck - btree_gc - before we've - * gone RW. we only add the new key to the list of keys for journal replay to -@@ -1022,6 +970,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) - { - struct bch_fs *c = trans->c; - -+ BUG_ON(current != c->recovery_task); -+ - trans_for_each_update(trans, i) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); - if (ret) -@@ -1047,8 +997,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - struct bch_fs *c = trans->c; - int ret = 0; - -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - - if (!trans->nr_updates && - !trans->journal_entries_u64s) -@@ -1058,16 +1007,13 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - if (ret) - goto out_reset; - -- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { -- ret = do_bch2_trans_commit_to_journal_replay(trans); -- goto out_reset; -- } -- - if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && - unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { -- ret = bch2_trans_commit_get_rw_cold(trans, flags); -- if (ret) -- goto out_reset; -+ if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) -+ ret = do_bch2_trans_commit_to_journal_replay(trans); -+ else -+ ret = -BCH_ERR_erofs_trans_commit; -+ goto out_reset; - } - - EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); -@@ -1112,8 +1058,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) - } - retry: - errored_at = NULL; -- bch2_trans_verify_not_unlocked(trans); -- bch2_trans_verify_not_in_restart(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) - memset(&trans->journal_res, 0, sizeof(trans->journal_res)); - memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); -diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h -index 4568a41fefaf..a6f251eb4164 100644 ---- a/fs/bcachefs/btree_types.h -+++ b/fs/bcachefs/btree_types.h -@@ -513,6 +513,9 @@ struct btree_trans { - u64 last_begin_time; - unsigned long last_begin_ip; - unsigned long last_restarted_ip; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ bch_stacktrace last_restarted_trace; -+#endif - unsigned long last_unlock_ip; - unsigned long srcu_lock_time; - -@@ -787,53 +790,64 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type) - return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS; - } - --static inline bool btree_node_type_is_extents(enum btree_node_type type) -+static inline bool 
btree_id_is_extents(enum btree_id btree) - { - const u64 mask = 0 --#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) -+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr) - BCH_BTREE_IDS() - #undef x - ; - -- return BIT_ULL(type) & mask; -+ return BIT_ULL(btree) & mask; - } - --static inline bool btree_id_is_extents(enum btree_id btree) -+static inline bool btree_node_type_is_extents(enum btree_node_type type) - { -- return btree_node_type_is_extents(__btree_node_type(0, btree)); -+ return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1); -+} -+ -+static inline bool btree_type_has_snapshots(enum btree_id btree) -+{ -+ const u64 mask = 0 -+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr) -+ BCH_BTREE_IDS() -+#undef x -+ ; -+ -+ return BIT_ULL(btree) & mask; - } - --static inline bool btree_type_has_snapshots(enum btree_id id) -+static inline bool btree_type_has_snapshot_field(enum btree_id btree) - { - const u64 mask = 0 --#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) -+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr) - BCH_BTREE_IDS() - #undef x - ; - -- return BIT_ULL(id) & mask; -+ return BIT_ULL(btree) & mask; - } - --static inline bool btree_type_has_snapshot_field(enum btree_id id) -+static inline bool btree_type_has_ptrs(enum btree_id btree) - { - const u64 mask = 0 --#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) -+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr) - BCH_BTREE_IDS() - #undef x - ; - -- return BIT_ULL(id) & mask; -+ return BIT_ULL(btree) & mask; - } - --static inline bool btree_type_has_ptrs(enum btree_id id) -+static inline bool btree_type_uses_write_buffer(enum btree_id btree) - { - const u64 mask = 0 --#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) -+#define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_IS_write_buffer)) << nr) - BCH_BTREE_IDS() - #undef x - ; - -- return BIT_ULL(id) & mask; -+ return BIT_ULL(btree) & mask; - } - - struct btree_root { -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -index 5d809e8bd170..13d794f201a5 100644 ---- a/fs/bcachefs/btree_update.c -+++ b/fs/bcachefs/btree_update.c -@@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, - !(ret = bkey_err(old_k)) && - bkey_eq(old_pos, old_k.k->p)) { - struct bpos whiteout_pos = -- SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; -+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot); - - if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || - snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) -@@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - BTREE_ITER_intent| - BTREE_ITER_with_updates| - BTREE_ITER_not_extents); -- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) - goto err; - if (!k.k) -@@ -323,7 +323,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, - goto out; - next: - bch2_btree_iter_advance(&iter); -- k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); - if ((ret = bkey_err(k))) - goto err; - if (!k.k) -@@ -588,12 +588,9 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi - int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, - enum btree_id btree, struct bpos end) - { -- struct bkey_s_c k; -- int ret = 0; -- -- bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent); -- k = bch2_btree_iter_prev(iter); -- ret = bkey_err(k); -+ bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); -+ int ret = bkey_err(k); - if (ret) - goto err; - -@@ -672,27 +669,19 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, - bch2_btree_insert_trans(trans, id, k, iter_flags)); - } - --int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, -- unsigned len, unsigned update_flags) -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned update_flags) - { -- struct bkey_i *k; -- -- k = bch2_trans_kmalloc(trans, sizeof(*k)); -- if (IS_ERR(k)) -- return PTR_ERR(k); -+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); -+ int ret = PTR_ERR_OR_ZERO(k); -+ if (ret) -+ return ret; - - bkey_init(&k->k); - k->k.p = iter->pos; -- bch2_key_resize(&k->k, len); - return bch2_trans_update(trans, iter, k, update_flags); - } - --int bch2_btree_delete_at(struct btree_trans *trans, -- struct btree_iter *iter, unsigned update_flags) --{ -- return bch2_btree_delete_extent_at(trans, iter, 0, update_flags); --} -- - int bch2_btree_delete(struct btree_trans *trans, - enum btree_id btree, struct bpos pos, - unsigned update_flags) -@@ -721,7 +710,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, - int ret = 0; - - bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); -- while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) { -+ while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(trans->c, 0); - struct bkey_i delete; -@@ -794,8 +783,7 @@ 
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, - return ret; - } - --int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, -- struct bpos pos, bool set) -+int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set) - { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); - int ret = PTR_ERR_OR_ZERO(k); -@@ -804,13 +792,21 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, - - bkey_init(&k->k); - k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; -- k->k.p = pos; -+ k->k.p = iter->pos; -+ if (iter->flags & BTREE_ITER_is_extents) -+ bch2_key_resize(&k->k, 1); - -+ return bch2_trans_update(trans, iter, k, 0); -+} -+ -+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, -+ struct bpos pos, bool set) -+{ - struct btree_iter iter; - bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); - -- ret = bch2_btree_iter_traverse(&iter) ?: -- bch2_trans_update(trans, &iter, k, 0); -+ int ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_bit_mod_iter(trans, &iter, set); - bch2_trans_iter_exit(trans, &iter); - return ret; - } -@@ -827,10 +823,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree, - return bch2_trans_update_buffered(trans, btree, &k); - } - --static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s) -+int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) - { -+ unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64)); -+ prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos); -+ -+ int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; -+ if (ret) -+ return ret; -+ - struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s)); -- int ret = PTR_ERR_OR_ZERO(e); -+ ret = PTR_ERR_OR_ZERO(e); - if (ret) - return ret; - -@@ -865,9 +868,8 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, - memcpy(l->d, buf.buf, buf.pos); - c->journal.early_journal_entries.nr += jset_u64s(u64s); - } else { -- ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_TRANS_COMMIT_lazy_rw|commit_flags, -- __bch2_trans_log_msg(trans, &buf, u64s)); -+ ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, -+ bch2_trans_log_msg(trans, &buf)); - } - err: - printbuf_exit(&buf); -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -index 70b3c989fac2..8f22ef9a7651 100644 ---- a/fs/bcachefs/btree_update.h -+++ b/fs/bcachefs/btree_update.h -@@ -24,7 +24,6 @@ void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, - #define BCH_TRANS_COMMIT_FLAGS() \ - x(no_enospc, "don't check for enospc") \ - x(no_check_rw, "don't attempt to take a ref on c->writes") \ -- x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \ - x(no_journal_res, "don't take a journal reservation, instead " \ - "pin journal entry referred to by trans->journal_res.seq") \ - x(journal_reclaim, "operation required for journal reclaim; may return error" \ -@@ -47,8 +46,6 @@ enum bch_trans_commit_flags { - - void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); - --int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, -- unsigned, unsigned); - int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); - int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); - -@@ -66,6 +63,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, - 
int bch2_btree_delete_range(struct bch_fs *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); - -+int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); - int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); - int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool); - -@@ -161,6 +159,7 @@ void bch2_trans_commit_hook(struct btree_trans *, - struct btree_trans_commit_hook *); - int __bch2_trans_commit(struct btree_trans *, unsigned); - -+int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); - __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); - __printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...); - -@@ -244,7 +243,8 @@ static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *tra - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - - static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -- struct bkey_s_c *k, unsigned flags, -+ struct bkey_s_c *k, -+ enum btree_iter_update_trigger_flags flags, - unsigned type, unsigned min_bytes) - { - struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes); -@@ -261,8 +261,9 @@ static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, str - return mut; - } - --static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -- struct bkey_s_c *k, unsigned flags) -+static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, -+ struct btree_iter *iter, struct bkey_s_c *k, -+ enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0); - } -@@ -274,7 +275,8 @@ static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struc - static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned min_bytes) - { - struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, - btree_id, pos, flags|BTREE_ITER_intent, type); -@@ -289,7 +291,7 @@ static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *tr - static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); - } -@@ -297,7 +299,8 @@ static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *tran - static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned type, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned min_bytes) - { - struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, - btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes); -@@ -318,7 +321,8 @@ static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, - static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags, unsigned min_bytes) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned min_bytes) - { - return 
__bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); - } -@@ -326,7 +330,7 @@ static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans - static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, - struct btree_iter *iter, - unsigned btree_id, struct bpos pos, -- unsigned flags) -+ enum btree_iter_update_trigger_flags flags) - { - return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); - } -@@ -337,7 +341,8 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, - KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) - - static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, -- unsigned flags, unsigned type, unsigned val_size) -+ enum btree_iter_update_trigger_flags flags, -+ unsigned type, unsigned val_size) - { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size); - int ret; -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -index d596ef93239f..f4aeadbe53c1 100644 ---- a/fs/bcachefs/btree_update_interior.c -+++ b/fs/bcachefs/btree_update_interior.c -@@ -58,11 +58,15 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, - b->data->min_key)); - -+ bch2_bkey_buf_init(&prev); -+ bkey_init(&prev.k->k); -+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); -+ - if (b == btree_node_root(c, b)) { - if (!bpos_eq(b->data->min_key, POS_MIN)) { - printbuf_reset(&buf); - bch2_bpos_to_text(&buf, b->data->min_key); -- need_fsck_err(trans, btree_root_bad_min_key, -+ log_fsck_err(trans, btree_root_bad_min_key, - "btree root with incorrect min_key: %s", buf.buf); - goto topology_repair; - } -@@ -70,18 +74,14 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - if (!bpos_eq(b->data->max_key, SPOS_MAX)) { - printbuf_reset(&buf); - bch2_bpos_to_text(&buf, b->data->max_key); -- need_fsck_err(trans, btree_root_bad_max_key, -+ log_fsck_err(trans, btree_root_bad_max_key, - "btree root with incorrect max_key: %s", buf.buf); - goto topology_repair; - } - } - - if (!b->c.level) -- return 0; -- -- bch2_bkey_buf_init(&prev); -- bkey_init(&prev.k->k); -- bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); -+ goto out; - - while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { - if (k.k->type != KEY_TYPE_btree_ptr_v2) -@@ -97,16 +97,16 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "end of prev node doesn't match start of next node\n"), -- prt_printf(&buf, " in btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "end of prev node doesn't match start of next node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n prev "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - prt_str(&buf, "\n next "); - bch2_bkey_val_to_text(&buf, c, k); - -- need_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); -+ log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); - goto topology_repair; - } - -@@ -118,25 +118,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "empty interior node\n"); -- prt_printf(&buf, " in 
btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "empty interior node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - -- need_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); -+ log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); - goto topology_repair; - } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { - bch2_topology_error(c); - - printbuf_reset(&buf); -- prt_str(&buf, "last child node doesn't end at end of parent node\n"); -- prt_printf(&buf, " in btree %s level %u node ", -- bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_str(&buf, "last child node doesn't end at end of parent node\n in "); -+ bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); -+ prt_str(&buf, " node "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n last key "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - -- need_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); -+ log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); - goto topology_repair; - } - out: -@@ -146,13 +146,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) - printbuf_exit(&buf); - return ret; - topology_repair: -- if ((c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) && -- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) { -- bch2_inconsistent_error(c); -- ret = -BCH_ERR_btree_need_topology_repair; -- } else { -- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -- } -+ ret = bch2_topology_error(c); - goto out; - } - -@@ -244,7 +238,6 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, - struct btree *b) - { - struct bch_fs *c = trans->c; -- unsigned i, level = b->c.level; - - bch2_btree_node_lock_write_nofail(trans, path, &b->c); - -@@ -255,13 +248,9 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&b->c.lock); -- mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); -+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); - -- trans_for_each_path(trans, path, i) -- if (path->l[level].b == b) { -- btree_node_unlock(trans, path, level); -- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); -- } -+ bch2_trans_node_drop(trans, b); - } - - static void bch2_btree_node_free_never_used(struct btree_update *as, -@@ -270,8 +259,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, - { - struct bch_fs *c = as->c; - struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL]; -- struct btree_path *path; -- unsigned i, level = b->c.level; - - BUG_ON(!list_empty(&b->write_blocked)); - BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as)); -@@ -293,11 +280,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, - - six_unlock_intent(&b->c.lock); - -- trans_for_each_path(trans, path, i) -- if (path->l[level].b == b) { -- btree_node_unlock(trans, path, level); -- path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); -- } -+ bch2_trans_node_drop(trans, b); - } - - static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, -@@ -809,7 +792,7 @@ static void btree_update_nodes_written(struct btree_update *as) - mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); 
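[Aside, not part of the patch: struct journal_keys earlier in this series documents a gap buffer, where the free space sits at @gap rather than at the end of the array, so sequential insertions cost O(n) total instead of O(n^2). A minimal sketch of that idea, using a plain int array and hypothetical names (struct gapbuf, gapbuf_insert) rather than anything from bcachefs:]

#include <stddef.h>
#include <string.h>

struct gapbuf {
	int	*data;
	size_t	nr;	/* live elements */
	size_t	gap;	/* index where the empty region begins */
	size_t	size;	/* total allocated slots */
};

/* Shift elements so the empty region starts at @pos. */
static void gapbuf_move_gap(struct gapbuf *b, size_t pos)
{
	size_t gap_len = b->size - b->nr;

	if (pos < b->gap)
		memmove(b->data + pos + gap_len, b->data + pos,
			(b->gap - pos) * sizeof(*b->data));
	else if (pos > b->gap)
		memmove(b->data + b->gap, b->data + b->gap + gap_len,
			(pos - b->gap) * sizeof(*b->data));
	b->gap = pos;
}

/* Insert @v at logical position @pos: O(1) when the gap is already there. */
static int gapbuf_insert(struct gapbuf *b, size_t pos, int v)
{
	if (b->nr == b->size)
		return -1;	/* a real implementation would grow the buffer here */

	gapbuf_move_gap(b, pos);
	b->data[b->gap++] = v;
	b->nr++;
	return 0;
}

[As long as keys arrive in roughly sequential order the gap rarely has to move, which is the behaviour the journal_keys comment describes. End of aside.]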
- six_unlock_write(&b->c.lock); - -- btree_node_write_if_need(c, b, SIX_LOCK_intent); -+ btree_node_write_if_need(trans, b, SIX_LOCK_intent); - btree_node_unlock(trans, path, b->c.level); - bch2_path_put(trans, path_idx, true); - } -@@ -830,7 +813,7 @@ static void btree_update_nodes_written(struct btree_update *as) - b = as->new_nodes[i]; - - btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); -- btree_node_write_if_need(c, b, SIX_LOCK_read); -+ btree_node_write_if_need(trans, b, SIX_LOCK_read); - six_unlock_read(&b->c.lock); - } - -@@ -1366,9 +1349,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, - if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) - bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - -- if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), -- btree_node_type(b), BCH_VALIDATE_write) ?: -- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { -+ struct bkey_validate_context from = (struct bkey_validate_context) { -+ .from = BKEY_VALIDATE_btree_node, -+ .level = b->c.level, -+ .btree = b->c.btree_id, -+ .flags = BCH_VALIDATE_commit, -+ }; -+ if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), from) ?: -+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), from)) { - bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); - dump_stack(); - } -@@ -1418,15 +1406,26 @@ bch2_btree_insert_keys_interior(struct btree_update *as, - (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) - ; - -- while (!bch2_keylist_empty(keys)) { -- insert = bch2_keylist_front(keys); -+ for (; -+ insert != keys->top && bpos_le(insert->k.p, b->key.k.p); -+ insert = bkey_next(insert)) -+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - -- if (bpos_gt(insert->k.p, b->key.k.p)) -- break; -+ if (bch2_btree_node_check_topology(trans, b)) { -+ struct printbuf buf = PRINTBUF; - -- bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); -- bch2_keylist_pop_front(keys); -+ for (struct bkey_i *k = keys->keys; -+ k != insert; -+ k = bkey_next(k)) { -+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); -+ prt_newline(&buf); -+ } -+ -+ panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); - } -+ -+ memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); -+ keys->top_p -= insert->_data - keys->keys_p; - } - - static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) -@@ -1575,8 +1574,6 @@ static void btree_split_insert_keys(struct btree_update *as, - bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); -- -- BUG_ON(bch2_btree_node_check_topology(trans, b)); - } - } - -@@ -1599,8 +1596,6 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - if (ret) - return ret; - -- bch2_btree_interior_update_will_free_node(as, b); -- - if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { - struct btree *n[2]; - -@@ -1699,16 +1694,18 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, - if (ret) - goto err; - -+ bch2_btree_interior_update_will_free_node(as, b); -+ - if (n3) { - bch2_btree_update_get_open_buckets(as, n3); -- bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n3, SIX_LOCK_intent, 0); - } - if (n2) { - bch2_btree_update_get_open_buckets(as, n2); -- bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n2, SIX_LOCK_intent, 0); - 
} - bch2_btree_update_get_open_buckets(as, n1); -- bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n1, SIX_LOCK_intent, 0); - - /* - * The old node must be freed (in memory) _before_ unlocking the new -@@ -1827,8 +1824,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t - - btree_update_updated_node(as, b); - bch2_btree_node_unlock_write(trans, path, b); -- -- BUG_ON(bch2_btree_node_check_topology(trans, b)); - return 0; - split: - /* -@@ -1905,7 +1900,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * - BUG_ON(ret); - - bch2_btree_update_get_open_buckets(as, n); -- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); - bch2_trans_node_add(trans, path, n); - six_unlock_intent(&n->c.lock); - -@@ -1953,8 +1948,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - u64 start_time = local_clock(); - int ret = 0; - -- bch2_trans_verify_not_in_restart(trans); -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); - BUG_ON(!trans->paths[path].should_be_locked); - BUG_ON(!btree_node_locked(&trans->paths[path], level)); - -@@ -2058,9 +2052,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - - trace_and_count(c, btree_node_merge, trans, b); - -- bch2_btree_interior_update_will_free_node(as, b); -- bch2_btree_interior_update_will_free_node(as, m); -- - n = bch2_btree_node_alloc(as, trans, b->c.level); - - SET_BTREE_NODE_SEQ(n->data, -@@ -2096,10 +2087,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, - if (ret) - goto err_free_update; - -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ - bch2_trans_verify_paths(trans); - - bch2_btree_update_get_open_buckets(as, n); -- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); - - bch2_btree_node_free_inmem(trans, trans->paths + path, b); - bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m); -@@ -2150,8 +2144,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - if (ret) - goto out; - -- bch2_btree_interior_update_will_free_node(as, b); -- - n = bch2_btree_node_alloc_replacement(as, trans, b); - - bch2_btree_build_aux_trees(n); -@@ -2175,8 +2167,10 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, - if (ret) - goto err; - -+ bch2_btree_interior_update_will_free_node(as, b); -+ - bch2_btree_update_get_open_buckets(as, n); -- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); -+ bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0); - - bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b); - -@@ -2201,42 +2195,50 @@ struct async_btree_rewrite { - struct list_head list; - enum btree_id btree_id; - unsigned level; -- struct bpos pos; -- __le64 seq; -+ struct bkey_buf key; - }; - - static int async_btree_node_rewrite_trans(struct btree_trans *trans, - struct async_btree_rewrite *a) - { -- struct bch_fs *c = trans->c; - struct btree_iter iter; -- struct btree *b; -- int ret; -- -- bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, -+ bch2_trans_node_iter_init(trans, &iter, -+ a->btree_id, a->key.k->k.p, - BTREE_MAX_DEPTH, a->level, 0); -- b = bch2_btree_iter_peek_node(&iter); -- ret = PTR_ERR_OR_ZERO(b); -+ struct btree *b = bch2_btree_iter_peek_node(&iter); -+ int ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto out; - -- if (!b || b->data->keys.seq 
!= a->seq) { -+ bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); -+ ret = found -+ ? bch2_btree_node_rewrite(trans, &iter, b, 0) -+ : -ENOENT; -+ -+#if 0 -+ /* Tracepoint... */ -+ if (!ret || ret == -ENOENT) { -+ struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - -- if (b) -- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -- else -- prt_str(&buf, "(null"); -- bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s", -- __func__, a->seq, buf.buf); -+ if (!ret) { -+ prt_printf(&buf, "rewrite node:\n "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); -+ } else { -+ prt_printf(&buf, "node to rewrite not found:\n want: "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); -+ prt_printf(&buf, "\n got: "); -+ if (b) -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ else -+ prt_str(&buf, "(null)"); -+ } -+ bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); -- goto out; - } -- -- ret = bch2_btree_node_rewrite(trans, &iter, b, 0); -+#endif - out: - bch2_trans_iter_exit(trans, &iter); -- - return ret; - } - -@@ -2247,81 +2249,97 @@ static void async_btree_node_rewrite_work(struct work_struct *work) - struct bch_fs *c = a->c; - - int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); -- bch_err_fn_ratelimited(c, ret); -+ if (ret != -ENOENT) -+ bch_err_fn_ratelimited(c, ret); -+ -+ spin_lock(&c->btree_node_rewrites_lock); -+ list_del(&a->list); -+ spin_unlock(&c->btree_node_rewrites_lock); -+ -+ closure_wake_up(&c->btree_node_rewrites_wait); -+ -+ bch2_bkey_buf_exit(&a->key, c); - bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); - kfree(a); - } - - void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) - { -- struct async_btree_rewrite *a; -- int ret; -- -- a = kmalloc(sizeof(*a), GFP_NOFS); -- if (!a) { -- bch_err(c, "%s: error allocating memory", __func__); -+ struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (!a) - return; -- } - - a->c = c; - a->btree_id = b->c.btree_id; - a->level = b->c.level; -- a->pos = b->key.k.p; -- a->seq = b->data->keys.seq; - INIT_WORK(&a->work, async_btree_node_rewrite_work); - -- if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { -- mutex_lock(&c->pending_node_rewrites_lock); -- list_add(&a->list, &c->pending_node_rewrites); -- mutex_unlock(&c->pending_node_rewrites_lock); -- return; -- } -+ bch2_bkey_buf_init(&a->key); -+ bch2_bkey_buf_copy(&a->key, c, &b->key); - -- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -- if (test_bit(BCH_FS_started, &c->flags)) { -- bch_err(c, "%s: error getting c->writes ref", __func__); -- kfree(a); -- return; -- } -+ bool now = false, pending = false; - -- ret = bch2_fs_read_write_early(c); -- bch_err_msg(c, ret, "going read-write"); -- if (ret) { -- kfree(a); -- return; -- } -+ spin_lock(&c->btree_node_rewrites_lock); -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay && -+ bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -+ list_add(&a->list, &c->btree_node_rewrites); -+ now = true; -+ } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { -+ list_add(&a->list, &c->btree_node_rewrites_pending); -+ pending = true; -+ } -+ spin_unlock(&c->btree_node_rewrites_lock); - -- bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); -+ if (now) { -+ queue_work(c->btree_node_rewrite_worker, &a->work); -+ } else if (pending) { -+ /* bch2_do_pending_node_rewrites will execute */ -+ } else { -+ bch2_bkey_buf_exit(&a->key, c); -+ kfree(a); - } -+} - -- 
queue_work(c->btree_node_rewrite_worker, &a->work); -+void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) -+{ -+ closure_wait_event(&c->btree_node_rewrites_wait, -+ list_empty(&c->btree_node_rewrites)); - } - - void bch2_do_pending_node_rewrites(struct bch_fs *c) - { -- struct async_btree_rewrite *a, *n; -- -- mutex_lock(&c->pending_node_rewrites_lock); -- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -- list_del(&a->list); -+ while (1) { -+ spin_lock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a = -+ list_pop_entry(&c->btree_node_rewrites_pending, -+ struct async_btree_rewrite, list); -+ if (a) -+ list_add(&a->list, &c->btree_node_rewrites); -+ spin_unlock(&c->btree_node_rewrites_lock); -+ -+ if (!a) -+ break; - - bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); - queue_work(c->btree_node_rewrite_worker, &a->work); - } -- mutex_unlock(&c->pending_node_rewrites_lock); - } - - void bch2_free_pending_node_rewrites(struct bch_fs *c) - { -- struct async_btree_rewrite *a, *n; -+ while (1) { -+ spin_lock(&c->btree_node_rewrites_lock); -+ struct async_btree_rewrite *a = -+ list_pop_entry(&c->btree_node_rewrites_pending, -+ struct async_btree_rewrite, list); -+ spin_unlock(&c->btree_node_rewrites_lock); - -- mutex_lock(&c->pending_node_rewrites_lock); -- list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -- list_del(&a->list); -+ if (!a) -+ break; - -+ bch2_bkey_buf_exit(&a->key, c); - kfree(a); - } -- mutex_unlock(&c->pending_node_rewrites_lock); - } - - static int __bch2_btree_node_update_key(struct btree_trans *trans, -@@ -2575,8 +2593,9 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update - prt_printf(out, "%ps: ", (void *) as->ip_started); - bch2_trans_commit_flags_to_text(out, as->flags); - -- prt_printf(out, " btree=%s l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", -- bch2_btree_id_str(as->btree_id), -+ prt_str(out, " "); -+ bch2_btree_id_to_text(out, as->btree_id); -+ prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", - as->update_level_start, - as->update_level_end, - bch2_btree_update_modes[as->mode], -@@ -2677,6 +2696,9 @@ void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) - - void bch2_fs_btree_interior_update_exit(struct bch_fs *c) - { -+ WARN_ON(!list_empty(&c->btree_node_rewrites)); -+ WARN_ON(!list_empty(&c->btree_node_rewrites_pending)); -+ - if (c->btree_node_rewrite_worker) - destroy_workqueue(c->btree_node_rewrite_worker); - if (c->btree_interior_update_worker) -@@ -2692,8 +2714,9 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) - mutex_init(&c->btree_interior_update_lock); - INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); - -- INIT_LIST_HEAD(&c->pending_node_rewrites); -- mutex_init(&c->pending_node_rewrites_lock); -+ INIT_LIST_HEAD(&c->btree_node_rewrites); -+ INIT_LIST_HEAD(&c->btree_node_rewrites_pending); -+ spin_lock_init(&c->btree_node_rewrites_lock); - } - - int bch2_fs_btree_interior_update_init(struct bch_fs *c) -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -index 10f400957f21..7930ffea3075 100644 ---- a/fs/bcachefs/btree_update_interior.h -+++ b/fs/bcachefs/btree_update_interior.h -@@ -159,7 +159,7 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - unsigned level, - unsigned flags) - { -- bch2_trans_verify_not_unlocked(trans); -+ bch2_trans_verify_not_unlocked_or_in_restart(trans); 
- - return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, - btree_prev_sib) ?: -@@ -334,6 +334,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); - struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, - struct jset_entry *, unsigned long); - -+void bch2_async_btree_node_rewrites_flush(struct bch_fs *); - void bch2_do_pending_node_rewrites(struct bch_fs *); - void bch2_free_pending_node_rewrites(struct bch_fs *); - -diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -index 1639c60dffa0..b56c4987b8c9 100644 ---- a/fs/bcachefs/btree_write_buffer.c -+++ b/fs/bcachefs/btree_write_buffer.c -@@ -19,8 +19,6 @@ - static int bch2_btree_write_buffer_journal_flush(struct journal *, - struct journal_entry_pin *, u64); - --static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *); -- - static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r) - { - return (cmp_int(l->hi, r->hi) ?: -@@ -314,6 +312,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - darray_for_each(wb->sorted, i) { - struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - -+ BUG_ON(!btree_type_uses_write_buffer(k->btree)); -+ - for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) - prefetch(&wb->flushing.keys.data[n->idx]); - -@@ -481,21 +481,55 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) - return ret; - } - --static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) -+static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) -+{ -+ struct journal_keys_to_wb dst; -+ int ret = 0; -+ -+ bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); -+ -+ for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { -+ jset_entry_for_each_key(entry, k) { -+ ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); -+ if (ret) -+ goto out; -+ } -+ -+ entry->type = BCH_JSET_ENTRY_btree_keys; -+ } -+out: -+ ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; -+ return ret; -+} -+ -+static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) - { - struct journal *j = &c->journal; - struct journal_buf *buf; -+ bool blocked; - int ret = 0; - -- while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, seq))) { -+ while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, max_seq, &blocked))) { - ret = bch2_journal_keys_to_write_buffer(c, buf); -+ -+ if (!blocked && !ret) { -+ spin_lock(&j->lock); -+ buf->need_flush_to_write_buffer = false; -+ spin_unlock(&j->lock); -+ } -+ - mutex_unlock(&j->buf_lock); -+ -+ if (blocked) { -+ bch2_journal_unblock(j); -+ break; -+ } - } - - return ret; - } - --static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, -+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq, - bool *did_work) - { - struct bch_fs *c = trans->c; -@@ -505,7 +539,7 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, - do { - bch2_trans_unlock(trans); - -- fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); -+ fetch_from_journal_err = fetch_wb_keys_from_journal(c, max_seq); - - *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr; - -@@ -518,8 +552,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, - mutex_unlock(&wb->flushing.lock); - } while (!ret && - 
(fetch_from_journal_err || -- (wb->inc.pin.seq && wb->inc.pin.seq <= seq) || -- (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq))); -+ (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || -+ (wb->flushing.pin.seq && wb->flushing.pin.seq <= max_seq))); - - return ret; - } -@@ -600,6 +634,14 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, - bch2_bkey_buf_init(&tmp); - - if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { -+ if (trace_write_buffer_maybe_flush_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, referring_k); -+ trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); -+ printbuf_exit(&buf); -+ } -+ - bch2_bkey_buf_reassemble(&tmp, c, referring_k); - - if (bkey_is_btree_ptr(referring_k.k)) { -@@ -771,31 +813,6 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_ - return ret; - } - --static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf) --{ -- struct journal_keys_to_wb dst; -- int ret = 0; -- -- bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq)); -- -- for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) { -- jset_entry_for_each_key(entry, k) { -- ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k); -- if (ret) -- goto out; -- } -- -- entry->type = BCH_JSET_ENTRY_btree_keys; -- } -- -- spin_lock(&c->journal.lock); -- buf->need_flush_to_write_buffer = false; -- spin_unlock(&c->journal.lock); --out: -- ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret; -- return ret; --} -- - static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size) - { - if (wb->keys.size >= new_size) -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -index ec7d9a59bea9..345b117a4a4a 100644 ---- a/fs/bcachefs/buckets.c -+++ b/fs/bcachefs/buckets.c -@@ -18,7 +18,9 @@ - #include "error.h" - #include "inode.h" - #include "movinggc.h" -+#include "rebalance.h" - #include "recovery.h" -+#include "recovery_passes.h" - #include "reflink.h" - #include "replicas.h" - #include "subvolume.h" -@@ -260,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- percpu_down_read(&c->mark_lock); -- - bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); - if (ret) -@@ -362,7 +362,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - bch_info(c, "new key %s", buf.buf); - } - -- percpu_up_read(&c->mark_lock); - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, - BTREE_ITER_intent|BTREE_ITER_all_snapshots); -@@ -371,8 +370,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - BTREE_UPDATE_internal_snapshot_node| - BTREE_TRIGGER_norun); - bch2_trans_iter_exit(trans, &iter); -- percpu_down_read(&c->mark_lock); -- - if (ret) - goto err; - -@@ -380,7 +377,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, - bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); - } - err: -- percpu_up_read(&c->mark_lock); - printbuf_exit(&buf); - return ret; - } -@@ -401,8 +397,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - BUG_ON(!sectors); - - if (gen_after(ptr->gen, b_gen)) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_gen_newer_than_bucket_gen, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_gen_newer_than_bucket_gen, - 
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -415,8 +411,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_too_stale, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_too_stale, - "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -435,8 +431,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if (b_gen != ptr->gen) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- stale_dirty_ptr, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, stale_dirty_ptr, - "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -451,8 +447,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- ptr_bucket_data_type_mismatch, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, ptr_bucket_data_type_mismatch, - "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -466,8 +462,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - } - - if ((u64) *bucket_sectors + sectors > U32_MAX) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- bucket_sector_count_overflow, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, bucket_sector_count_overflow, - "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, -@@ -485,7 +481,9 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, - printbuf_exit(&buf); - return ret; - err: -+fsck_err: - bch2_dump_trans_updates(trans); -+ bch2_inconsistent_error(c); - ret = -BCH_ERR_bucket_ref_update; - goto out; - } -@@ -543,7 +541,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, - struct bkey_s_c k, - const struct extent_ptr_decoded *p, - s64 sectors, enum bch_data_type ptr_data_type, -- struct bch_alloc_v4 *a) -+ struct bch_alloc_v4 *a, -+ bool insert) - { - u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : - !p->ptr.cached ? &a->dirty_sectors : -@@ -553,8 +552,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, - - if (ret) - return ret; -- -- alloc_data_type_set(a, ptr_data_type); -+ if (insert) -+ alloc_data_type_set(a, ptr_data_type); - return 0; - } - -@@ -570,8 +569,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); -- *sectors = insert ? abs_sectors : -abs_sectors; -+ struct bkey_i_backpointer bp; -+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); -+ -+ *sectors = insert ? 
bp.v.bucket_len : -(s64) bp.v.bucket_len; - - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); - if (unlikely(!ca)) { -@@ -580,41 +581,36 @@ static int bch2_trigger_pointer(struct btree_trans *trans, - goto err; - } - -- struct bpos bucket; -- struct bch_backpointer bp; -- __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors); -+ struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - - if (flags & BTREE_TRIGGER_transactional) { - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: -- __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v); -+ __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); - if (ret) - goto err; - - if (!p.ptr.cached) { -- ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert); -+ ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); - if (ret) - goto err; - } - } - - if (flags & BTREE_TRIGGER_gc) { -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, bucket.offset); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", - p.ptr.dev, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -BCH_ERR_trigger_pointer; -- goto err_unlock; -+ goto err; - } - - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; -- ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); -+ ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); - alloc_to_bucket(g, new); - bucket_unlock(g); --err_unlock: -- percpu_up_read(&c->mark_lock); - - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); -@@ -951,6 +947,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - enum bch_data_type type, - unsigned sectors) - { -+ struct bch_fs *c = trans->c; - struct btree_iter iter; - int ret = 0; - -@@ -960,8 +957,8 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - return PTR_ERR(a); - - if (a->v.data_type && type && a->v.data_type != type) { -- bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -- bucket_metadata_type_mismatch, -+ bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); -+ log_fsck_err(trans, bucket_metadata_type_mismatch, - "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - iter.pos.inode, iter.pos.offset, a->v.gen, -@@ -979,6 +976,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - } - err: -+fsck_err: - bch2_trans_iter_exit(trans, &iter); - return ret; - } -@@ -990,11 +988,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - struct bch_fs *c = trans->c; - int ret = 0; - -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, b); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", - ca->dev_idx, bch2_data_type_str(data_type))) -- goto err_unlock; -+ goto err; - - bucket_lock(g); - struct bch_alloc_v4 old = bucket_m_to_alloc(*g); -@@ -1004,26 +1001,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * - "different types of data in same bucket: %s, %s", - bch2_data_type_str(g->data_type), - bch2_data_type_str(data_type))) -- goto err; -+ goto err_unlock; - - if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, - "bucket %u:%llu gen %u data 
type %s sector count overflow: %u + %u > bucket size", - ca->dev_idx, b, g->gen, - bch2_data_type_str(g->data_type ?: data_type), - g->dirty_sectors, sectors)) -- goto err; -+ goto err_unlock; - - g->data_type = data_type; - g->dirty_sectors += sectors; - struct bch_alloc_v4 new = bucket_m_to_alloc(*g); - bucket_unlock(g); -- percpu_up_read(&c->mark_lock); - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - return ret; --err: -- bucket_unlock(g); - err_unlock: -- percpu_up_read(&c->mark_lock); -+ bucket_unlock(g); -+err: - return -BCH_ERR_metadata_bucket_inconsistency; - } - -@@ -1155,6 +1150,31 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c) - return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); - } - -+bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ u64 b_offset = bucket_to_sector(ca, b); -+ u64 b_end = bucket_to_sector(ca, b + 1); -+ unsigned i; -+ -+ if (!b) -+ return true; -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ u64 end = offset + (1 << layout->sb_max_size_bits); -+ -+ if (!(offset >= b_end || end <= b_offset)) -+ return true; -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) -+ if (b == ca->journal.buckets[i]) -+ return true; -+ -+ return false; -+} -+ - /* Disk reservations: */ - - #define SECTORS_CACHE 1024 -@@ -1238,7 +1258,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c) - for_each_member_device(c, ca) { - BUG_ON(ca->buckets_nouse); - -- ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * -+ ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * - sizeof(unsigned long), - GFP_KERNEL|__GFP_ZERO); - if (!ca->buckets_nouse) { -@@ -1264,10 +1284,15 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - bool resize = ca->bucket_gens != NULL; - int ret; - -- BUG_ON(resize && ca->buckets_nouse); -+ if (resize) -+ lockdep_assert_held(&c->state_lock); -+ -+ if (resize && ca->buckets_nouse) -+ return -BCH_ERR_no_resize_with_buckets_nouse; - -- if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, -- GFP_KERNEL|__GFP_ZERO))) { -+ bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!bucket_gens) { - ret = -BCH_ERR_ENOMEM_bucket_gens; - goto err; - } -@@ -1277,19 +1302,16 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; - -- if (resize) { -- down_write(&ca->bucket_lock); -- percpu_down_write(&c->mark_lock); -- } -- - old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); - - if (resize) { -- size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); -- -+ bucket_gens->nbuckets = min(bucket_gens->nbuckets, -+ old_bucket_gens->nbuckets); -+ bucket_gens->nbuckets_minus_first = -+ bucket_gens->nbuckets - bucket_gens->first_bucket; - memcpy(bucket_gens->b, - old_bucket_gens->b, -- n); -+ bucket_gens->nbuckets); - } - - rcu_assign_pointer(ca->bucket_gens, bucket_gens); -@@ -1297,11 +1319,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) - - nbuckets = ca->mi.nbuckets; - -- if (resize) { -- percpu_up_write(&c->mark_lock); -- up_write(&ca->bucket_lock); -- } -- - ret = 0; - err: - if (bucket_gens) -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -index ccc78bfe2fd4..a9acdd6c0c86 100644 ---- a/fs/bcachefs/buckets.h -+++ 
b/fs/bcachefs/buckets.h -@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b) - - static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) - { -- return genradix_ptr(&ca->buckets_gc, b); -+ return bucket_valid(ca, b) -+ ? genradix_ptr(&ca->buckets_gc, b) -+ : NULL; - } - - static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) - { - return rcu_dereference_check(ca->bucket_gens, -- !ca->fs || -- percpu_rwsem_is_held(&ca->fs->mark_lock) || -- lockdep_is_held(&ca->fs->state_lock) || -- lockdep_is_held(&ca->bucket_lock)); -+ lockdep_is_held(&ca->fs->state_lock)); - } - - static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) -@@ -308,26 +307,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *, - enum btree_iter_update_trigger_flags); - int bch2_trans_mark_dev_sbs(struct bch_fs *); - --static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) --{ -- struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -- u64 b_offset = bucket_to_sector(ca, b); -- u64 b_end = bucket_to_sector(ca, b + 1); -- unsigned i; -- -- if (!b) -- return true; -- -- for (i = 0; i < layout->nr_superblocks; i++) { -- u64 offset = le64_to_cpu(layout->sb_offset[i]); -- u64 end = offset + (1 << layout->sb_max_size_bits); -- -- if (!(offset >= b_end || end <= b_offset)) -- return true; -- } -- -- return false; --} -+bool bch2_is_superblock_bucket(struct bch_dev *, u64); - - static inline const char *bch2_data_type_str(enum bch_data_type type) - { -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -index 28bd09a253c8..7174047b8e92 100644 ---- a/fs/bcachefs/buckets_types.h -+++ b/fs/bcachefs/buckets_types.h -@@ -24,7 +24,7 @@ struct bucket_gens { - u16 first_bucket; - size_t nbuckets; - size_t nbuckets_minus_first; -- u8 b[]; -+ u8 b[] __counted_by(nbuckets); - }; - - struct bch_dev_usage { -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -index 2182b555c112..46e9e32105a9 100644 ---- a/fs/bcachefs/chardev.c -+++ b/fs/bcachefs/chardev.c -@@ -6,11 +6,11 @@ - #include "buckets.h" - #include "chardev.h" - #include "disk_accounting.h" -+#include "fsck.h" - #include "journal.h" - #include "move.h" - #include "recovery_passes.h" - #include "replicas.h" --#include "super.h" - #include "super-io.h" - #include "thread_with_file.h" - -@@ -127,130 +127,6 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg - } - #endif - --struct fsck_thread { -- struct thread_with_stdio thr; -- struct bch_fs *c; -- struct bch_opts opts; --}; -- --static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) --{ -- struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); -- kfree(thr); --} -- --static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) --{ -- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -- struct bch_fs *c = thr->c; -- -- int ret = PTR_ERR_OR_ZERO(c); -- if (ret) -- return ret; -- -- ret = bch2_fs_start(thr->c); -- if (ret) -- goto err; -- -- if (test_bit(BCH_FS_errors_fixed, &c->flags)) { -- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); -- ret |= 1; -- } -- if (test_bit(BCH_FS_error, &c->flags)) { -- bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); -- ret |= 4; -- } --err: -- bch2_fs_stop(c); -- return ret; --} -- --static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { -- .exit = bch2_fsck_thread_exit, -- .fn = bch2_fsck_offline_thread_fn, --}; -- --static long 
bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) --{ -- struct bch_ioctl_fsck_offline arg; -- struct fsck_thread *thr = NULL; -- darray_str(devs) = {}; -- long ret = 0; -- -- if (copy_from_user(&arg, user_arg, sizeof(arg))) -- return -EFAULT; -- -- if (arg.flags) -- return -EINVAL; -- -- if (!capable(CAP_SYS_ADMIN)) -- return -EPERM; -- -- for (size_t i = 0; i < arg.nr_devs; i++) { -- u64 dev_u64; -- ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); -- if (ret) -- goto err; -- -- char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); -- ret = PTR_ERR_OR_ZERO(dev_str); -- if (ret) -- goto err; -- -- ret = darray_push(&devs, dev_str); -- if (ret) { -- kfree(dev_str); -- goto err; -- } -- } -- -- thr = kzalloc(sizeof(*thr), GFP_KERNEL); -- if (!thr) { -- ret = -ENOMEM; -- goto err; -- } -- -- thr->opts = bch2_opts_empty(); -- -- if (arg.opts) { -- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -- ret = PTR_ERR_OR_ZERO(optstr) ?: -- bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); -- if (!IS_ERR(optstr)) -- kfree(optstr); -- -- if (ret) -- goto err; -- } -- -- opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); -- opt_set(thr->opts, read_only, 1); -- opt_set(thr->opts, ratelimit_errors, 0); -- -- /* We need request_key() to be called before we punt to kthread: */ -- opt_set(thr->opts, nostart, true); -- -- bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); -- -- thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); -- -- if (!IS_ERR(thr->c) && -- thr->c->opts.errors == BCH_ON_ERROR_panic) -- thr->c->opts.errors = BCH_ON_ERROR_ro; -- -- ret = __bch2_run_thread_with_stdio(&thr->thr); --out: -- darray_for_each(devs, i) -- kfree(*i); -- darray_exit(&devs); -- return ret; --err: -- if (thr) -- bch2_fsck_thread_exit(&thr->thr); -- pr_err("ret %s", bch2_err_str(ret)); -- goto out; --} -- - static long bch2_global_ioctl(unsigned cmd, void __user *arg) - { - long ret; -@@ -775,99 +651,6 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, - return ret; - } - --static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) --{ -- struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -- struct bch_fs *c = thr->c; -- -- c->stdio_filter = current; -- c->stdio = &thr->thr.stdio; -- -- /* -- * XXX: can we figure out a way to do this without mucking with c->opts? 
-- */ -- unsigned old_fix_errors = c->opts.fix_errors; -- if (opt_defined(thr->opts, fix_errors)) -- c->opts.fix_errors = thr->opts.fix_errors; -- else -- c->opts.fix_errors = FSCK_FIX_ask; -- -- c->opts.fsck = true; -- set_bit(BCH_FS_fsck_running, &c->flags); -- -- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; -- int ret = bch2_run_online_recovery_passes(c); -- -- clear_bit(BCH_FS_fsck_running, &c->flags); -- bch_err_fn(c, ret); -- -- c->stdio = NULL; -- c->stdio_filter = NULL; -- c->opts.fix_errors = old_fix_errors; -- -- up(&c->online_fsck_mutex); -- bch2_ro_ref_put(c); -- return ret; --} -- --static const struct thread_with_stdio_ops bch2_online_fsck_ops = { -- .exit = bch2_fsck_thread_exit, -- .fn = bch2_fsck_online_thread_fn, --}; -- --static long bch2_ioctl_fsck_online(struct bch_fs *c, -- struct bch_ioctl_fsck_online arg) --{ -- struct fsck_thread *thr = NULL; -- long ret = 0; -- -- if (arg.flags) -- return -EINVAL; -- -- if (!capable(CAP_SYS_ADMIN)) -- return -EPERM; -- -- if (!bch2_ro_ref_tryget(c)) -- return -EROFS; -- -- if (down_trylock(&c->online_fsck_mutex)) { -- bch2_ro_ref_put(c); -- return -EAGAIN; -- } -- -- thr = kzalloc(sizeof(*thr), GFP_KERNEL); -- if (!thr) { -- ret = -ENOMEM; -- goto err; -- } -- -- thr->c = c; -- thr->opts = bch2_opts_empty(); -- -- if (arg.opts) { -- char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -- -- ret = PTR_ERR_OR_ZERO(optstr) ?: -- bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); -- if (!IS_ERR(optstr)) -- kfree(optstr); -- -- if (ret) -- goto err; -- } -- -- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops); --err: -- if (ret < 0) { -- bch_err_fn(c, ret); -- if (thr) -- bch2_fsck_thread_exit(&thr->thr); -- up(&c->online_fsck_mutex); -- bch2_ro_ref_put(c); -- } -- return ret; --} -- - #define BCH_IOCTL(_name, _argtype) \ - do { \ - _argtype i; \ -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -index ce8fc677bef9..23a383577d4c 100644 ---- a/fs/bcachefs/checksum.c -+++ b/fs/bcachefs/checksum.c -@@ -2,6 +2,7 @@ - #include "bcachefs.h" - #include "checksum.h" - #include "errcode.h" -+#include "error.h" - #include "super.h" - #include "super-io.h" - -@@ -252,6 +253,10 @@ int bch2_encrypt(struct bch_fs *c, unsigned type, - if (!bch2_csum_type_is_encryption(type)) - return 0; - -+ if (bch2_fs_inconsistent_on(!c->chacha20, -+ c, "attempting to encrypt without encryption key")) -+ return -BCH_ERR_no_encryption_key; -+ - return do_encrypt(c->chacha20, nonce, data, len); - } - -@@ -337,8 +342,9 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, - size_t sgl_len = 0; - int ret = 0; - -- if (!bch2_csum_type_is_encryption(type)) -- return 0; -+ if (bch2_fs_inconsistent_on(!c->chacha20, -+ c, "attempting to encrypt without encryption key")) -+ return -BCH_ERR_no_encryption_key; - - darray_init(&sgl); - -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -index e40499fde9a4..43b9d71f2f2b 100644 ---- a/fs/bcachefs/checksum.h -+++ b/fs/bcachefs/checksum.h -@@ -109,7 +109,7 @@ int bch2_enable_encryption(struct bch_fs *, bool); - void bch2_fs_encryption_exit(struct bch_fs *); - int bch2_fs_encryption_init(struct bch_fs *); - --static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type, - bool data) - { - switch (type) { -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -index 1410365a8891..114bf2f3879f 100644 ---- a/fs/bcachefs/compress.c -+++ 
b/fs/bcachefs/compress.c -@@ -2,13 +2,34 @@ - #include "bcachefs.h" - #include "checksum.h" - #include "compress.h" -+#include "error.h" - #include "extents.h" -+#include "io_write.h" -+#include "opts.h" - #include "super-io.h" - - #include - #include - #include - -+static inline enum bch_compression_opts bch2_compression_type_to_opt(enum bch_compression_type type) -+{ -+ switch (type) { -+ case BCH_COMPRESSION_TYPE_none: -+ case BCH_COMPRESSION_TYPE_incompressible: -+ return BCH_COMPRESSION_OPT_none; -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ return BCH_COMPRESSION_OPT_lz4; -+ case BCH_COMPRESSION_TYPE_gzip: -+ return BCH_COMPRESSION_OPT_gzip; -+ case BCH_COMPRESSION_TYPE_zstd: -+ return BCH_COMPRESSION_OPT_zstd; -+ default: -+ BUG(); -+ } -+} -+ - /* Bounce buffer: */ - struct bbuf { - void *b; -@@ -158,6 +179,19 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - void *workspace; - int ret; - -+ enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type); -+ mempool_t *workspace_pool = &c->compress_workspace[opt]; -+ if (unlikely(!mempool_initialized(workspace_pool))) { -+ if (fsck_err(c, compression_type_not_marked_in_sb, -+ "compression type %s set but not marked in superblock", -+ __bch2_compression_types[crc.compression_type])) -+ ret = bch2_check_set_has_compressed_data(c, opt); -+ else -+ ret = -BCH_ERR_compression_workspace_not_initialized; -+ if (ret) -+ goto out; -+ } -+ - src_data = bio_map_or_bounce(c, src, READ); - - switch (crc.compression_type) { -@@ -176,13 +210,13 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - .avail_out = dst_len, - }; - -- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - - zlib_set_workspace(&strm, workspace); - zlib_inflateInit2(&strm, -MAX_WBITS); - ret = zlib_inflate(&strm, Z_FINISH); - -- mempool_free(workspace, &c->decompress_workspace); -+ mempool_free(workspace, workspace_pool); - - if (ret != Z_STREAM_END) - goto err; -@@ -195,14 +229,14 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - if (real_src_len > src_len - 4) - goto err; - -- workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); - - ret = zstd_decompress_dctx(ctx, - dst_data, dst_len, - src_data.b + 4, real_src_len); - -- mempool_free(workspace, &c->decompress_workspace); -+ mempool_free(workspace, workspace_pool); - - if (ret != dst_len) - goto err; -@@ -212,6 +246,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - BUG(); - } - ret = 0; -+fsck_err: - out: - bio_unmap_or_unbounce(c, src_data); - return ret; -@@ -220,11 +255,14 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, - goto out; - } - --int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, -- struct bch_extent_crc_unpacked *crc) -+int bch2_bio_uncompress_inplace(struct bch_write_op *op, -+ struct bio *bio) - { -+ struct bch_fs *c = op->c; -+ struct bch_extent_crc_unpacked *crc = &op->crc; - struct bbuf data = { NULL }; - size_t dst_len = crc->uncompressed_size << 9; -+ int ret = 0; - - /* bio must own its pages: */ - BUG_ON(!bio->bi_vcnt); -@@ -232,17 +270,26 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, - - if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max || - crc->compressed_size << 9 > c->opts.encoded_extent_max) { -- bch_err(c, "error 
rewriting existing data: extent too big"); -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "error rewriting existing data: extent too big"); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - return -EIO; - } - - data = __bounce_alloc(c, dst_len, WRITE); - - if (__bio_uncompress(c, bio, data.b, *crc)) { -- if (!c->opts.no_data_io) -- bch_err(c, "error rewriting existing data: decompression error"); -- bio_unmap_or_unbounce(c, data); -- return -EIO; -+ if (!c->opts.no_data_io) { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "error rewriting existing data: decompression error"); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ ret = -EIO; -+ goto err; - } - - /* -@@ -259,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, - crc->uncompressed_size = crc->live_size; - crc->offset = 0; - crc->csum = (struct bch_csum) { 0, 0 }; -- -+err: - bio_unmap_or_unbounce(c, data); -- return 0; -+ return ret; - } - - int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, -@@ -394,8 +441,21 @@ static unsigned __bio_compress(struct bch_fs *c, - unsigned pad; - int ret = 0; - -- BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -- BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ /* bch2_compression_decode catches unknown compression types: */ -+ BUG_ON(compression.type >= BCH_COMPRESSION_OPT_NR); -+ -+ mempool_t *workspace_pool = &c->compress_workspace[compression.type]; -+ if (unlikely(!mempool_initialized(workspace_pool))) { -+ if (fsck_err(c, compression_opt_not_marked_in_sb, -+ "compression opt %s set but not marked in superblock", -+ bch2_compression_opts[compression.type])) { -+ ret = bch2_check_set_has_compressed_data(c, compression.type); -+ if (ret) /* memory allocation failure, don't compress */ -+ return 0; -+ } else { -+ return 0; -+ } -+ } - - /* If it's only one block, don't bother trying to compress: */ - if (src->bi_iter.bi_size <= c->opts.block_size) -@@ -404,7 +464,7 @@ static unsigned __bio_compress(struct bch_fs *c, - dst_data = bio_map_or_bounce(c, dst, WRITE); - src_data = bio_map_or_bounce(c, src, READ); - -- workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS); -+ workspace = mempool_alloc(workspace_pool, GFP_NOFS); - - *src_len = src->bi_iter.bi_size; - *dst_len = dst->bi_iter.bi_size; -@@ -447,7 +507,7 @@ static unsigned __bio_compress(struct bch_fs *c, - *src_len = round_down(*src_len, block_bytes(c)); - } - -- mempool_free(workspace, &c->compress_workspace[compression_type]); -+ mempool_free(workspace, workspace_pool); - - if (ret) - goto err; -@@ -477,6 +537,9 @@ static unsigned __bio_compress(struct bch_fs *c, - err: - ret = BCH_COMPRESSION_TYPE_incompressible; - goto out; -+fsck_err: -+ ret = 0; -+ goto out; - } - - unsigned bch2_bio_compress(struct bch_fs *c, -@@ -559,7 +622,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) - { - unsigned i; - -- mempool_exit(&c->decompress_workspace); - for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) - mempool_exit(&c->compress_workspace[i]); - mempool_exit(&c->compression_bounce[WRITE]); -@@ -568,7 +630,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) - - static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - { -- size_t decompress_workspace_size = 0; - ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), - c->opts.encoded_extent_max); - -@@ -576,19 +637,17 @@ static int __bch2_fs_compress_init(struct 
bch_fs *c, u64 features) - - struct { - unsigned feature; -- enum bch_compression_type type; -+ enum bch_compression_opts type; - size_t compress_workspace; -- size_t decompress_workspace; - } compression_types[] = { -- { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, -- max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), -- 0 }, -- { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -- zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -- zlib_inflate_workspacesize(), }, -- { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -- c->zstd_workspace_size, -- zstd_dctx_workspace_bound() }, -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_OPT_lz4, -+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) }, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_OPT_gzip, -+ max(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize()) }, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_OPT_zstd, -+ max(c->zstd_workspace_size, -+ zstd_dctx_workspace_bound()) }, - }, *i; - bool have_compressed = false; - -@@ -613,9 +672,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - for (i = compression_types; - i < compression_types + ARRAY_SIZE(compression_types); - i++) { -- decompress_workspace_size = -- max(decompress_workspace_size, i->decompress_workspace); -- - if (!(features & (1 << i->feature))) - continue; - -@@ -628,11 +684,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) - return -BCH_ERR_ENOMEM_compression_workspace_init; - } - -- if (!mempool_initialized(&c->decompress_workspace) && -- mempool_init_kvmalloc_pool(&c->decompress_workspace, -- 1, decompress_workspace_size)) -- return -BCH_ERR_ENOMEM_decompression_workspace_init; -- - return 0; - } - -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -index 607fd5e232c9..bec2f05bfd52 100644 ---- a/fs/bcachefs/compress.h -+++ b/fs/bcachefs/compress.h -@@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) - return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; - } - --int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, -- struct bch_extent_crc_unpacked *); -+struct bch_write_op; -+int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *); - int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, - struct bvec_iter, struct bch_extent_crc_unpacked); - unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, -diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h -index 8f4c3f0665c4..c6151495985f 100644 ---- a/fs/bcachefs/darray.h -+++ b/fs/bcachefs/darray.h -@@ -83,7 +83,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); - for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) - - #define darray_for_each_reverse(_d, _i) \ -- for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) -+ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) - - #define darray_init(_d) \ - do { \ -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -index 8e75a852b358..fbe22e56da91 100644 ---- a/fs/bcachefs/data_update.c -+++ b/fs/bcachefs/data_update.c -@@ -33,7 +33,7 @@ static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - - bkey_for_each_ptr(ptrs, ptr) { -- if (!bch2_dev_tryget(c, ptr->dev)) { -+ if (unlikely(!bch2_dev_tryget(c, ptr->dev))) { - bkey_for_each_ptr(ptrs, ptr2) { - if (ptr2 == ptr) - break; -@@ -91,15 +91,28 @@ static bool 
bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc - return true; - } - --static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) -+static noinline void trace_move_extent_finish2(struct data_update *u, -+ struct bkey_i *new, -+ struct bkey_i *insert) - { -- if (trace_move_extent_finish_enabled()) { -- struct printbuf buf = PRINTBUF; -+ struct bch_fs *c = u->op.c; -+ struct printbuf buf = PRINTBUF; - -- bch2_bkey_val_to_text(&buf, c, k); -- trace_move_extent_finish(c, buf.buf); -- printbuf_exit(&buf); -- } -+ prt_newline(&buf); -+ -+ bch2_data_update_to_text(&buf, u); -+ prt_newline(&buf); -+ -+ prt_str_indented(&buf, "new replicas:\t"); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); -+ prt_newline(&buf); -+ -+ prt_str_indented(&buf, "insert:\t"); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); -+ prt_newline(&buf); -+ -+ trace_move_extent_finish(c, buf.buf); -+ printbuf_exit(&buf); - } - - static void trace_move_extent_fail2(struct data_update *m, -@@ -110,11 +123,8 @@ static void trace_move_extent_fail2(struct data_update *m, - { - struct bch_fs *c = m->op.c; - struct bkey_s_c old = bkey_i_to_s_c(m->k.k); -- const union bch_extent_entry *entry; -- struct bch_extent_ptr *ptr; -- struct extent_ptr_decoded p; - struct printbuf buf = PRINTBUF; -- unsigned i, rewrites_found = 0; -+ unsigned rewrites_found = 0; - - if (!trace_move_extent_fail_enabled()) - return; -@@ -122,27 +132,25 @@ static void trace_move_extent_fail2(struct data_update *m, - prt_str(&buf, msg); - - if (insert) { -- i = 0; -+ const union bch_extent_entry *entry; -+ struct bch_extent_ptr *ptr; -+ struct extent_ptr_decoded p; -+ -+ unsigned ptr_bit = 1; - bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { -- if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ if ((ptr_bit & m->data_opts.rewrite_ptrs) && - (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && - !ptr->cached) -- rewrites_found |= 1U << i; -- i++; -+ rewrites_found |= ptr_bit; -+ ptr_bit <<= 1; - } - } - -- prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u", -- (m->data_opts.rewrite_ptrs & (1 << 0)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 1)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 2)) != 0, -- (m->data_opts.rewrite_ptrs & (1 << 3)) != 0); -+ prt_str(&buf, "rewrites found:\t"); -+ bch2_prt_u64_base2(&buf, rewrites_found); -+ prt_newline(&buf); - -- prt_printf(&buf, "\nrewrites found: %u%u%u%u", -- (rewrites_found & (1 << 0)) != 0, -- (rewrites_found & (1 << 1)) != 0, -- (rewrites_found & (1 << 2)) != 0, -- (rewrites_found & (1 << 3)) != 0); -+ bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); - - prt_str(&buf, "\nold: "); - bch2_bkey_val_to_text(&buf, c, old); -@@ -194,7 +202,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - struct bpos next_pos; - bool should_check_enospc; - s64 i_sectors_delta = 0, disk_sectors_delta = 0; -- unsigned rewrites_found = 0, durability, i; -+ unsigned rewrites_found = 0, durability, ptr_bit; - - bch2_trans_begin(trans); - -@@ -231,16 +239,16 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - * - * Fist, drop rewrite_ptrs from @new: - */ -- i = 0; -+ ptr_bit = 1; - bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) { -- if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ if ((ptr_bit & m->data_opts.rewrite_ptrs) && - (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && - !ptr->cached) { - bch2_extent_ptr_set_cached(c, &m->op.opts, - bkey_i_to_s(insert), ptr); -- 
rewrites_found |= 1U << i; -+ rewrites_found |= ptr_bit; - } -- i++; -+ ptr_bit <<= 1; - } - - if (m->data_opts.rewrite_ptrs && -@@ -323,8 +331,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - * it's been hard to reproduce, so this should give us some more - * information when it does occur: - */ -- int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), -- BCH_VALIDATE_commit); -+ int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), -+ (struct bkey_validate_context) { -+ .btree = m->btree_id, -+ .flags = BCH_VALIDATE_commit, -+ }); - if (invalid) { - struct printbuf buf = PRINTBUF; - -@@ -362,7 +373,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - k.k->p, bkey_start_pos(&insert->k)) ?: - bch2_insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p) ?: -- bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?: -+ bch2_bkey_set_needs_rebalance(c, &op->opts, insert) ?: - bch2_trans_update(trans, &iter, insert, - BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, &op->res, -@@ -374,7 +385,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, - bch2_btree_iter_set_pos(&iter, next_pos); - - this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); -- trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i)); -+ if (trace_move_extent_finish_enabled()) -+ trace_move_extent_finish2(m, &new->k_i, insert); - } - err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -414,14 +426,15 @@ int bch2_data_update_index_update(struct bch_write_op *op) - return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); - } - --void bch2_data_update_read_done(struct data_update *m, -- struct bch_extent_crc_unpacked crc) -+void bch2_data_update_read_done(struct data_update *m) - { -+ m->read_done = true; -+ - /* write bio must own pages: */ - BUG_ON(!m->op.wbio.bio.bi_vcnt); - -- m->op.crc = crc; -- m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9; -+ m->op.crc = m->rbio.pick.crc; -+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; - - closure_call(&m->op.cl, bch2_write, NULL, NULL); - } -@@ -431,31 +444,34 @@ void bch2_data_update_exit(struct data_update *update) - struct bch_fs *c = update->op.c; - struct bkey_s_c k = bkey_i_to_s_c(update->k.k); - -+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio); -+ kfree(update->bvecs); -+ update->bvecs = NULL; -+ - if (c->opts.nocow_enabled) - bkey_nocow_unlock(c, k); - bkey_put_dev_refs(c, k); -- bch2_bkey_buf_exit(&update->k, c); - bch2_disk_reservation_put(c, &update->op.res); -- bch2_bio_free_pages_pool(c, &update->op.wbio.bio); -+ bch2_bkey_buf_exit(&update->k, c); - } - --static void bch2_update_unwritten_extent(struct btree_trans *trans, -- struct data_update *update) -+static int bch2_update_unwritten_extent(struct btree_trans *trans, -+ struct data_update *update) - { - struct bch_fs *c = update->op.c; -- struct bio *bio = &update->op.wbio.bio; - struct bkey_i_extent *e; - struct write_point *wp; - struct closure cl; - struct btree_iter iter; - struct bkey_s_c k; -- int ret; -+ int ret = 0; - - closure_init_stack(&cl); - bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys); - -- while (bio_sectors(bio)) { -- unsigned sectors = bio_sectors(bio); -+ while (bpos_lt(update->op.pos, update->k.k->k.p)) { -+ unsigned sectors = update->k.k->k.p.offset - -+ update->op.pos.offset; - - bch2_trans_begin(trans); - -@@ -491,7 +507,7 @@ static void 
bch2_update_unwritten_extent(struct btree_trans *trans, - bch_err_fn_ratelimited(c, ret); - - if (ret) -- return; -+ break; - - sectors = min(sectors, wp->sectors_free); - -@@ -501,7 +517,6 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, - bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); - bch2_alloc_sectors_done(c, wp); - -- bio_advance(bio, sectors << 9); - update->op.pos.offset += sectors; - - extent_for_each_ptr(extent_i_to_s(e), ptr) -@@ -520,41 +535,60 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, - bch2_trans_unlock(trans); - closure_sync(&cl); - } -+ -+ return ret; - } - - void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) - { -- printbuf_tabstop_push(out, 20); -- prt_str(out, "rewrite ptrs:\t"); -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 20); -+ -+ prt_str_indented(out, "rewrite ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); - prt_newline(out); - -- prt_str(out, "kill ptrs:\t"); -+ prt_str_indented(out, "kill ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->kill_ptrs); - prt_newline(out); - -- prt_str(out, "target:\t"); -+ prt_str_indented(out, "target:\t"); - bch2_target_to_text(out, c, data_opts->target); - prt_newline(out); - -- prt_str(out, "compression:\t"); -- bch2_compression_opt_to_text(out, background_compression(*io_opts)); -+ prt_str_indented(out, "compression:\t"); -+ bch2_compression_opt_to_text(out, io_opts->background_compression); - prt_newline(out); - -- prt_str(out, "opts.replicas:\t"); -+ prt_str_indented(out, "opts.replicas:\t"); - prt_u64(out, io_opts->data_replicas); -+ prt_newline(out); - -- prt_str(out, "extra replicas:\t"); -+ prt_str_indented(out, "extra replicas:\t"); - prt_u64(out, data_opts->extra_replicas); -+ prt_newline(out); - } - - void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) -+{ -+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); -+ prt_newline(out); -+ -+ prt_str_indented(out, "old key:\t"); -+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); -+} -+ -+void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m) - { - bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); - prt_newline(out); -+ printbuf_indent_add(out, 2); - bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); -+ prt_printf(out, "read_done:\t\%u\n", m->read_done); -+ bch2_write_op_to_text(out, &m->op); -+ printbuf_indent_sub(out, 2); - } - - int bch2_extent_drop_ptrs(struct btree_trans *trans, -@@ -600,6 +634,40 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - } - -+static bool can_allocate_without_blocking(struct bch_fs *c, -+ struct data_update *m) -+{ -+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark))) -+ return false; -+ -+ unsigned target = m->op.flags & BCH_WRITE_only_specified_devs -+ ? 
m->op.target -+ : 0; -+ struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target); -+ -+ darray_for_each(m->op.devs_have, i) -+ __clear_bit(*i, devs.d); -+ -+ rcu_read_lock(); -+ unsigned nr_replicas = 0, i; -+ for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) { -+ struct bch_dev *ca = bch2_dev_rcu(c, i); -+ -+ struct bch_dev_usage usage; -+ bch2_dev_usage_read_fast(ca, &usage); -+ -+ if (!dev_buckets_free(ca, usage, m->op.watermark)) -+ continue; -+ -+ nr_replicas += ca->mi.durability; -+ if (nr_replicas >= m->op.nr_replicas) -+ break; -+ } -+ rcu_read_unlock(); -+ -+ return nr_replicas >= m->op.nr_replicas; -+} -+ - int bch2_data_update_init(struct btree_trans *trans, - struct btree_iter *iter, - struct moving_context *ctxt, -@@ -614,7 +682,7 @@ int bch2_data_update_init(struct btree_trans *trans, - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; -- unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; -+ unsigned reserve_sectors = k.k->size * data_opts.extra_replicas; - int ret = 0; - - /* -@@ -622,17 +690,8 @@ int bch2_data_update_init(struct btree_trans *trans, - * and we have to check for this because we go rw before repairing the - * snapshots table - just skip it, we can move it later. - */ -- if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) -- return -BCH_ERR_data_update_done; -- -- if (!bkey_get_dev_refs(c, k)) -- return -BCH_ERR_data_update_done; -- -- if (c->opts.nocow_enabled && -- !bkey_nocow_lock(c, ctxt, k)) { -- bkey_put_dev_refs(c, k); -- return -BCH_ERR_nocow_lock_blocked; -- } -+ if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot))) -+ return -BCH_ERR_data_update_done_no_snapshot; - - bch2_bkey_buf_init(&m->k); - bch2_bkey_buf_reassemble(&m->k, c, k); -@@ -647,27 +706,27 @@ int bch2_data_update_init(struct btree_trans *trans, - m->op.target = data_opts.target; - m->op.write_point = wp; - m->op.nr_replicas = 0; -- m->op.flags |= BCH_WRITE_PAGES_STABLE| -- BCH_WRITE_PAGES_OWNED| -- BCH_WRITE_DATA_ENCODED| -- BCH_WRITE_MOVE| -+ m->op.flags |= BCH_WRITE_pages_stable| -+ BCH_WRITE_pages_owned| -+ BCH_WRITE_data_encoded| -+ BCH_WRITE_move| - m->data_opts.write_flags; -- m->op.compression_opt = background_compression(io_opts); -+ m->op.compression_opt = io_opts.background_compression; - m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - - unsigned durability_have = 0, durability_removing = 0; - -- i = 0; -+ unsigned ptr_bit = 1; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (!p.ptr.cached) { - rcu_read_lock(); -- if (BIT(i) & m->data_opts.rewrite_ptrs) { -+ if (ptr_bit & m->data_opts.rewrite_ptrs) { - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += bch2_extent_ptr_desired_durability(c, &p); -- } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { -+ } else if (!(ptr_bit & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); - } -@@ -687,7 +746,7 @@ int bch2_data_update_init(struct btree_trans *trans, - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) - m->op.incompressible = true; - -- i++; -+ ptr_bit <<= 1; - } - - unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have)); -@@ -724,7 +783,15 @@ int bch2_data_update_init(struct btree_trans *trans, - /* if iter == NULL, it's just 
a promote */ - if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts); -- goto out; -+ if (!ret) -+ ret = -BCH_ERR_data_update_done_no_writes_needed; -+ goto out_bkey_buf_exit; -+ } -+ -+ if ((m->op.flags & BCH_WRITE_alloc_nowait) && -+ !can_allocate_without_blocking(c, m)) { -+ ret = -BCH_ERR_data_update_done_would_block; -+ goto out_bkey_buf_exit; - } - - if (reserve_sectors) { -@@ -733,31 +800,77 @@ int bch2_data_update_init(struct btree_trans *trans, - ? 0 - : BCH_DISK_RESERVATION_NOFAIL); - if (ret) -- goto out; -+ goto out_bkey_buf_exit; -+ } -+ -+ if (!bkey_get_dev_refs(c, k)) { -+ ret = -BCH_ERR_data_update_done_no_dev_refs; -+ goto out_put_disk_res; -+ } -+ -+ if (c->opts.nocow_enabled && -+ !bkey_nocow_lock(c, ctxt, k)) { -+ ret = -BCH_ERR_nocow_lock_blocked; -+ goto out_put_dev_refs; - } - - if (bkey_extent_is_unwritten(k)) { -- bch2_update_unwritten_extent(trans, m); -- goto out; -+ ret = bch2_update_unwritten_extent(trans, m) ?: -+ -BCH_ERR_data_update_done_unwritten; -+ goto out_nocow_unlock; - } - -+ /* write path might have to decompress data: */ -+ unsigned buf_bytes = 0; -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9); -+ -+ unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE); -+ -+ m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL); -+ if (!m->bvecs) -+ goto enomem; -+ -+ bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ); -+ bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0); -+ -+ if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) -+ goto enomem; -+ -+ rbio_init(&m->rbio.bio, c, io_opts, NULL); -+ m->rbio.bio.bi_iter.bi_size = buf_bytes; -+ m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -+ -+ bio_set_prio(&m->op.wbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ - return 0; --out: -- bch2_data_update_exit(m); -- return ret ?: -BCH_ERR_data_update_done; -+enomem: -+ ret = -ENOMEM; -+ kfree(m->bvecs); -+ m->bvecs = NULL; -+out_nocow_unlock: -+ if (c->opts.nocow_enabled) -+ bkey_nocow_unlock(c, k); -+out_put_dev_refs: -+ bkey_put_dev_refs(c, k); -+out_put_disk_res: -+ bch2_disk_reservation_put(c, &m->op.res); -+out_bkey_buf_exit: -+ bch2_bkey_buf_exit(&m->k, c); -+ return ret; - } - - void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned i = 0; -+ unsigned ptr_bit = 1; - - bkey_for_each_ptr(ptrs, ptr) { -- if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) { -- opts->kill_ptrs |= 1U << i; -- opts->rewrite_ptrs ^= 1U << i; -+ if ((opts->rewrite_ptrs & ptr_bit) && ptr->cached) { -+ opts->kill_ptrs |= ptr_bit; -+ opts->rewrite_ptrs ^= ptr_bit; - } - -- i++; -+ ptr_bit <<= 1; - } - } -diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h -index e4b50723428e..f4cf5d17cc37 100644 ---- a/fs/bcachefs/data_update.h -+++ b/fs/bcachefs/data_update.h -@@ -4,6 +4,7 @@ - #define _BCACHEFS_DATA_UPDATE_H - - #include "bkey_buf.h" -+#include "io_read.h" - #include "io_write_types.h" - - struct moving_context; -@@ -22,20 +23,24 @@ void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, - - struct data_update { - /* extent being updated: */ -+ bool read_done; - enum btree_id btree_id; - struct bkey_buf k; - struct data_update_opts data_opts; - struct moving_context *ctxt; - struct bch_move_stats *stats; -+ -+ struct bch_read_bio rbio; - struct bch_write_op op; -+ struct bio_vec *bvecs; - }; - - void 
bch2_data_update_to_text(struct printbuf *, struct data_update *); -+void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *); - - int bch2_data_update_index_update(struct bch_write_op *); - --void bch2_data_update_read_done(struct data_update *, -- struct bch_extent_crc_unpacked); -+void bch2_data_update_read_done(struct data_update *); - - int bch2_extent_drop_ptrs(struct btree_trans *, - struct btree_iter *, -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -index 45aec1afdb0e..55333e82d1fe 100644 ---- a/fs/bcachefs/debug.c -+++ b/fs/bcachefs/debug.c -@@ -20,6 +20,7 @@ - #include "extents.h" - #include "fsck.h" - #include "inode.h" -+#include "journal_reclaim.h" - #include "super.h" - - #include -@@ -472,7 +473,9 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 32); - -- prt_printf(out, "%px btree=%s l=%u\n", b, bch2_btree_id_str(b->c.btree_id), b->c.level); -+ prt_printf(out, "%px ", b); -+ bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); -+ prt_printf(out, "\n"); - - printbuf_indent_add(out, 2); - -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -index faffc98d5605..600eee936f13 100644 ---- a/fs/bcachefs/dirent.c -+++ b/fs/bcachefs/dirent.c -@@ -101,7 +101,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { - }; - - int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr d_name = bch2_dirent_get_name(d); -@@ -120,7 +120,7 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, - * Check new keys don't exceed the max length - * (older keys may be larger.) 
- */ -- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, -+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, - c, dirent_name_too_long, - "dirent name too big (%u > %u)", - d_name.len, BCH_NAME_MAX); -@@ -266,7 +266,7 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, - } else { - target->subvol = le32_to_cpu(d.v->d_child_subvol); - -- ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_cached, &s); -+ ret = bch2_subvolume_get(trans, target->subvol, true, &s); - - target->inum = le64_to_cpu(s.inode); - } -@@ -500,7 +500,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 - struct bkey_s_c k; - int ret; - -- for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir, 0, snapshot), - POS(dir, U64_MAX), 0, k, ret) - if (k.k->type == KEY_TYPE_dirent) { -@@ -549,7 +549,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) - bch2_bkey_buf_init(&sk); - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_dirents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, - POS(inum.inum, ctx->pos), - POS(inum.inum, U64_MAX), - inum.subvol, 0, k, ({ -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -index 53ad99666022..a633f83c1ac7 100644 ---- a/fs/bcachefs/dirent.h -+++ b/fs/bcachefs/dirent.h -@@ -4,10 +4,10 @@ - - #include "str_hash.h" - --enum bch_validate_flags; - extern const struct bch_hash_desc bch2_dirent_hash_desc; - --int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ -@@ -31,6 +31,11 @@ static inline unsigned dirent_val_u64s(unsigned len) - sizeof(u64)); - } - -+static inline unsigned int dirent_occupied_size(const struct qstr *name) -+{ -+ return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64); -+} -+ - int bch2_dirent_read_target(struct btree_trans *, subvol_inum, - struct bkey_s_c_dirent, subvol_inum *); - -diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c -index 07eb8fa1b026..b32e91ba8be8 100644 ---- a/fs/bcachefs/disk_accounting.c -+++ b/fs/bcachefs/disk_accounting.c -@@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_ - memcpy_u64s_small(acc->v.d, d, nr); - } - -+static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos); -+ - int bch2_disk_accounting_mod(struct btree_trans *trans, - struct disk_accounting_pos *k, - s64 *d, unsigned nr, bool gc) -@@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans, - - accounting_key_init(&k_i.k, k, d, nr); - -- return likely(!gc) -- ? 
bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k) -- : bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ if (unlikely(gc)) { -+ int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ if (ret == -BCH_ERR_btree_insert_need_mark_replicas) -+ ret = drop_locks_do(trans, -+ bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?: -+ bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true); -+ return ret; -+ } else { -+ return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k); -+ } - } - - int bch2_mod_dev_cached_sectors(struct btree_trans *trans, -@@ -127,14 +136,15 @@ static inline bool is_zero(char *start, char *end) - #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) - - int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - void *end = &acc_k + 1; - int ret = 0; - -- bkey_fsck_err_on(bversion_zero(k.k->bversion), -+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && -+ bversion_zero(k.k->bversion), - c, accounting_key_version_0, - "accounting key with version=0"); - -@@ -217,7 +227,8 @@ void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po - prt_printf(out, "id=%u", k->snapshot.id); - break; - case BCH_DISK_ACCOUNTING_btree: -- prt_printf(out, "btree=%s", bch2_btree_id_str(k->btree.id)); -+ prt_str(out, "btree="); -+ bch2_btree_id_to_text(out, k->btree.id); - break; - } - } -@@ -243,10 +254,10 @@ void bch2_accounting_swab(struct bkey_s k) - } - - static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r, -- struct disk_accounting_pos acc) -+ struct disk_accounting_pos *acc) - { -- unsafe_memcpy(r, &acc.replicas, -- replicas_entry_bytes(&acc.replicas), -+ unsafe_memcpy(r, &acc->replicas, -+ replicas_entry_bytes(&acc->replicas), - "variable length struct"); - } - -@@ -257,7 +268,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc - - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_replicas: -- __accounting_to_replicas(r, acc_k); -+ __accounting_to_replicas(r, &acc_k); - return true; - default: - return false; -@@ -322,6 +333,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun - - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); -+ -+ if (trace_accounting_mem_insert_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_accounting_to_text(&buf, c, a.s_c); -+ trace_accounting_mem_insert(c, buf.buf); -+ printbuf_exit(&buf); -+ } - return 0; - err: - free_percpu(n.v[1]); -@@ -461,32 +480,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc - return ret; - } - --void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c) --{ -- struct bch_accounting_mem *acc = &c->accounting; -- -- percpu_down_read(&c->mark_lock); -- out->atomic++; -- -- eytzinger0_for_each(i, acc->k.nr) { -- struct disk_accounting_pos acc_k; -- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos); -- -- bch2_accounting_key_to_text(out, &acc_k); -- -- u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); -- -- prt_str(out, ":"); -- for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++) -- prt_printf(out, " %llu", v[j]); -- prt_newline(out); -- } -- -- 
--out->atomic; -- percpu_up_read(&c->mark_lock); --} -- - static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc) - { - darray_for_each(acc->k, e) { -@@ -625,7 +618,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, - switch (acc.type) { - case BCH_DISK_ACCOUNTING_replicas: { - struct bch_replicas_padded r; -- __accounting_to_replicas(&r.e, acc); -+ __accounting_to_replicas(&r.e, &acc); - - for (unsigned i = 0; i < r.e.nr_devs; i++) - if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && -@@ -699,11 +692,45 @@ int bch2_accounting_read(struct bch_fs *c) - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; - -- int ret = for_each_btree_key(trans, iter, -- BTREE_ID_accounting, POS_MIN, -+ /* -+ * We might run more than once if we rewind to start topology repair or -+ * btree node scan - and those might cause us to get different results, -+ * so we can't just skip if we've already run. -+ * -+ * Instead, zero out any accounting we have: -+ */ -+ percpu_down_write(&c->mark_lock); -+ darray_for_each(acc->k, e) -+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); -+ for_each_member_device(c, ca) -+ percpu_memset(ca->usage, 0, sizeof(*ca->usage)); -+ percpu_memset(c->usage, 0, sizeof(*c->usage)); -+ percpu_up_write(&c->mark_lock); -+ -+ struct btree_iter iter; -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, -+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); -+ iter.flags &= ~BTREE_ITER_with_journal; -+ int ret = for_each_btree_key_continue(trans, iter, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ - struct bkey u; - struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u); -+ -+ if (k.k->type != KEY_TYPE_accounting) -+ continue; -+ -+ struct disk_accounting_pos acc_k; -+ bpos_to_disk_accounting_pos(&acc_k, k.k->p); -+ -+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -+ break; -+ -+ if (!bch2_accounting_is_mem(acc_k)) { -+ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); -+ continue; -+ } -+ - accounting_read_key(trans, k); - })); - if (ret) -@@ -715,6 +742,12 @@ int bch2_accounting_read(struct bch_fs *c) - - darray_for_each(*keys, i) { - if (i->k->k.type == KEY_TYPE_accounting) { -+ struct disk_accounting_pos acc_k; -+ bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); -+ -+ if (!bch2_accounting_is_mem(acc_k)) -+ continue; -+ - struct bkey_s_c k = bkey_i_to_s_c(i->k); - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, - sizeof(acc->k.data[0]), -@@ -748,15 +781,16 @@ int bch2_accounting_read(struct bch_fs *c) - keys->gap = keys->nr = dst - keys->data; - - percpu_down_write(&c->mark_lock); -- unsigned i = 0; -- while (i < acc->k.nr) { -- unsigned idx = inorder_to_eytzinger0(i, acc->k.nr); - -+ darray_for_each_reverse(acc->k, i) { - struct disk_accounting_pos acc_k; -- bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos); -+ bpos_to_disk_accounting_pos(&acc_k, i->pos); - - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; -- bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false); -+ memset(v, 0, sizeof(v)); -+ -+ for (unsigned j = 0; j < i->nr_counters; j++) -+ v[j] = percpu_u64_get(i->v[0] + j); - - /* - * If the entry counters are zeroed, it should be treated as -@@ -765,26 +799,25 @@ int bch2_accounting_read(struct bch_fs *c) - * Remove it, so that if it's re-added it gets re-marked in the - * superblock: - */ -- ret = bch2_is_zero(v, sizeof(v[0]) * 
acc->k.data[idx].nr_counters) -+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) - ? -BCH_ERR_remove_disk_accounting_entry -- : bch2_disk_accounting_validate_late(trans, acc_k, -- v, acc->k.data[idx].nr_counters); -+ : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); - - if (ret == -BCH_ERR_remove_disk_accounting_entry) { -- free_percpu(acc->k.data[idx].v[0]); -- free_percpu(acc->k.data[idx].v[1]); -- darray_remove_item(&acc->k, &acc->k.data[idx]); -- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -- accounting_pos_cmp, NULL); -+ free_percpu(i->v[0]); -+ free_percpu(i->v[1]); -+ darray_remove_item(&acc->k, i); - ret = 0; - continue; - } - - if (ret) - goto fsck_err; -- i++; - } - -+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), -+ accounting_pos_cmp, NULL); -+ - preempt_disable(); - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); - -@@ -804,7 +837,7 @@ int bch2_accounting_read(struct bch_fs *c) - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); - if (ca) { - struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; - percpu_u64_set(&d->buckets, v[0]); -@@ -881,10 +914,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c) - bpos_to_disk_accounting_pos(&acc_k, k.k->p); - - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) -- continue; -+ break; - -- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) -+ if (!bch2_accounting_is_mem(acc_k)) { -+ struct disk_accounting_pos next = { .type = acc_k.type + 1 }; -+ bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); - continue; -+ } - - bch2_accounting_mem_read(c, k.k->p, v, nr); - -@@ -910,7 +946,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) - break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (!ca) { - rcu_read_unlock(); - continue; -diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h -index 4ea6c8a092bc..5360cbb3ec29 100644 ---- a/fs/bcachefs/disk_accounting.h -+++ b/fs/bcachefs/disk_accounting.h -@@ -2,6 +2,7 @@ - #ifndef _BCACHEFS_DISK_ACCOUNTING_H - #define _BCACHEFS_DISK_ACCOUNTING_H - -+#include "btree_update.h" - #include "eytzinger.h" - #include "sb-members.h" - -@@ -62,27 +63,32 @@ static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage - - static inline void bpos_to_disk_accounting_pos(struct disk_accounting_pos *acc, struct bpos p) - { -- acc->_pad = p; -+ BUILD_BUG_ON(sizeof(*acc) != sizeof(p)); -+ - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -- bch2_bpos_swab(&acc->_pad); -+ acc->_pad = p; -+#else -+ memcpy_swab(acc, &p, sizeof(p)); - #endif - } - --static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *k) -+static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *acc) - { -- struct bpos ret = k->_pad; -- -+ struct bpos p; - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -- bch2_bpos_swab(&ret); -+ p = acc->_pad; -+#else -+ memcpy_swab(&p, acc, sizeof(p)); - #endif -- return ret; -+ return p; - } - - int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, - s64 *, unsigned, bool); - int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); - --int 
bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); - void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_accounting_swab(struct bkey_s); -@@ -112,6 +118,12 @@ enum bch_accounting_mode { - int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); - void bch2_accounting_mem_gc(struct bch_fs *); - -+static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) -+{ -+ return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR && -+ acc.type != BCH_DISK_ACCOUNTING_inum; -+} -+ - /* - * Update in memory counters so they match the btree update we're doing; called - * from transaction commit path -@@ -126,9 +138,10 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - bool gc = mode == BCH_ACCOUNTING_gc; - -- EBUG_ON(gc && !acc->gc_running); -+ if (gc && !acc->gc_running) -+ return 0; - -- if (acc_k.type == BCH_DISK_ACCOUNTING_inum) -+ if (!bch2_accounting_is_mem(acc_k)) - return 0; - - if (mode == BCH_ACCOUNTING_normal) { -@@ -141,7 +154,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - rcu_read_lock(); -- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev); -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (ca) { - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]); - this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]); -@@ -204,9 +217,45 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, - bch2_accounting_mem_read_counters(acc, idx, v, nr, false); - } - -+static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -+{ -+ EBUG_ON(!res->ref); -+ -+ return (struct bversion) { -+ .hi = res->seq >> 32, -+ .lo = (res->seq << 32) | (res->offset + offset), -+ }; -+} -+ -+static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, -+ struct bkey_i_accounting *a, -+ unsigned commit_flags) -+{ -+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res, -+ (u64 *) a - (u64 *) trans->journal_entries); -+ -+ EBUG_ON(bversion_zero(a->k.bversion)); -+ -+ return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) -+ ? 
bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) -+ : 0; -+} -+ -+static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans, -+ struct bkey_i_accounting *a_i, -+ unsigned commit_flags) -+{ -+ if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { -+ struct bkey_s_accounting a = accounting_i_to_s(a_i); -+ -+ bch2_accounting_neg(a); -+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); -+ bch2_accounting_neg(a); -+ } -+} -+ - int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); - int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); --void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); - - int bch2_gc_accounting_start(struct bch_fs *); - int bch2_gc_accounting_done(struct bch_fs *); -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -index 749dcf368841..b211e90ac54e 100644 ---- a/fs/bcachefs/ec.c -+++ b/fs/bcachefs/ec.c -@@ -26,6 +26,7 @@ - #include "util.h" - - #include -+#include - - #ifdef __KERNEL__ - -@@ -109,7 +110,7 @@ struct ec_bio { - /* Stripes btree keys: */ - - int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - int ret = 0; -@@ -129,7 +130,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid csum granularity (%u >= 64)", - s->csum_granularity_bits); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -304,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, - } - - if (flags & BTREE_TRIGGER_gc) { -- percpu_down_read(&c->mark_lock); - struct bucket *g = gc_bucket(ca, bucket.offset); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", - ptr->dev, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = -BCH_ERR_mark_stripe; -- goto err_unlock; -+ goto err; - } - - bucket_lock(g); -@@ -318,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, - ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); - alloc_to_bucket(g, new); - bucket_unlock(g); --err_unlock: -- percpu_up_read(&c->mark_lock); -+ - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - } -@@ -732,7 +731,7 @@ static void ec_block_endio(struct bio *bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "erasure coding %s error: %s", -- bio_data_dir(bio) ? 
"write" : "read", -+ str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) - clear_bit(ec_bio->idx, ec_bio->buf->valid); - -@@ -909,7 +908,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, - bch2_bkey_val_to_text(&msgbuf, c, orig_k); - bch_err_ratelimited(c, - "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); -- printbuf_exit(&msgbuf);; -+ printbuf_exit(&msgbuf); - ret = -BCH_ERR_stripe_reconstruct; - goto out; - } -@@ -1275,11 +1274,11 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct bch_dev *ca, - struct bpos bucket, u8 gen, - struct ec_stripe_buf *s, -- struct bpos *bp_pos) -+ struct bkey_s_c_backpointer bp, -+ struct bkey_buf *last_flushed) - { - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - struct bch_fs *c = trans->c; -- struct bch_backpointer bp; - struct btree_iter iter; - struct bkey_s_c k; - const struct bch_extent_ptr *ptr_c; -@@ -1288,33 +1287,26 @@ static int ec_stripe_update_extent(struct btree_trans *trans, - struct bkey_i *n; - int ret, dev, block; - -- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, -- bp_pos, &bp, BTREE_ITER_cached); -- if (ret) -- return ret; -- if (bpos_eq(*bp_pos, SPOS_MAX)) -- return 0; -- -- if (bp.level) { -+ if (bp.v->level) { - struct printbuf buf = PRINTBUF; - struct btree_iter node_iter; - struct btree *b; - -- b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp); -+ b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); - bch2_trans_iter_exit(trans, &node_iter); - - if (!b) - return 0; - - prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); -- bch2_backpointer_to_text(&buf, &bp); -+ bch2_bkey_val_to_text(&buf, c, bp.s_c); - - bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); - return -EIO; - } - -- k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent); -+ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); - ret = bkey_err(k); - if (ret) - return ret; -@@ -1373,7 +1365,6 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - struct bch_fs *c = trans->c; - struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; - struct bch_extent_ptr ptr = v->ptrs[block]; -- struct bpos bp_pos = POS_MIN; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); -@@ -1382,19 +1373,27 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b - - struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); - -- while (1) { -- ret = commit_do(trans, NULL, NULL, -- BCH_TRANS_COMMIT_no_check_rw| -- BCH_TRANS_COMMIT_no_enospc, -- ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos)); -- if (ret) -- break; -- if (bkey_eq(bp_pos, POS_MAX)) -+ struct bkey_buf last_flushed; -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); -+ -+ ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, bucket_pos), -+ bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, -+ NULL, NULL, -+ BCH_TRANS_COMMIT_no_check_rw| -+ BCH_TRANS_COMMIT_no_enospc, ({ -+ if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0))) - break; - -- bp_pos = bpos_nosnap_successor(bp_pos); -- } -+ if (bp_k.k->type != KEY_TYPE_backpointer) -+ continue; - -+ ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, -+ bkey_s_c_to_backpointer(bp_k), &last_flushed); -+ })); -+ -+ bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); - return 
ret; - } -@@ -1716,7 +1715,7 @@ static void ec_stripe_key_init(struct bch_fs *c, - set_bkey_val_u64s(&s->k, u64s); - } - --static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - { - struct ec_stripe_new *s; - -@@ -1724,7 +1723,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) -- return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; -+ return NULL; - - mutex_init(&s->lock); - closure_init(&s->iodone, NULL); -@@ -1739,10 +1738,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) - ec_stripe_key_init(c, &s->new_stripe.key, - s->nr_data, s->nr_parity, - h->blocksize, h->disk_label); -- -- h->s = s; -- h->nr_created++; -- return 0; -+ return s; - } - - static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) -@@ -1887,25 +1883,26 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, - return h; - } - --static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, -+static int new_stripe_alloc_buckets(struct btree_trans *trans, -+ struct ec_stripe_head *h, struct ec_stripe_new *s, - enum bch_watermark watermark, struct closure *cl) - { - struct bch_fs *c = trans->c; - struct bch_devs_mask devs = h->devs; - struct open_bucket *ob; - struct open_buckets buckets; -- struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; - unsigned i, j, nr_have_parity = 0, nr_have_data = 0; - bool have_cache = true; - int ret = 0; - -- BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); -- BUG_ON(v->nr_redundant != h->s->nr_parity); -+ BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); -+ BUG_ON(v->nr_redundant != s->nr_parity); - - /* * We bypass the sector allocator which normally does this: */ - bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); - -- for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { -+ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { - /* - * Note: we don't yet repair invalid blocks (failed/removed - * devices) when reusing stripes - we still need a codepath to -@@ -1915,21 +1912,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID) - __clear_bit(v->ptrs[i].dev, devs.d); - -- if (i < h->s->nr_data) -+ if (i < s->nr_data) - nr_have_data++; - else - nr_have_parity++; - } - -- BUG_ON(nr_have_data > h->s->nr_data); -- BUG_ON(nr_have_parity > h->s->nr_parity); -+ BUG_ON(nr_have_data > s->nr_data); -+ BUG_ON(nr_have_parity > s->nr_parity); - - buckets.nr = 0; -- if (nr_have_parity < h->s->nr_parity) { -+ if (nr_have_parity < s->nr_parity) { - ret = bch2_bucket_alloc_set_trans(trans, &buckets, - &h->parity_stripe, - &devs, -- h->s->nr_parity, -+ s->nr_parity, - &nr_have_parity, - &have_cache, 0, - BCH_DATA_parity, -@@ -1937,14 +1934,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - cl); - - open_bucket_for_each(c, &buckets, ob, i) { -- j = find_next_zero_bit(h->s->blocks_gotten, -- h->s->nr_data + h->s->nr_parity, -- h->s->nr_data); -- BUG_ON(j >= h->s->nr_data + h->s->nr_parity); -+ j = find_next_zero_bit(s->blocks_gotten, -+ s->nr_data + s->nr_parity, -+ s->nr_data); -+ BUG_ON(j >= s->nr_data + s->nr_parity); - -- h->s->blocks[j] = buckets.v[i]; -+ s->blocks[j] = buckets.v[i]; - v->ptrs[j] = 
bch2_ob_ptr(c, ob); -- __set_bit(j, h->s->blocks_gotten); -+ __set_bit(j, s->blocks_gotten); - } - - if (ret) -@@ -1952,11 +1949,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - } - - buckets.nr = 0; -- if (nr_have_data < h->s->nr_data) { -+ if (nr_have_data < s->nr_data) { - ret = bch2_bucket_alloc_set_trans(trans, &buckets, - &h->block_stripe, - &devs, -- h->s->nr_data, -+ s->nr_data, - &nr_have_data, - &have_cache, 0, - BCH_DATA_user, -@@ -1964,13 +1961,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ - cl); - - open_bucket_for_each(c, &buckets, ob, i) { -- j = find_next_zero_bit(h->s->blocks_gotten, -- h->s->nr_data, 0); -- BUG_ON(j >= h->s->nr_data); -+ j = find_next_zero_bit(s->blocks_gotten, -+ s->nr_data, 0); -+ BUG_ON(j >= s->nr_data); - -- h->s->blocks[j] = buckets.v[i]; -+ s->blocks[j] = buckets.v[i]; - v->ptrs[j] = bch2_ob_ptr(c, ob); -- __set_bit(j, h->s->blocks_gotten); -+ __set_bit(j, s->blocks_gotten); - } - - if (ret) -@@ -2016,73 +2013,78 @@ static s64 get_existing_stripe(struct bch_fs *c, - return ret; - } - --static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) -+static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) - { -- struct bch_fs *c = trans->c; -- struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -- struct bch_stripe *existing_v; -+ struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; -+ struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; - unsigned i; -- s64 idx; -- int ret; -- -- /* -- * If we can't allocate a new stripe, and there's no stripes with empty -- * blocks for us to reuse, that means we have to wait on copygc: -- */ -- idx = get_existing_stripe(c, h); -- if (idx < 0) -- return -BCH_ERR_stripe_alloc_blocked; -- -- ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); -- bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, -- "reading stripe key: %s", bch2_err_str(ret)); -- if (ret) { -- bch2_stripe_close(c, h->s); -- return ret; -- } - -- existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; -- -- BUG_ON(existing_v->nr_redundant != h->s->nr_parity); -- h->s->nr_data = existing_v->nr_blocks - -+ BUG_ON(existing_v->nr_redundant != s->nr_parity); -+ s->nr_data = existing_v->nr_blocks - - existing_v->nr_redundant; - -- ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); -+ int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); - if (ret) { -- bch2_stripe_close(c, h->s); -+ bch2_stripe_close(c, s); - return ret; - } - -- BUG_ON(h->s->existing_stripe.size != h->blocksize); -- BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); -+ BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); - - /* - * Free buckets we initially allocated - they might conflict with - * blocks from the stripe we're reusing: - */ -- for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { -- bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); -- h->s->blocks[i] = 0; -+ for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { -+ bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); -+ s->blocks[i] = 0; - } -- memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); -- memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); -+ memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); -+ memset(s->blocks_allocated, 0, 
sizeof(s->blocks_allocated)); - -- for (i = 0; i < existing_v->nr_blocks; i++) { -+ for (unsigned i = 0; i < existing_v->nr_blocks; i++) { - if (stripe_blockcount_get(existing_v, i)) { -- __set_bit(i, h->s->blocks_gotten); -- __set_bit(i, h->s->blocks_allocated); -+ __set_bit(i, s->blocks_gotten); -+ __set_bit(i, s->blocks_allocated); - } - -- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); -+ ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); - } - -- bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); -- h->s->have_existing_stripe = true; -+ bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); -+ s->have_existing_stripe = true; - - return 0; - } - --static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) -+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, -+ struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = trans->c; -+ s64 idx; -+ int ret; -+ -+ /* -+ * If we can't allocate a new stripe, and there's no stripes with empty -+ * blocks for us to reuse, that means we have to wait on copygc: -+ */ -+ idx = get_existing_stripe(c, h); -+ if (idx < 0) -+ return -BCH_ERR_stripe_alloc_blocked; -+ -+ ret = get_stripe_key_trans(trans, idx, &s->existing_stripe); -+ bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, -+ "reading stripe key: %s", bch2_err_str(ret)); -+ if (ret) { -+ bch2_stripe_close(c, s); -+ return ret; -+ } -+ -+ return init_new_stripe_from_existing(c, s); -+} -+ -+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, -+ struct ec_stripe_new *s) - { - struct bch_fs *c = trans->c; - struct btree_iter iter; -@@ -2091,15 +2093,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); - int ret; - -- if (!h->s->res.sectors) { -- ret = bch2_disk_reservation_get(c, &h->s->res, -+ if (!s->res.sectors) { -+ ret = bch2_disk_reservation_get(c, &s->res, - h->blocksize, -- h->s->nr_parity, -+ s->nr_parity, - BCH_DISK_RESERVATION_NOFAIL); - if (ret) - return ret; - } - -+ /* -+ * Allocate stripe slot -+ * XXX: we're going to need a bitrange btree of free stripes -+ */ - for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { - if (bkey_gt(k.k->p, POS(0, U32_MAX))) { -@@ -2114,7 +2120,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - } - - if (bkey_deleted(k.k) && -- bch2_try_open_stripe(c, h->s, k.k->p.offset)) -+ bch2_try_open_stripe(c, s, k.k->p.offset)) - break; - } - -@@ -2125,16 +2131,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st - - ret = ec_stripe_mem_alloc(trans, &iter); - if (ret) { -- bch2_stripe_close(c, h->s); -+ bch2_stripe_close(c, s); - goto err; - } - -- h->s->new_stripe.key.k.p = iter.pos; -+ s->new_stripe.key.k.p = iter.pos; - out: - bch2_trans_iter_exit(trans, &iter); - return ret; - err: -- bch2_disk_reservation_put(c, &h->s->res); -+ bch2_disk_reservation_put(c, &s->res); - goto out; - } - -@@ -2165,22 +2171,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - return h; - - if (!h->s) { -- ret = ec_new_stripe_alloc(c, h); -- if (ret) { -+ h->s = ec_new_stripe_alloc(c, h); -+ if (!h->s) { -+ ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; - bch_err(c, "failed to allocate new stripe"); - goto err; - } -+ -+ h->nr_created++; - } 
- -- if (h->s->allocated) -+ struct ec_stripe_new *s = h->s; -+ -+ if (s->allocated) - goto allocated; - -- if (h->s->have_existing_stripe) -+ if (s->have_existing_stripe) - goto alloc_existing; - - /* First, try to allocate a full stripe: */ -- ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: -- __bch2_ec_stripe_head_reserve(trans, h); -+ ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h, s); - if (!ret) - goto allocate_buf; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || -@@ -2192,15 +2203,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - * existing stripe: - */ - while (1) { -- ret = __bch2_ec_stripe_head_reuse(trans, h); -+ ret = __bch2_ec_stripe_head_reuse(trans, h, s); - if (!ret) - break; - if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) - goto err; - - if (watermark == BCH_WATERMARK_copygc) { -- ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: -- __bch2_ec_stripe_head_reserve(trans, h); -+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h, s); - if (ret) - goto err; - goto allocate_buf; -@@ -2218,19 +2229,19 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, - * Retry allocating buckets, with the watermark for this - * particular write: - */ -- ret = new_stripe_alloc_buckets(trans, h, watermark, cl); -+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); - if (ret) - goto err; - - allocate_buf: -- ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); -+ ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); - if (ret) - goto err; - -- h->s->allocated = true; -+ s->allocated = true; - allocated: -- BUG_ON(!h->s->idx); -- BUG_ON(!h->s->new_stripe.data[0]); -+ BUG_ON(!s->idx); -+ BUG_ON(!s->new_stripe.data[0]); - BUG_ON(trans->restarted); - return h; - err: -@@ -2295,7 +2306,7 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ - int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) - { - return bch2_trans_run(c, -- for_each_btree_key_upto_commit(trans, iter, -+ for_each_btree_key_max_commit(trans, iter, - BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), - BTREE_ITER_intent, k, - NULL, NULL, 0, ({ -@@ -2458,11 +2469,9 @@ void bch2_fs_ec_exit(struct bch_fs *c) - - while (1) { - mutex_lock(&c->ec_stripe_head_lock); -- h = list_first_entry_or_null(&c->ec_stripe_head_list, -- struct ec_stripe_head, list); -- if (h) -- list_del(&h->list); -+ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); - mutex_unlock(&c->ec_stripe_head_lock); -+ - if (!h) - break; - -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -index 43326370b410..583ca6a226da 100644 ---- a/fs/bcachefs/ec.h -+++ b/fs/bcachefs/ec.h -@@ -6,9 +6,8 @@ - #include "buckets_types.h" - #include "extents_types.h" - --enum bch_validate_flags; -- --int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, -diff --git a/fs/bcachefs/ec_format.h b/fs/bcachefs/ec_format.h -index 64ef52e00078..b9770f24f213 100644 ---- a/fs/bcachefs/ec_format.h -+++ b/fs/bcachefs/ec_format.h -@@ -20,6 +20,23 @@ struct bch_stripe { - */ - __u8 disk_label; - -+ /* 
-+ * Variable length sections: -+ * - Pointers -+ * - Checksums -+ * 2D array of [stripe block/device][csum block], with checksum block -+ * size given by csum_granularity_bits -+ * - Block sector counts: per-block array of u16s -+ * -+ * XXX: -+ * Either checksums should have come last, or we should have included a -+ * checksum_size field (the size in bytes of the checksum itself, not -+ * the blocksize the checksum covers). -+ * -+ * Currently we aren't able to access the block sector counts if the -+ * checksum type is unknown. -+ */ -+ - struct bch_extent_ptr ptrs[]; - } __packed __aligned(8); - -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -index 9c4fe5cdbfb7..d65a75e7216e 100644 ---- a/fs/bcachefs/errcode.h -+++ b/fs/bcachefs/errcode.h -@@ -54,7 +54,8 @@ - x(ENOMEM, ENOMEM_compression_bounce_read_init) \ - x(ENOMEM, ENOMEM_compression_bounce_write_init) \ - x(ENOMEM, ENOMEM_compression_workspace_init) \ -- x(ENOMEM, ENOMEM_decompression_workspace_init) \ -+ x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \ -+ x(EIO, compression_workspace_not_initialized) \ - x(ENOMEM, ENOMEM_bucket_gens) \ - x(ENOMEM, ENOMEM_buckets_nouse) \ - x(ENOMEM, ENOMEM_usage_init) \ -@@ -116,6 +117,8 @@ - x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ - x(ENOENT, ENOENT_dev_not_found) \ - x(ENOENT, ENOENT_dev_idx_not_found) \ -+ x(ENOENT, ENOENT_inode_no_backpointer) \ -+ x(ENOENT, ENOENT_no_snapshot_tree_subvol) \ - x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ - x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ - x(EEXIST, EEXIST_str_hash_set) \ -@@ -148,6 +151,7 @@ - x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ - x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ - x(BCH_ERR_transaction_restart, transaction_restart_nested) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_commit) \ - x(0, no_btree_node) \ - x(BCH_ERR_no_btree_node, no_btree_node_relock) \ - x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \ -@@ -164,7 +168,6 @@ - x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ - x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ - x(0, backpointer_to_overwritten_btree_node) \ -- x(0, lock_fail_root_changed) \ - x(0, journal_reclaim_would_deadlock) \ - x(EINVAL, fsck) \ - x(BCH_ERR_fsck, fsck_fix) \ -@@ -173,8 +176,15 @@ - x(BCH_ERR_fsck, fsck_errors_not_fixed) \ - x(BCH_ERR_fsck, fsck_repair_unimplemented) \ - x(BCH_ERR_fsck, fsck_repair_impossible) \ -- x(0, restart_recovery) \ -+ x(EINVAL, restart_recovery) \ -+ x(EINVAL, not_in_recovery) \ -+ x(EINVAL, cannot_rewind_recovery) \ - x(0, data_update_done) \ -+ x(BCH_ERR_data_update_done, data_update_done_would_block) \ -+ x(BCH_ERR_data_update_done, data_update_done_unwritten) \ -+ x(BCH_ERR_data_update_done, data_update_done_no_writes_needed) \ -+ x(BCH_ERR_data_update_done, data_update_done_no_snapshot) \ -+ x(BCH_ERR_data_update_done, data_update_done_no_dev_refs) \ - x(EINVAL, device_state_not_allowed) \ - x(EINVAL, member_info_missing) \ - x(EINVAL, mismatched_block_size) \ -@@ -192,7 +202,9 @@ - x(EINVAL, opt_parse_error) \ - x(EINVAL, remove_with_metadata_missing_unimplemented)\ - x(EINVAL, remove_would_lose_data) \ -- x(EINVAL, btree_iter_with_journal_not_supported) \ -+ x(EINVAL, no_resize_with_buckets_nouse) \ -+ x(EINVAL, inode_unpack_error) \ -+ x(EINVAL, varint_decode_error) \ - x(EROFS, erofs_trans_commit) \ - x(EROFS, erofs_no_writes) \ - x(EROFS, erofs_journal_err) \ -@@ -241,7 +253,10 @@ - x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ - x(BCH_ERR_invalid, 
invalid_bkey) \ - x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ -+ x(EIO, journal_shutdown) \ -+ x(EIO, journal_flush_err) \ - x(EIO, btree_node_read_err) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \ - x(EIO, sb_not_downgraded) \ - x(EIO, btree_node_write_all_failed) \ - x(EIO, btree_node_read_error) \ -@@ -257,6 +272,8 @@ - x(EIO, no_device_to_read_from) \ - x(EIO, missing_indirect_extent) \ - x(EIO, invalidate_stripe_to_dev) \ -+ x(EIO, no_encryption_key) \ -+ x(EIO, insufficient_journal_devices) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ - x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ -@@ -305,6 +322,7 @@ static inline long bch2_err_class(long err) - - #define BLK_STS_REMOVED ((__force blk_status_t)128) - -+#include - const char *bch2_blk_status_to_str(blk_status_t); - - #endif /* _BCACHFES_ERRCODE_H */ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -index b679def8fb98..038da6a61f6b 100644 ---- a/fs/bcachefs/error.c -+++ b/fs/bcachefs/error.c -@@ -1,7 +1,9 @@ - // SPDX-License-Identifier: GPL-2.0 - #include "bcachefs.h" -+#include "btree_cache.h" - #include "btree_iter.h" - #include "error.h" -+#include "fs-common.h" - #include "journal.h" - #include "recovery_passes.h" - #include "super.h" -@@ -33,7 +35,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) - int bch2_topology_error(struct bch_fs *c) - { - set_bit(BCH_FS_topology_error, &c->flags); -- if (!test_bit(BCH_FS_fsck_running, &c->flags)) { -+ if (!test_bit(BCH_FS_recovery_running, &c->flags)) { - bch2_inconsistent_error(c); - return -BCH_ERR_btree_need_topology_repair; - } else { -@@ -218,6 +220,30 @@ static const u8 fsck_flags_extra[] = { - #undef x - }; - -+static int do_fsck_ask_yn(struct bch_fs *c, -+ struct btree_trans *trans, -+ struct printbuf *question, -+ const char *action) -+{ -+ prt_str(question, ", "); -+ prt_str(question, action); -+ -+ if (bch2_fs_stdio_redirect(c)) -+ bch2_print(c, "%s", question->buf); -+ else -+ bch2_print_string_as_lines(KERN_ERR, question->buf); -+ -+ int ask = bch2_fsck_ask_yn(c, trans); -+ -+ if (trans) { -+ int ret = bch2_trans_relock(trans); -+ if (ret) -+ return ret; -+ } -+ -+ return ask; -+} -+ - int __bch2_fsck_err(struct bch_fs *c, - struct btree_trans *trans, - enum bch_fsck_flags flags, -@@ -226,7 +252,7 @@ int __bch2_fsck_err(struct bch_fs *c, - { - struct fsck_err_state *s = NULL; - va_list args; -- bool print = true, suppressing = false, inconsistent = false; -+ bool print = true, suppressing = false, inconsistent = false, exiting = false; - struct printbuf buf = PRINTBUF, *out = &buf; - int ret = -BCH_ERR_fsck_ignore; - const char *action_orig = "fix?", *action = action_orig; -@@ -256,9 +282,10 @@ int __bch2_fsck_err(struct bch_fs *c, - !trans && - bch2_current_has_btree_trans(c)); - -- if ((flags & FSCK_CAN_FIX) && -- test_bit(err, c->sb.errors_silent)) -- return -BCH_ERR_fsck_fix; -+ if (test_bit(err, c->sb.errors_silent)) -+ return flags & FSCK_CAN_FIX -+ ? 
-BCH_ERR_fsck_fix -+ : -BCH_ERR_fsck_ignore; - - bch2_sb_error_count(c, err); - -@@ -289,16 +316,14 @@ int __bch2_fsck_err(struct bch_fs *c, - */ - if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { - ret = s->ret; -- mutex_unlock(&c->fsck_error_msgs_lock); -- goto err; -+ goto err_unlock; - } - - kfree(s->last_msg); - s->last_msg = kstrdup(buf.buf, GFP_KERNEL); - if (!s->last_msg) { -- mutex_unlock(&c->fsck_error_msgs_lock); - ret = -ENOMEM; -- goto err; -+ goto err_unlock; - } - - if (c->opts.ratelimit_errors && -@@ -318,13 +343,19 @@ int __bch2_fsck_err(struct bch_fs *c, - prt_printf(out, bch2_log_msg(c, "")); - #endif - -- if ((flags & FSCK_CAN_FIX) && -- (flags & FSCK_AUTOFIX) && -+ if ((flags & FSCK_AUTOFIX) && - (c->opts.errors == BCH_ON_ERROR_continue || - c->opts.errors == BCH_ON_ERROR_fix_safe)) { - prt_str(out, ", "); -- prt_actioning(out, action); -- ret = -BCH_ERR_fsck_fix; -+ if (flags & FSCK_CAN_FIX) { -+ prt_actioning(out, action); -+ ret = -BCH_ERR_fsck_fix; -+ } else { -+ prt_str(out, ", continuing"); -+ ret = -BCH_ERR_fsck_ignore; -+ } -+ -+ goto print; - } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { - if (c->opts.errors != BCH_ON_ERROR_continue || - !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { -@@ -348,31 +379,18 @@ int __bch2_fsck_err(struct bch_fs *c, - : c->opts.fix_errors; - - if (fix == FSCK_FIX_ask) { -- prt_str(out, ", "); -- prt_str(out, action); -- -- if (bch2_fs_stdio_redirect(c)) -- bch2_print(c, "%s", out->buf); -- else -- bch2_print_string_as_lines(KERN_ERR, out->buf); - print = false; - -- int ask = bch2_fsck_ask_yn(c, trans); -- -- if (trans) { -- ret = bch2_trans_relock(trans); -- if (ret) { -- mutex_unlock(&c->fsck_error_msgs_lock); -- goto err; -- } -- } -+ ret = do_fsck_ask_yn(c, trans, out, action); -+ if (ret < 0) -+ goto err_unlock; - -- if (ask >= YN_ALLNO && s) -- s->fix = ask == YN_ALLNO -+ if (ret >= YN_ALLNO && s) -+ s->fix = ret == YN_ALLNO - ? FSCK_FIX_no - : FSCK_FIX_yes; - -- ret = ask & 1 -+ ret = ret & 1 - ? 
-BCH_ERR_fsck_fix - : -BCH_ERR_fsck_ignore; - } else if (fix == FSCK_FIX_yes || -@@ -385,9 +403,7 @@ int __bch2_fsck_err(struct bch_fs *c, - prt_str(out, ", not "); - prt_actioning(out, action); - } -- } else if (flags & FSCK_NEED_FSCK) { -- prt_str(out, " (run fsck to correct)"); -- } else { -+ } else if (!(flags & FSCK_CAN_IGNORE)) { - prt_str(out, " (repair unimplemented)"); - } - -@@ -396,14 +412,13 @@ int __bch2_fsck_err(struct bch_fs *c, - !(flags & FSCK_CAN_IGNORE))) - ret = -BCH_ERR_fsck_errors_not_fixed; - -- bool exiting = -- test_bit(BCH_FS_fsck_running, &c->flags) && -- (ret != -BCH_ERR_fsck_fix && -- ret != -BCH_ERR_fsck_ignore); -- -- if (exiting) -+ if (test_bit(BCH_FS_fsck_running, &c->flags) && -+ (ret != -BCH_ERR_fsck_fix && -+ ret != -BCH_ERR_fsck_ignore)) { -+ exiting = true; - print = true; -- -+ } -+print: - if (print) { - if (bch2_fs_stdio_redirect(c)) - bch2_print(c, "%s\n", out->buf); -@@ -419,17 +434,24 @@ int __bch2_fsck_err(struct bch_fs *c, - if (s) - s->ret = ret; - -- mutex_unlock(&c->fsck_error_msgs_lock); -- - if (inconsistent) - bch2_inconsistent_error(c); - -- if (ret == -BCH_ERR_fsck_fix) { -- set_bit(BCH_FS_errors_fixed, &c->flags); -- } else { -- set_bit(BCH_FS_errors_not_fixed, &c->flags); -- set_bit(BCH_FS_error, &c->flags); -+ /* -+ * We don't yet track whether the filesystem currently has errors, for -+ * log_fsck_err()s: that would require us to track for every error type -+ * which recovery pass corrects it, to get the fsck exit status correct: -+ */ -+ if (flags & FSCK_CAN_FIX) { -+ if (ret == -BCH_ERR_fsck_fix) { -+ set_bit(BCH_FS_errors_fixed, &c->flags); -+ } else { -+ set_bit(BCH_FS_errors_not_fixed, &c->flags); -+ set_bit(BCH_FS_error, &c->flags); -+ } - } -+err_unlock: -+ mutex_unlock(&c->fsck_error_msgs_lock); - err: - if (action != action_orig) - kfree(action); -@@ -437,28 +459,52 @@ int __bch2_fsck_err(struct bch_fs *c, - return ret; - } - -+static const char * const bch2_bkey_validate_contexts[] = { -+#define x(n) #n, -+ BKEY_VALIDATE_CONTEXTS() -+#undef x -+ NULL -+}; -+ - int __bch2_bkey_fsck_err(struct bch_fs *c, - struct bkey_s_c k, -- enum bch_validate_flags validate_flags, -+ struct bkey_validate_context from, - enum bch_sb_error_id err, - const char *fmt, ...) 
- { -- if (validate_flags & BCH_VALIDATE_silent) -+ if (from.flags & BCH_VALIDATE_silent) - return -BCH_ERR_fsck_delete_bkey; - - unsigned fsck_flags = 0; -- if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) -+ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { -+ if (test_bit(err, c->sb.errors_silent)) -+ return -BCH_ERR_fsck_delete_bkey; -+ - fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; -+ } -+ if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) -+ fsck_flags |= fsck_flags_extra[err]; - - struct printbuf buf = PRINTBUF; -- va_list args; -+ prt_printf(&buf, "invalid bkey in %s", -+ bch2_bkey_validate_contexts[from.from]); -+ -+ if (from.from == BKEY_VALIDATE_journal) -+ prt_printf(&buf, " journal seq=%llu offset=%u", -+ from.journal_seq, from.journal_offset); -+ -+ prt_str(&buf, " btree="); -+ bch2_btree_id_to_text(&buf, from.btree); -+ prt_printf(&buf, " level=%u: ", from.level); - -- prt_str(&buf, "invalid bkey "); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, "\n "); -+ -+ va_list args; - va_start(args, fmt); - prt_vprintf(&buf, fmt, args); - va_end(args); -+ - prt_str(&buf, ": delete?"); - - int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); -@@ -483,3 +529,36 @@ void bch2_flush_fsck_errs(struct bch_fs *c) - - mutex_unlock(&c->fsck_error_msgs_lock); - } -+ -+int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum) -+{ -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ /* XXX: we don't yet attempt to print paths when we don't know the subvol */ -+ if (inum.subvol) -+ ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out)); -+ if (!inum.subvol || ret) -+ prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); -+ -+ return trans_was_restarted(trans, restart_count); -+} -+ -+int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, -+ subvol_inum inum, u64 offset) -+{ -+ int ret = bch2_inum_err_msg_trans(trans, out, inum); -+ prt_printf(out, " offset %llu: ", offset); -+ return ret; -+} -+ -+void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum) -+{ -+ bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum)); -+} -+ -+void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, -+ subvol_inum inum, u64 offset) -+{ -+ bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -index 6551ada926b6..7acf2a27ca28 100644 ---- a/fs/bcachefs/error.h -+++ b/fs/bcachefs/error.h -@@ -45,32 +45,11 @@ int bch2_topology_error(struct bch_fs *); - bch2_inconsistent_error(c); \ - }) - --#define bch2_fs_inconsistent_on(cond, c, ...) \ -+#define bch2_fs_inconsistent_on(cond, ...) \ - ({ \ - bool _ret = unlikely(!!(cond)); \ -- \ -- if (_ret) \ -- bch2_fs_inconsistent(c, __VA_ARGS__); \ -- _ret; \ --}) -- --/* -- * Later we might want to mark only the particular device inconsistent, not the -- * entire filesystem: -- */ -- --#define bch2_dev_inconsistent(ca, ...) \ --do { \ -- bch_err(ca, __VA_ARGS__); \ -- bch2_inconsistent_error((ca)->fs); \ --} while (0) -- --#define bch2_dev_inconsistent_on(cond, ca, ...) \ --({ \ -- bool _ret = unlikely(!!(cond)); \ -- \ - if (_ret) \ -- bch2_dev_inconsistent(ca, __VA_ARGS__); \ -+ bch2_fs_inconsistent(__VA_ARGS__); \ - _ret; \ - }) - -@@ -123,9 +102,9 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, - - void bch2_flush_fsck_errs(struct bch_fs *); - --#define __fsck_err(c, _flags, _err_type, ...) 
\ -+#define fsck_err_wrap(_do) \ - ({ \ -- int _ret = bch2_fsck_err(c, _flags, _err_type, __VA_ARGS__); \ -+ int _ret = _do; \ - if (_ret != -BCH_ERR_fsck_fix && \ - _ret != -BCH_ERR_fsck_ignore) { \ - ret = _ret; \ -@@ -135,6 +114,8 @@ void bch2_flush_fsck_errs(struct bch_fs *); - _ret == -BCH_ERR_fsck_fix; \ - }) - -+#define __fsck_err(...) fsck_err_wrap(bch2_fsck_err(__VA_ARGS__)) -+ - /* These macros return true if error should be fixed: */ - - /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -@@ -149,12 +130,6 @@ void bch2_flush_fsck_errs(struct bch_fs *); - (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false);\ - }) - --#define need_fsck_err_on(cond, c, _err_type, ...) \ -- __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -- --#define need_fsck_err(c, _err_type, ...) \ -- __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -- - #define mustfix_fsck_err(c, _err_type, ...) \ - __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) - -@@ -167,11 +142,22 @@ void bch2_flush_fsck_errs(struct bch_fs *); - #define fsck_err_on(cond, c, _err_type, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) - -+#define log_fsck_err(c, _err_type, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+ -+#define log_fsck_err_on(cond, ...) \ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ if (_ret) \ -+ log_fsck_err(__VA_ARGS__); \ -+ _ret; \ -+}) -+ - enum bch_validate_flags; - __printf(5, 6) - int __bch2_bkey_fsck_err(struct bch_fs *, - struct bkey_s_c, -- enum bch_validate_flags, -+ struct bkey_validate_context from, - enum bch_sb_error_id, - const char *, ...); - -@@ -181,7 +167,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, - */ - #define bkey_fsck_err(c, _err_type, _err_msg, ...) 
\ - do { \ -- int _ret = __bch2_bkey_fsck_err(c, k, flags, \ -+ int _ret = __bch2_bkey_fsck_err(c, k, from, \ - BCH_FSCK_ERR_##_err_type, \ - _err_msg, ##__VA_ARGS__); \ - if (_ret != -BCH_ERR_fsck_fix && \ -@@ -252,4 +238,10 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type); - _ret; \ - }) - -+int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum); -+int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64); -+ -+void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum); -+void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64); -+ - #endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -index 5f4fecb358da..6aac579a692a 100644 ---- a/fs/bcachefs/extent_update.c -+++ b/fs/bcachefs/extent_update.c -@@ -64,7 +64,7 @@ static int count_iters_for_insert(struct btree_trans *trans, - break; - case KEY_TYPE_reflink_p: { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -- u64 idx = le64_to_cpu(p.v->idx); -+ u64 idx = REFLINK_P_IDX(p.v); - unsigned sectors = bpos_min(*end, p.k->p).offset - - bkey_start_offset(p.k); - struct btree_iter iter; -@@ -128,7 +128,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, - - bch2_trans_copy_iter(©, iter); - -- for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) { -+ for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) { - unsigned offset = 0; - - if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -index 37e3d69bec06..05d5f71a7ca9 100644 ---- a/fs/bcachefs/extents.c -+++ b/fs/bcachefs/extents.c -@@ -21,6 +21,7 @@ - #include "extents.h" - #include "inode.h" - #include "journal.h" -+#include "rebalance.h" - #include "replicas.h" - #include "super.h" - #include "super-io.h" -@@ -88,6 +89,14 @@ static inline bool ptr_better(struct bch_fs *c, - u64 l1 = dev_latency(c, p1.ptr.dev); - u64 l2 = dev_latency(c, p2.ptr.dev); - -+ /* -+ * Square the latencies, to bias more in favor of the faster -+ * device - we never want to stop issuing reads to the slower -+ * device altogether, so that we can update our latency numbers: -+ */ -+ l1 *= l1; -+ l2 *= l2; -+ - /* Pick at random, biased in favor of the faster device: */ - - return bch2_rand_range(l1 + l2) > l1; -@@ -169,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, - /* KEY_TYPE_btree_ptr: */ - - int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -177,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, - c, btree_ptr_val_too_big, - "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -189,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); - int ret = 0; -@@ -203,12 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, - c, btree_ptr_v2_min_key_bad, - "min_key > key"); - -- if (flags & BCH_VALIDATE_write) -+ if ((from.flags & BCH_VALIDATE_write) && -+ 
c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) - bkey_fsck_err_on(!bp.v->sectors_written, - c, btree_ptr_v2_written_0, - "sectors_written == 0"); - -- ret = bch2_bkey_ptrs_validate(c, k, flags); -+ ret = bch2_bkey_ptrs_validate(c, k, from); - fsck_err: - return ret; - } -@@ -395,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) - /* KEY_TYPE_reservation: */ - - int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - int ret = 0; -@@ -1120,6 +1130,57 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr - bch2_prt_compression_type(out, crc->compression_type); - } - -+static void bch2_extent_rebalance_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct bch_extent_rebalance *r) -+{ -+ prt_str(out, "rebalance:"); -+ -+ prt_printf(out, " replicas=%u", r->data_replicas); -+ if (r->data_replicas_from_inode) -+ prt_str(out, " (inode)"); -+ -+ prt_str(out, " checksum="); -+ bch2_prt_csum_opt(out, r->data_checksum); -+ if (r->data_checksum_from_inode) -+ prt_str(out, " (inode)"); -+ -+ if (r->background_compression || r->background_compression_from_inode) { -+ prt_str(out, " background_compression="); -+ bch2_compression_opt_to_text(out, r->background_compression); -+ -+ if (r->background_compression_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->background_target || r->background_target_from_inode) { -+ prt_str(out, " background_target="); -+ if (c) -+ bch2_target_to_text(out, c, r->background_target); -+ else -+ prt_printf(out, "%u", r->background_target); -+ -+ if (r->background_target_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->promote_target || r->promote_target_from_inode) { -+ prt_str(out, " promote_target="); -+ if (c) -+ bch2_target_to_text(out, c, r->promote_target); -+ else -+ prt_printf(out, "%u", r->promote_target); -+ -+ if (r->promote_target_from_inode) -+ prt_str(out, " (inode)"); -+ } -+ -+ if (r->erasure_code || r->erasure_code_from_inode) { -+ prt_printf(out, " ec=%u", r->erasure_code); -+ if (r->erasure_code_from_inode) -+ prt_str(out, " (inode)"); -+ } -+} -+ - void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) - { -@@ -1155,18 +1216,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - (u64) ec->idx, ec->block); - break; - } -- case BCH_EXTENT_ENTRY_rebalance: { -- const struct bch_extent_rebalance *r = &entry->rebalance; -- -- prt_str(out, "rebalance: target "); -- if (c) -- bch2_target_to_text(out, c, r->target); -- else -- prt_printf(out, "%u", r->target); -- prt_str(out, " compression "); -- bch2_compression_opt_to_text(out, r->compression); -+ case BCH_EXTENT_ENTRY_rebalance: -+ bch2_extent_rebalance_to_text(out, c, &entry->rebalance); - break; -- } -+ - default: - prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); - return; -@@ -1178,13 +1231,19 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - - static int extent_ptr_validate(struct bch_fs *c, - struct bkey_s_c k, -- enum bch_validate_flags flags, -+ struct bkey_validate_context from, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata) - { - int ret = 0; - -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ bkey_for_each_ptr(ptrs, ptr2) -+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, -+ c, 
ptr_to_duplicate_device, -+ "multiple pointers to same device (%u)", ptr->dev); -+ - /* bad pointers are repaired by check_fix_ptrs(): */ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); -@@ -1199,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c, - unsigned bucket_size = ca->mi.bucket_size; - rcu_read_unlock(); - -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- bkey_for_each_ptr(ptrs, ptr2) -- bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, -- c, ptr_to_duplicate_device, -- "multiple pointers to same device (%u)", ptr->dev); -- -- - bkey_fsck_err_on(bucket >= nbuckets, - c, ptr_after_last_bucket, - "pointer past last bucket (%llu > %llu)", bucket, nbuckets); -@@ -1221,7 +1273,7 @@ static int extent_ptr_validate(struct bch_fs *c, - } - - int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; -@@ -1248,7 +1300,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: -- ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); -+ ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false); - if (ret) - return ret; - -@@ -1270,9 +1322,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - -- bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, -- c, ptr_crc_uncompressed_size_too_small, -- "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), - c, ptr_crc_csum_type_unknown, - "invalid checksum type"); -@@ -1280,6 +1329,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - c, ptr_crc_compression_type_unknown, - "invalid compression type"); - -+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, -+ c, ptr_crc_uncompressed_size_too_small, -+ "checksum offset + key size > uncompressed size"); -+ bkey_fsck_err_on(crc_is_encoded(crc) && -+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), -+ c, ptr_crc_uncompressed_size_too_big, -+ "too large encoded extent"); -+ bkey_fsck_err_on(!crc_is_compressed(crc) && -+ crc.compressed_size != crc.uncompressed_size, -+ c, ptr_crc_uncompressed_size_mismatch, -+ "not compressed but compressed != uncompressed size"); -+ - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; -@@ -1293,12 +1355,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, - "redundant crc entry"); - crc_since_last_ptr = true; - -- bkey_fsck_err_on(crc_is_encoded(crc) && -- (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -- (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), -- c, ptr_crc_uncompressed_size_too_big, -- "too large encoded extent"); -- - size_ondisk = crc.compressed_size; - break; - case BCH_EXTENT_ENTRY_stripe_ptr: -@@ -1391,166 +1447,6 @@ void bch2_ptr_swab(struct bkey_s k) - } - } - --const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- -- bkey_extent_entry_for_each(ptrs, entry) -- if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) -- return 
&entry->rebalance; -- -- return NULL; --} -- --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, -- unsigned target, unsigned compression) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned rewrite_ptrs = 0; -- -- if (compression) { -- unsigned compression_type = bch2_compression_opt_to_type(compression); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- unsigned i = 0; -- -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -- p.ptr.unwritten) { -- rewrite_ptrs = 0; -- goto incompressible; -- } -- -- if (!p.ptr.cached && p.crc.compression_type != compression_type) -- rewrite_ptrs |= 1U << i; -- i++; -- } -- } --incompressible: -- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -- unsigned i = 0; -- -- bkey_for_each_ptr(ptrs, ptr) { -- if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) -- rewrite_ptrs |= 1U << i; -- i++; -- } -- } -- -- return rewrite_ptrs; --} -- --bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) --{ -- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- /* -- * If it's an indirect extent, we don't delete the rebalance entry when -- * done so that we know what options were applied - check if it still -- * needs work done: -- */ -- if (r && -- k.k->type == KEY_TYPE_reflink_v && -- !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) -- r = NULL; -- -- return r != NULL; --} -- --static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, -- unsigned target, unsigned compression) --{ -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- u64 sectors = 0; -- -- if (compression) { -- unsigned compression_type = bch2_compression_opt_to_type(compression); -- -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -- if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -- p.ptr.unwritten) { -- sectors = 0; -- goto incompressible; -- } -- -- if (!p.ptr.cached && p.crc.compression_type != compression_type) -- sectors += p.crc.compressed_size; -- } -- } --incompressible: -- if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) -- sectors += p.crc.compressed_size; -- } -- -- return sectors; --} -- --u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) --{ -- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- return r ? 
__bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; --} -- --int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, -- struct bch_io_opts *opts) --{ -- struct bkey_s k = bkey_i_to_s(_k); -- struct bch_extent_rebalance *r; -- unsigned target = opts->background_target; -- unsigned compression = background_compression(*opts); -- bool needs_rebalance; -- -- if (!bkey_extent_is_direct_data(k.k)) -- return 0; -- -- /* get existing rebalance entry: */ -- r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -- if (r) { -- if (k.k->type == KEY_TYPE_reflink_v) { -- /* -- * indirect extents: existing options take precedence, -- * so that we don't move extents back and forth if -- * they're referenced by different inodes with different -- * options: -- */ -- if (r->target) -- target = r->target; -- if (r->compression) -- compression = r->compression; -- } -- -- r->target = target; -- r->compression = compression; -- } -- -- needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); -- -- if (needs_rebalance && !r) { -- union bch_extent_entry *new = bkey_val_end(k); -- -- new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; -- new->rebalance.compression = compression; -- new->rebalance.target = target; -- new->rebalance.unused = 0; -- k.k->u64s += extent_entry_u64s(new); -- } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { -- /* -- * For indirect extents, don't delete the rebalance entry when -- * we're finished so that we know we specifically moved it or -- * compressed it to its current location/compression type -- */ -- extent_entry_drop(k, (union bch_extent_entry *) r); -- } -- -- return 0; --} -- - /* Generic extent code: */ - - int bch2_cut_front_s(struct bpos where, struct bkey_s k) -@@ -1610,7 +1506,7 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k) - case KEY_TYPE_reflink_p: { - struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); - -- le64_add_cpu(&p.v->idx, sub); -+ SET_REFLINK_P_IDX(p.v, REFLINK_P_IDX(p.v) + sub); - break; - } - case KEY_TYPE_inline_data: -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -index bcffcf60aaaf..620b284aa34f 100644 ---- a/fs/bcachefs/extents.h -+++ b/fs/bcachefs/extents.h -@@ -8,7 +8,6 @@ - - struct bch_fs; - struct btree_trans; --enum bch_validate_flags; - - /* extent entries: */ - -@@ -410,12 +409,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, - /* KEY_TYPE_btree_ptr: */ - - int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - - int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, - int, struct bkey_s); -@@ -452,7 +451,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - /* KEY_TYPE_reservation: */ - - int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - -@@ -696,7 +695,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct - void 
bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); - int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - - static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, - struct bch_extent_ptr ptr2) -@@ -710,15 +709,6 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, - - void bch2_ptr_swab(struct bkey_s); - --const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); --unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, -- unsigned, unsigned); --bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); --u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -- --int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, -- struct bch_io_opts *); -- - /* Generic extent code: */ - - enum bch_extent_overlap { -diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h -index 3bd2fdbb0817..c198dfc376d6 100644 ---- a/fs/bcachefs/extents_format.h -+++ b/fs/bcachefs/extents_format.h -@@ -201,19 +201,8 @@ struct bch_extent_stripe_ptr { - #endif - }; - --struct bch_extent_rebalance { --#if defined(__LITTLE_ENDIAN_BITFIELD) -- __u64 type:6, -- unused:34, -- compression:8, /* enum bch_compression_opt */ -- target:16; --#elif defined (__BIG_ENDIAN_BITFIELD) -- __u64 target:16, -- compression:8, -- unused:34, -- type:6; --#endif --}; -+/* bch_extent_rebalance: */ -+#include "rebalance_format.h" - - union bch_extent_entry { - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -index 7e10a9ddcfd9..d70d9f634cea 100644 ---- a/fs/bcachefs/fs-common.c -+++ b/fs/bcachefs/fs-common.c -@@ -69,9 +69,7 @@ int bch2_create_trans(struct btree_trans *trans, - if (!snapshot_src.inum) { - /* Inode wasn't specified, just snapshot: */ - struct bch_subvolume s; -- -- ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, -- BTREE_ITER_cached, &s); -+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s); - if (ret) - goto err; - -@@ -154,6 +152,7 @@ int bch2_create_trans(struct btree_trans *trans, - if (is_subdir_for_nlink(new_inode)) - dir_u->bi_nlink++; - dir_u->bi_mtime = dir_u->bi_ctime = now; -+ dir_u->bi_size += dirent_occupied_size(name); - - ret = bch2_inode_write(trans, &dir_iter, dir_u); - if (ret) -@@ -172,6 +171,10 @@ int bch2_create_trans(struct btree_trans *trans, - new_inode->bi_dir_offset = dir_offset; - } - -+ if (S_ISDIR(mode) && -+ !new_inode->bi_subvol) -+ new_inode->bi_depth = dir_u->bi_depth + 1; -+ - inode_iter.flags &= ~BTREE_ITER_all_snapshots; - bch2_btree_iter_set_snapshot(&inode_iter, snapshot); - -@@ -218,6 +221,7 @@ int bch2_link_trans(struct btree_trans *trans, - } - - dir_u->bi_mtime = dir_u->bi_ctime = now; -+ dir_u->bi_size += dirent_occupied_size(name); - - dir_hash = bch2_hash_info_init(c, dir_u); - -@@ -320,6 +324,7 @@ int bch2_unlink_trans(struct btree_trans *trans, - - dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; - dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); -+ dir_u->bi_size -= dirent_occupied_size(name); - - ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - &dir_hash, &dirent_iter, -@@ -458,6 +463,14 @@ int bch2_rename_trans(struct btree_trans *trans, - goto err; - } - -+ if (mode == BCH_RENAME) { -+ src_dir_u->bi_size -= dirent_occupied_size(src_name); -+ dst_dir_u->bi_size += dirent_occupied_size(dst_name); -+ } -+ -+ if (mode == 
BCH_RENAME_OVERWRITE) -+ src_dir_u->bi_size -= dirent_occupied_size(src_name); -+ - if (src_inode_u->bi_parent_subvol) - src_inode_u->bi_parent_subvol = dst_dir.subvol; - -@@ -512,6 +525,15 @@ int bch2_rename_trans(struct btree_trans *trans, - dst_dir_u->bi_nlink++; - } - -+ if (S_ISDIR(src_inode_u->bi_mode) && -+ !src_inode_u->bi_subvol) -+ src_inode_u->bi_depth = dst_dir_u->bi_depth + 1; -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ S_ISDIR(dst_inode_u->bi_mode) && -+ !dst_inode_u->bi_subvol) -+ dst_inode_u->bi_depth = src_dir_u->bi_depth + 1; -+ - if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { - dst_dir_u->bi_nlink--; - src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -@@ -548,3 +570,94 @@ int bch2_rename_trans(struct btree_trans *trans, - bch2_trans_iter_exit(trans, &src_dir_iter); - return ret; - } -+ -+static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n) -+{ -+ bch2_printbuf_make_room(out, n); -+ -+ unsigned can_print = min(n, printbuf_remaining(out)); -+ -+ b += n; -+ -+ for (unsigned i = 0; i < can_print; i++) -+ out->buf[out->pos++] = *((char *) --b); -+ -+ printbuf_nul_terminate(out); -+} -+ -+static inline void prt_str_reversed(struct printbuf *out, const char *s) -+{ -+ prt_bytes_reversed(out, s, strlen(s)); -+} -+ -+static inline void reverse_bytes(void *b, size_t n) -+{ -+ char *e = b + n, *s = b; -+ -+ while (s < e) { -+ --e; -+ swap(*s, *e); -+ s++; -+ } -+} -+ -+/* XXX: we don't yet attempt to print paths when we don't know the subvol */ -+int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path) -+{ -+ unsigned orig_pos = path->pos; -+ int ret = 0; -+ -+ while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL && -+ inum.inum == BCACHEFS_ROOT_INO)) { -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_find_by_inum_trans(trans, inum, &inode); -+ if (ret) -+ goto disconnected; -+ -+ if (!inode.bi_dir && !inode.bi_dir_offset) { -+ ret = -BCH_ERR_ENOENT_inode_no_backpointer; -+ goto disconnected; -+ } -+ -+ inum.subvol = inode.bi_parent_subvol ?: inum.subvol; -+ inum.inum = inode.bi_dir; -+ -+ u32 snapshot; -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto disconnected; -+ -+ struct btree_iter d_iter; -+ struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, -+ BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), -+ 0, dirent); -+ ret = bkey_err(d.s_c); -+ if (ret) -+ goto disconnected; -+ -+ struct qstr dirent_name = bch2_dirent_get_name(d); -+ prt_bytes_reversed(path, dirent_name.name, dirent_name.len); -+ -+ prt_char(path, '/'); -+ -+ bch2_trans_iter_exit(trans, &d_iter); -+ } -+ -+ if (orig_pos == path->pos) -+ prt_char(path, '/'); -+out: -+ ret = path->allocation_failure ? 
-ENOMEM : 0; -+ if (ret) -+ goto err; -+ -+ reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); -+ return 0; -+err: -+ return ret; -+disconnected: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto err; -+ -+ prt_str_reversed(path, "(disconnected)"); -+ goto out; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -index c934e807b380..2b59210bb5e8 100644 ---- a/fs/bcachefs/fs-common.h -+++ b/fs/bcachefs/fs-common.h -@@ -42,4 +42,6 @@ int bch2_rename_trans(struct btree_trans *, - bool bch2_reinherit_attrs(struct bch_inode_unpacked *, - struct bch_inode_unpacked *); - -+int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *); -+ - #endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -index 95972809e76d..83e15908250d 100644 ---- a/fs/bcachefs/fs-io-buffered.c -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -149,12 +149,10 @@ static void bchfs_read(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_buf sk; -- int flags = BCH_READ_RETRY_IF_STALE| -- BCH_READ_MAY_PROMOTE; -+ int flags = BCH_READ_retry_if_stale| -+ BCH_READ_may_promote; - int ret = 0; - -- rbio->c = c; -- rbio->start_time = local_clock(); - rbio->subvol = inum.subvol; - - bch2_bkey_buf_init(&sk); -@@ -164,7 +162,8 @@ static void bchfs_read(struct btree_trans *trans, - BTREE_ITER_slots); - while (1) { - struct bkey_s_c k; -- unsigned bytes, sectors, offset_into_extent; -+ unsigned bytes, sectors; -+ s64 offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - bch2_trans_begin(trans); -@@ -197,7 +196,7 @@ static void bchfs_read(struct btree_trans *trans, - - k = bkey_i_to_s_c(sk.k); - -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - - if (readpages_iter) { - ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, -@@ -210,14 +209,14 @@ static void bchfs_read(struct btree_trans *trans, - swap(rbio->bio.bi_iter.bi_size, bytes); - - if (rbio->bio.bi_iter.bi_size == bytes) -- flags |= BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_last_fragment; - - bch2_bio_page_state_set(&rbio->bio, k); - - bch2_read_extent(trans, rbio, iter.pos, - data_btree, k, offset_into_extent, flags); - -- if (flags & BCH_READ_LAST_FRAGMENT) -+ if (flags & BCH_READ_last_fragment) - break; - - swap(rbio->bio.bi_iter.bi_size, bytes); -@@ -230,10 +229,12 @@ static void bchfs_read(struct btree_trans *trans, - bch2_trans_iter_exit(trans, &iter); - - if (ret) { -- bch_err_inum_offset_ratelimited(c, -- iter.pos.inode, -- iter.pos.offset << 9, -- "read error %i from btree lookup", ret); -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9); -+ prt_printf(&buf, "read error %i from btree lookup", ret); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - rbio->bio.bi_status = BLK_STS_IOERR; - bio_endio(&rbio->bio); - } -@@ -248,6 +249,7 @@ void bch2_readahead(struct readahead_control *ractl) - struct bch_io_opts opts; - struct folio *folio; - struct readpages_iter readpages_iter; -+ struct blk_plug plug; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - -@@ -255,6 +257,16 @@ void bch2_readahead(struct readahead_control *ractl) - if (ret) - return; - -+ /* -+ * Besides being a general performance optimization, plugging helps with -+ * avoiding btree transaction srcu warnings - submitting a bio can -+ * block, and we don't want todo that 
with the transaction locked. -+ * -+ * However, plugged bios are submitted when we schedule; we ideally -+ * would have our own scheduler hook to call unlock_long() before -+ * scheduling. -+ */ -+ blk_start_plug(&plug); - bch2_pagecache_add_get(inode); - - struct btree_trans *trans = bch2_trans_get(c); -@@ -266,12 +278,13 @@ void bch2_readahead(struct readahead_control *ractl) - struct bch_read_bio *rbio = - rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, - GFP_KERNEL, &c->bio_read), -- opts); -+ c, -+ opts, -+ bch2_readpages_end_io); - - readpage_iter_advance(&readpages_iter); - - rbio->bio.bi_iter.bi_sector = folio_sector(folio); -- rbio->bio.bi_end_io = bch2_readpages_end_io; - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - - bchfs_read(trans, rbio, inode_inum(inode), -@@ -281,7 +294,7 @@ void bch2_readahead(struct readahead_control *ractl) - bch2_trans_put(trans); - - bch2_pagecache_add_put(inode); -- -+ blk_finish_plug(&plug); - darray_exit(&readpages_iter.folios); - } - -@@ -296,24 +309,30 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_read_bio *rbio; - struct bch_io_opts opts; -+ struct blk_plug plug; - int ret; - DECLARE_COMPLETION_ONSTACK(done); - -+ BUG_ON(folio_test_uptodate(folio)); -+ BUG_ON(folio_test_dirty(folio)); -+ - if (!bch2_folio_create(folio, GFP_KERNEL)) - return -ENOMEM; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - - rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), -- opts); -+ c, -+ opts, -+ bch2_read_single_folio_end_io); - rbio->bio.bi_private = &done; -- rbio->bio.bi_end_io = bch2_read_single_folio_end_io; -- - rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; - rbio->bio.bi_iter.bi_sector = folio_sector(folio); - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - -+ blk_start_plug(&plug); - bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); -+ blk_finish_plug(&plug); - wait_for_completion(&done); - - ret = blk_status_to_errno(rbio->bio.bi_status); -@@ -400,7 +419,7 @@ static void bch2_writepage_io_done(struct bch_write_op *op) - } - } - -- if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { -+ if (io->op.flags & BCH_WRITE_wrote_data_inline) { - bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; - -@@ -605,15 +624,6 @@ static int __bch2_writepage(struct folio *folio, - BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, - sectors << 9, offset << 9)); - -- /* Check for writing past i_size: */ -- WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > -- round_up(i_size, block_bytes(c)) && -- !test_bit(BCH_FS_emergency_ro, &c->flags), -- "writing past i_size: %llu > %llu (unrounded %llu)\n", -- bio_end_sector(&w->io->op.wbio.bio) << 9, -- round_up(i_size, block_bytes(c)), -- i_size); -- - w->io->op.res.sectors += reserved_sectors; - w->io->op.i_sectors_delta -= dirty_sectors; - w->io->op.new_i_size = i_size; -@@ -669,7 +679,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, - FGP_WRITEBEGIN | fgf_set_order(len), - mapping_gfp_mask(mapping)); -- if (IS_ERR_OR_NULL(folio)) -+ if (IS_ERR(folio)) - goto err_unlock; - - offset = pos - folio_pos(folio); -diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c -index 6d3a05ae5da8..535bc5fcbcc0 100644 ---- a/fs/bcachefs/fs-io-direct.c -+++ b/fs/bcachefs/fs-io-direct.c -@@ -70,8 +70,10 @@ static int bch2_direct_IO_read(struct kiocb *req, 
struct iov_iter *iter) - struct bch_io_opts opts; - struct dio_read *dio; - struct bio *bio; -+ struct blk_plug plug; - loff_t offset = req->ki_pos; - bool sync = is_sync_kiocb(req); -+ bool split = false; - size_t shorten; - ssize_t ret; - -@@ -98,8 +100,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - GFP_KERNEL, - &c->dio_read_bioset); - -- bio->bi_end_io = bch2_direct_IO_read_endio; -- - dio = container_of(bio, struct dio_read, rbio.bio); - closure_init(&dio->cl, NULL); - -@@ -128,14 +128,17 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - */ - dio->should_dirty = iter_is_iovec(iter); - -+ blk_start_plug(&plug); -+ - goto start; - while (iter->count) { -+ split = true; -+ - bio = bio_alloc_bioset(NULL, - bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_READ, - GFP_KERNEL, - &c->bio_read); -- bio->bi_end_io = bch2_direct_IO_read_split_endio; - start: - bio->bi_opf = REQ_OP_READ|REQ_SYNC; - bio->bi_iter.bi_sector = offset >> 9; -@@ -157,9 +160,19 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) - if (iter->count) - closure_get(&dio->cl); - -- bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); -+ struct bch_read_bio *rbio = -+ rbio_init(bio, -+ c, -+ opts, -+ split -+ ? bch2_direct_IO_read_split_endio -+ : bch2_direct_IO_read_endio); -+ -+ bch2_read(c, rbio, inode_inum(inode)); - } - -+ blk_finish_plug(&plug); -+ - iter->count += shorten; - - if (sync) { -@@ -506,8 +519,8 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio) - dio->op.devs_need_flush = &inode->ei_devs_need_flush; - - if (sync) -- dio->op.flags |= BCH_WRITE_SYNC; -- dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; -+ dio->op.flags |= BCH_WRITE_sync; -+ dio->op.flags |= BCH_WRITE_check_enospc; - - ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, - bio_sectors(bio), true); -diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c -index 1d4910ea0f1d..e072900e6a5b 100644 ---- a/fs/bcachefs/fs-io-pagecache.c -+++ b/fs/bcachefs/fs-io-pagecache.c -@@ -29,7 +29,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, - break; - - f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); -- if (IS_ERR_OR_NULL(f)) -+ if (IS_ERR(f)) - break; - - BUG_ON(fs->nr && folio_pos(f) != pos); -@@ -199,7 +199,7 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum, - unsigned folio_idx = 0; - - return bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inum.inum, offset), - POS(inum.inum, U64_MAX), - inum.subvol, BTREE_ITER_slots, k, ({ -diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c -index 2456c41b215e..94bf34b9b65f 100644 ---- a/fs/bcachefs/fs-io.c -+++ b/fs/bcachefs/fs-io.c -@@ -167,6 +167,34 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - - /* fsync: */ - -+static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, -+ u64 *seq) -+{ -+ struct printbuf buf = PRINTBUF; -+ struct bch_inode_unpacked u; -+ struct btree_iter iter; -+ int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); -+ if (ret) -+ return ret; -+ -+ u64 cur_seq = journal_cur_seq(&trans->c->journal); -+ *seq = min(cur_seq, u.bi_journal_seq); -+ -+ if (fsck_err_on(u.bi_journal_seq > cur_seq, -+ trans, inode_journal_seq_in_future, -+ "inode journal seq in future (currently at %llu)\n%s", -+ cur_seq, -+ (bch2_inode_unpacked_to_text(&buf, 
&u), -+ buf.buf))) { -+ u.bi_journal_seq = cur_seq; -+ ret = bch2_inode_write(trans, &iter, &u); -+ } -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ - /* - * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an - * insert trigger: look up the btree inode instead -@@ -180,9 +208,10 @@ static int bch2_flush_inode(struct bch_fs *c, - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) - return -EROFS; - -- struct bch_inode_unpacked u; -- int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: -- bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?: -+ u64 seq; -+ int ret = bch2_trans_commit_do(c, NULL, NULL, 0, -+ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: -+ bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: - bch2_inode_flush_nocow_writes(c, inode); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); - return ret; -@@ -222,7 +251,7 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, - struct bpos end) - { - return bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, start, end, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, - subvol, 0, k, ({ - bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); - }))); -@@ -256,7 +285,7 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode, - - folio = __filemap_get_folio(mapping, index, - FGP_LOCK|FGP_CREAT, GFP_KERNEL); -- if (IS_ERR_OR_NULL(folio)) { -+ if (IS_ERR(folio)) { - ret = -ENOMEM; - goto out; - } -@@ -806,7 +835,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, - u64 sectors = end - start; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, -+ for_each_btree_key_in_subvolume_max(trans, iter, - BTREE_ID_extents, - POS(inode->v.i_ino, start), - POS(inode->v.i_ino, end - 1), -@@ -877,11 +906,18 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, - bch2_mark_pagecache_unallocated(src, pos_src >> 9, - (pos_src + aligned_len) >> 9); - -+ /* -+ * XXX: we'd like to be telling bch2_remap_range() if we have -+ * permission to write to the source file, and thus if io path option -+ * changes should be propagated through the copy, but we need mnt_idmap -+ * from the pathwalk, awkward -+ */ - ret = bch2_remap_range(c, - inode_inum(dst), pos_dst >> 9, - inode_inum(src), pos_src >> 9, - aligned_len >> 9, -- pos_dst + len, &i_sectors_delta); -+ pos_dst + len, &i_sectors_delta, -+ false); - if (ret < 0) - goto err; - -@@ -922,7 +958,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) - return -ENXIO; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), - POS(inode->v.i_ino, U64_MAX), - inum.subvol, 0, k, ({ -@@ -958,7 +994,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) - return -ENXIO; - - int ret = bch2_trans_run(c, -- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), - POS(inode->v.i_ino, U64_MAX), - inum.subvol, BTREE_ITER_slots, k, ({ -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -index 405cf08bda34..15725b4ce393 100644 ---- a/fs/bcachefs/fs-ioctl.c -+++ b/fs/bcachefs/fs-ioctl.c -@@ -406,7 +406,7 @@ static long 
bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - sync_inodes_sb(c->vfs_sb); - up_read(&c->vfs_sb->s_umount); - } --retry: -+ - if (arg.src_ptr) { - error = user_path_at(arg.dirfd, - (const char __user *)(unsigned long)arg.src_ptr, -@@ -486,11 +486,6 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - err2: - if (arg.src_ptr) - path_put(&src_path); -- -- if (retry_estale(error, lookup_flags)) { -- lookup_flags |= LOOKUP_REVAL; -- goto retry; -- } - err1: - return error; - } -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -index a41d0d8a2f7b..90ade8f648d9 100644 ---- a/fs/bcachefs/fs.c -+++ b/fs/bcachefs/fs.c -@@ -23,6 +23,7 @@ - #include "journal.h" - #include "keylist.h" - #include "quota.h" -+#include "rebalance.h" - #include "snapshot.h" - #include "super.h" - #include "xattr.h" -@@ -38,6 +39,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -65,6 +67,9 @@ void bch2_inode_update_after_write(struct btree_trans *trans, - i_gid_write(&inode->v, bi->bi_gid); - inode->v.i_mode = bi->bi_mode; - -+ if (fields & ATTR_SIZE) -+ i_size_write(&inode->v, bi->bi_size); -+ - if (fields & ATTR_ATIME) - inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime)); - if (fields & ATTR_MTIME) -@@ -89,10 +94,25 @@ int __must_check bch2_write_inode(struct bch_fs *c, - retry: - bch2_trans_begin(trans); - -- ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), -- BTREE_ITER_intent) ?: -- (set ? set(trans, inode, &inode_u, p) : 0) ?: -- bch2_inode_write(trans, &iter, &inode_u) ?: -+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); -+ if (ret) -+ goto err; -+ -+ struct bch_extent_rebalance old_r = bch2_inode_rebalance_opts_get(c, &inode_u); -+ -+ ret = (set ? 
set(trans, inode, &inode_u, p) : 0); -+ if (ret) -+ goto err; -+ -+ struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u); -+ -+ if (memcmp(&old_r, &new_r, sizeof(new_r))) { -+ ret = bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_inode_write(trans, &iter, &inode_u) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - - /* -@@ -101,7 +121,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, - */ - if (!ret) - bch2_inode_update_after_write(trans, inode, &inode_u, fields); -- -+err: - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -160,8 +180,9 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b) - static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) - { - const subvol_inum *inum = data; -+ siphash_key_t k = { .key[0] = seed }; - -- return jhash(&inum->inum, sizeof(inum->inum), seed); -+ return siphash_2u64(inum->subvol, inum->inum, &k); - } - - static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed) -@@ -190,11 +211,18 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { - .automatic_shrinking = true, - }; - -+static const struct rhashtable_params bch2_vfs_inodes_by_inum_params = { -+ .head_offset = offsetof(struct bch_inode_info, by_inum_hash), -+ .key_offset = offsetof(struct bch_inode_info, ei_inum.inum), -+ .key_len = sizeof(u64), -+ .automatic_shrinking = true, -+}; -+ - int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - { - struct bch_fs *c = trans->c; -- struct rhashtable *ht = &c->vfs_inodes_table; -- subvol_inum inum = (subvol_inum) { .inum = p.offset }; -+ struct rhltable *ht = &c->vfs_inodes_by_inum_table; -+ u64 inum = p.offset; - DARRAY(u32) subvols; - int ret = 0; - -@@ -219,15 +247,15 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - struct rhash_lock_head __rcu *const *bkt; - struct rhash_head *he; - unsigned int hash; -- struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); -+ struct bucket_table *tbl = rht_dereference_rcu(ht->ht.tbl, &ht->ht); - restart: -- hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params); -+ hash = rht_key_hashfn(&ht->ht, tbl, &inum, bch2_vfs_inodes_by_inum_params); - bkt = rht_bucket(tbl, hash); - do { - struct bch_inode_info *inode; - - rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) { -- if (inode->ei_inum.inum == inum.inum) { -+ if (inode->ei_inum.inum == inum) { - ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, - GFP_NOWAIT|__GFP_NOWARN); - if (ret) { -@@ -248,7 +276,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) - /* Ensure we see any new tables. 
*/ - smp_rmb(); - -- tbl = rht_dereference_rcu(tbl->future_tbl, ht); -+ tbl = rht_dereference_rcu(tbl->future_tbl, &ht->ht); - if (unlikely(tbl)) - goto restart; - rcu_read_unlock(); -@@ -327,7 +355,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod - spin_unlock(&inode->v.i_lock); - - if (remove) { -- int ret = rhashtable_remove_fast(&c->vfs_inodes_table, -+ int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, -+ &inode->by_inum_hash, bch2_vfs_inodes_by_inum_params); -+ BUG_ON(ret); -+ -+ ret = rhashtable_remove_fast(&c->vfs_inodes_table, - &inode->hash, bch2_vfs_inodes_params); - BUG_ON(ret); - inode->v.i_hash.pprev = NULL; -@@ -372,6 +404,11 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, - discard_new_inode(&inode->v); - return old; - } else { -+ int ret = rhltable_insert(&c->vfs_inodes_by_inum_table, -+ &inode->by_inum_hash, -+ bch2_vfs_inodes_by_inum_params); -+ BUG_ON(ret); -+ - inode_fake_hash(&inode->v); - - inode_sb_list_add(&inode->v); -@@ -465,7 +502,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) - struct bch_inode_unpacked inode_u; - struct bch_subvolume subvol; - int ret = lockrestart_do(trans, -- bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: -+ bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: - PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - bch2_trans_put(trans); -@@ -535,8 +572,7 @@ __bch2_create(struct mnt_idmap *idmap, - inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol; - inum.inum = inode_u.bi_inum; - -- ret = bch2_subvolume_get(trans, inum.subvol, true, -- BTREE_ITER_with_updates, &subvol) ?: -+ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_trans_commit(trans, NULL, &journal_seq, 0); - if (unlikely(ret)) { - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -@@ -549,7 +585,7 @@ __bch2_create(struct mnt_idmap *idmap, - - if (!(flags & BCH_CREATE_TMPFILE)) { - bch2_inode_update_after_write(trans, dir, &dir_u, -- ATTR_MTIME|ATTR_CTIME); -+ ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); - mutex_unlock(&dir->ei_update_lock); - } - -@@ -617,7 +653,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - - struct bch_subvolume subvol; - struct bch_inode_unpacked inode_u; -- ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: -+ ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: - bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: - PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - -@@ -628,7 +664,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, - goto err; - - /* regular files may have hardlinks: */ -- if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) && -+ if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) && - !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)), - c, - "dirent points to inode that does not point back:\n %s", -@@ -706,7 +742,7 @@ static int __bch2_link(struct bch_fs *c, - - if (likely(!ret)) { - bch2_inode_update_after_write(trans, dir, &dir_u, -- ATTR_MTIME|ATTR_CTIME); -+ ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); - bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); - } - -@@ -759,7 +795,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - goto err; - - bch2_inode_update_after_write(trans, dir, &dir_u, -- ATTR_MTIME|ATTR_CTIME); -+ 
ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); - bch2_inode_update_after_write(trans, inode, &inode_u, - ATTR_MTIME); - -@@ -937,11 +973,11 @@ static int bch2_rename2(struct mnt_idmap *idmap, - dst_inode->v.i_ino != dst_inode_u.bi_inum); - - bch2_inode_update_after_write(trans, src_dir, &src_dir_u, -- ATTR_MTIME|ATTR_CTIME); -+ ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); - - if (src_dir != dst_dir) - bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u, -- ATTR_MTIME|ATTR_CTIME); -+ ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); - - bch2_inode_update_after_write(trans, src_inode, &src_inode_u, - ATTR_CTIME); -@@ -1245,7 +1281,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; -- unsigned offset_into_extent, sectors; - bool have_extent = false; - int ret = 0; - -@@ -1278,7 +1313,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - - bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_upto(&iter, end); -+ k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); - if (ret) - continue; -@@ -1292,9 +1327,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - continue; - } - -- offset_into_extent = iter.pos.offset - -- bkey_start_offset(k.k); -- sectors = k.k->size - offset_into_extent; -+ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -+ unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&cur, c, k); - -@@ -1306,7 +1340,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); - -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + -@@ -1736,7 +1770,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, - bch2_inode_update_after_write(trans, inode, bi, ~0); - - inode->v.i_blocks = bi->bi_sectors; -- inode->v.i_ino = bi->bi_inum; - inode->v.i_rdev = bi->bi_dev; - inode->v.i_generation = bi->bi_generation; - inode->v.i_size = bi->bi_size; -@@ -2200,7 +2233,8 @@ static int bch2_fs_get_tree(struct fs_context *fc) - sb->s_time_gran = c->sb.nsec_per_time_unit; - sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; - sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); -- sb->s_uuid = c->sb.user_uuid; -+ super_set_uuid(sb, c->sb.user_uuid.b, sizeof(c->sb.user_uuid)); -+ super_set_sysfs_name_uuid(sb); - sb->s_shrink->seeks = 0; - c->vfs_sb = sb; - strscpy(sb->s_id, c->name, sizeof(sb->s_id)); -@@ -2345,13 +2379,16 @@ static int bch2_init_fs_context(struct fs_context *fc) - - void bch2_fs_vfs_exit(struct bch_fs *c) - { -+ if (c->vfs_inodes_by_inum_table.ht.tbl) -+ rhltable_destroy(&c->vfs_inodes_by_inum_table); - if (c->vfs_inodes_table.tbl) - rhashtable_destroy(&c->vfs_inodes_table); - } - - int bch2_fs_vfs_init(struct bch_fs *c) - { -- return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params); -+ return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params) ?: -+ rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params); - } - - static struct file_system_type bcache_fs_type = { -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -index 59f9f7ae728d..dd2198541455 100644 ---- a/fs/bcachefs/fs.h -+++ b/fs/bcachefs/fs.h -@@ -14,6 +14,7 @@ - struct bch_inode_info { - struct inode v; - struct 
rhash_head hash; -+ struct rhlist_head by_inum_hash; - subvol_inum ei_inum; - - struct list_head ei_vfs_inode_list; -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -index 75c8a97a6954..8fcf7c8e5ede 100644 ---- a/fs/bcachefs/fsck.c -+++ b/fs/bcachefs/fsck.c -@@ -1,6 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0 - - #include "bcachefs.h" -+#include "bcachefs_ioctl.h" - #include "bkey_buf.h" - #include "btree_cache.h" - #include "btree_update.h" -@@ -16,6 +17,7 @@ - #include "recovery_passes.h" - #include "snapshot.h" - #include "super.h" -+#include "thread_with_file.h" - #include "xattr.h" - - #include -@@ -73,7 +75,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, - { - u64 sectors = 0; - -- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ({ -@@ -90,7 +92,7 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, - { - u64 subdirs = 0; - -- int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, -+ int ret = for_each_btree_key_max(trans, iter, BTREE_ID_dirents, - SPOS(inum, 0, snapshot), - POS(inum, U64_MAX), - 0, k, ({ -@@ -107,7 +109,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol, - u32 *snapshot, u64 *inum) - { - struct bch_subvolume s; -- int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); -+ int ret = bch2_subvolume_get(trans, subvol, false, &s); - - *snapshot = le32_to_cpu(s.snapshot); - *inum = le64_to_cpu(s.inode); -@@ -170,7 +172,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, - if (ret) - return ret; - -- struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - *target = le64_to_cpu(d.v->d_inum); - *type = d.v->d_type; - bch2_trans_iter_exit(trans, &iter); -@@ -203,6 +205,36 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) - return ret; - } - -+/* -+ * Find any subvolume associated with a tree of snapshots -+ * We can't rely on master_subvol - it might have been deleted. 
-+ */ -+static int find_snapshot_tree_subvol(struct btree_trans *trans, -+ u32 tree_id, u32 *subvol) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_snapshot) -+ continue; -+ -+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); -+ if (le32_to_cpu(s.v->tree) != tree_id) -+ continue; -+ -+ if (s.v->subvol) { -+ *subvol = le32_to_cpu(s.v->subvol); -+ goto found; -+ } -+ } -+ ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol; -+found: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ - /* Get lost+found, create if it doesn't exist: */ - static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - struct bch_inode_unpacked *lostfound, -@@ -210,6 +242,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - { - struct bch_fs *c = trans->c; - struct qstr lostfound_str = QSTR("lost+found"); -+ struct btree_iter lostfound_iter = { NULL }; - u64 inum = 0; - unsigned d_type = 0; - int ret; -@@ -220,20 +253,24 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - if (ret) - return ret; - -- subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; -+ u32 subvolid; -+ ret = find_snapshot_tree_subvol(trans, -+ bch2_snapshot_tree(c, snapshot), &subvolid); -+ bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u", -+ bch2_snapshot_tree(c, snapshot)); -+ if (ret) -+ return ret; - - struct bch_subvolume subvol; -- ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), -- false, 0, &subvol); -- bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", -- le32_to_cpu(st.master_subvol), snapshot); -+ ret = bch2_subvolume_get(trans, subvolid, false, &subvol); -+ bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot); - if (ret) - return ret; - - if (!subvol.inode) { - struct btree_iter iter; - struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, -- BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), -+ BTREE_ID_subvolumes, POS(0, subvolid), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(subvol); - if (ret) -@@ -243,13 +280,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - bch2_trans_iter_exit(trans, &iter); - } - -- root_inum.inum = le64_to_cpu(subvol.inode); -+ subvol_inum root_inum = { -+ .subvol = subvolid, -+ .inum = le64_to_cpu(subvol.inode) -+ }; - - struct bch_inode_unpacked root_inode; - struct bch_hash_info root_hash_info; - ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); - bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", -- root_inum.inum, le32_to_cpu(st.master_subvol)); -+ root_inum.inum, subvolid); - if (ret) - return ret; - -@@ -288,11 +328,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, - * XXX: we could have a nicer log message here if we had a nice way to - * walk backpointers to print a path - */ -- bch_notice(c, "creating lost+found in subvol %llu snapshot %u", -- root_inum.subvol, le32_to_cpu(st.root_snapshot)); -+ struct printbuf path = PRINTBUF; -+ ret = bch2_inum_to_path(trans, root_inum, &path); -+ if (ret) -+ goto err; -+ -+ bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", -+ path.buf, root_inum.subvol, snapshot); -+ printbuf_exit(&path); - - u64 now = bch2_current_time(c); -- struct btree_iter lostfound_iter = { NULL }; - u64 cpu = raw_smp_processor_id(); - - bch2_inode_init_early(c, lostfound); -@@ -451,7 
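find_snapshot_tree_subvol(), added above, scans every snapshot key and accepts the first one in the wanted tree that still references a live subvolume, precisely because the cached master_subvol may have been deleted. A simplified userspace model of that scan; the snap_rec array and -ENOENT are stand-ins for the btree walk and the bcachefs error code:

/* Illustrative only, not part of the patch above. */
#include <errno.h>
#include <stdio.h>
#include <stdint.h>

struct snap_rec { uint32_t tree; uint32_t subvol; /* 0 == no subvol */ };

static int find_tree_subvol(const struct snap_rec *snaps, size_t nr,
                            uint32_t tree_id, uint32_t *subvol)
{
    for (size_t i = 0; i < nr; i++) {
        if (snaps[i].tree != tree_id || !snaps[i].subvol)
            continue;                 /* wrong tree, or no live subvolume */
        *subvol = snaps[i].subvol;
        return 0;
    }
    return -ENOENT; /* stands in for -BCH_ERR_ENOENT_no_snapshot_tree_subvol */
}

int main(void)
{
    const struct snap_rec snaps[] = {
        { .tree = 1, .subvol = 0 },   /* interior node, no subvol */
        { .tree = 2, .subvol = 7 },
        { .tree = 1, .subvol = 5 },   /* first live subvol in tree 1 */
    };
    uint32_t subvol;
    if (!find_tree_subvol(snaps, 3, 1, &subvol))
        printf("tree 1 -> subvol %u\n", subvol);   /* prints 5 */
    return 0;
}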
+496,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - continue; - - struct bch_inode_unpacked child_inode; -- bch2_inode_unpack(k, &child_inode); -+ ret = bch2_inode_unpack(k, &child_inode); -+ if (ret) -+ break; - - if (!inode_should_reattach(&child_inode)) { - ret = maybe_delete_dirent(trans, -@@ -482,6 +529,13 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * - return ret; - } - -+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos pos) -+{ -+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); -+} -+ - static int remove_backpointer(struct btree_trans *trans, - struct bch_inode_unpacked *inode) - { -@@ -490,13 +544,11 @@ static int remove_backpointer(struct btree_trans *trans, - - struct bch_fs *c = trans->c; - struct btree_iter iter; -- struct bkey_s_c_dirent d = -- bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, -- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0, -- dirent); -- int ret = bkey_err(d) ?: -- dirent_points_to_inode(c, d, inode) ?: -- __remove_dirent(trans, d.k->p); -+ struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, -+ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); -+ int ret = bkey_err(d) ?: -+ dirent_points_to_inode(c, d, inode) ?: -+ __remove_dirent(trans, d.k->p); - bch2_trans_iter_exit(trans, &iter); - return ret; - } -@@ -613,7 +665,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 - struct btree_iter iter = {}; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); -- struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); - bch2_trans_iter_exit(trans, &iter); - int ret = bkey_err(k); - if (ret) -@@ -780,11 +832,13 @@ struct inode_walker { - struct bpos last_pos; - - DARRAY(struct inode_walker_entry) inodes; -+ snapshot_id_list deletes; - }; - - static void inode_walker_exit(struct inode_walker *w) - { - darray_exit(&w->inodes); -+ darray_exit(&w->deletes); - } - - static struct inode_walker inode_walker_init(void) -@@ -797,9 +851,8 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, - { - struct bch_inode_unpacked u; - -- BUG_ON(bch2_inode_unpack(inode, &u)); -- -- return darray_push(&w->inodes, ((struct inode_walker_entry) { -+ return bch2_inode_unpack(inode, &u) ?: -+ darray_push(&w->inodes, ((struct inode_walker_entry) { - .inode = u, - .snapshot = inode.k->p.snapshot, - })); -@@ -909,8 +962,9 @@ static int get_visible_inodes(struct btree_trans *trans, - int ret; - - w->inodes.nr = 0; -+ w->deletes.nr = 0; - -- for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), - BTREE_ITER_all_snapshots, k, ret) { - if (k.k->p.offset != inum) - break; -@@ -918,10 +972,13 @@ static int get_visible_inodes(struct btree_trans *trans, - if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) - continue; - -- if (bkey_is_inode(k.k)) -- add_inode(c, w, k); -+ if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) -+ continue; - -- if (k.k->p.snapshot >= s->pos.snapshot) -+ ret = bkey_is_inode(k.k) -+ ? 
add_inode(c, w, k) -+ : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); -+ if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); -@@ -929,69 +986,16 @@ static int get_visible_inodes(struct btree_trans *trans, - return ret; - } - --static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) --{ -- if (d.v->d_type == DT_SUBVOL) { -- u32 snap; -- u64 inum; -- int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum); -- if (ret && !bch2_err_matches(ret, ENOENT)) -- return ret; -- return !ret; -- } else { -- struct btree_iter iter; -- struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -- SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); -- int ret = bkey_err(k); -- if (ret) -- return ret; -- -- ret = bkey_is_inode(k.k); -- bch2_trans_iter_exit(trans, &iter); -- return ret; -- } --} -- - /* - * Prefer to delete the first one, since that will be the one at the wrong - * offset: - * return value: 0 -> delete k1, 1 -> delete k2 - */ --static int hash_pick_winner(struct btree_trans *trans, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct bkey_s_c k1, -- struct bkey_s_c k2) --{ -- if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && -- !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) -- return 0; -- -- switch (desc.btree_id) { -- case BTREE_ID_dirents: { -- int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1)); -- if (ret < 0) -- return ret; -- if (!ret) -- return 0; -- -- ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2)); -- if (ret < 0) -- return ret; -- if (!ret) -- return 1; -- return 2; -- } -- default: -- return 0; -- } --} -- --static int fsck_update_backpointers(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct bkey_i *new) -+int bch2_fsck_update_backpointers(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_i *new) - { - if (new->k.type != KEY_TYPE_dirent) - return 0; -@@ -1019,160 +1023,6 @@ static int fsck_update_backpointers(struct btree_trans *trans, - return ret; - } - --static int fsck_rename_dirent(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct bkey_s_c_dirent old) --{ -- struct qstr old_name = bch2_dirent_get_name(old); -- struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); -- int ret = PTR_ERR_OR_ZERO(new); -- if (ret) -- return ret; -- -- bkey_dirent_init(&new->k_i); -- dirent_copy_target(new, old); -- new->k.p = old.k->p; -- -- for (unsigned i = 0; i < 1000; i++) { -- unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", -- old_name.len, old_name.name, i); -- unsigned u64s = BKEY_U64s + dirent_val_u64s(len); -- -- if (u64s > U8_MAX) -- return -EINVAL; -- -- new->k.u64s = u64s; -- -- ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, -- (subvol_inum) { 0, old.k->p.inode }, -- old.k->p.snapshot, &new->k_i, -- BTREE_UPDATE_internal_snapshot_node); -- if (!bch2_err_matches(ret, EEXIST)) -- break; -- } -- -- if (ret) -- return ret; -- -- return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); --} -- --static int hash_check_key(struct btree_trans *trans, -- struct snapshots_seen *s, -- const struct bch_hash_desc desc, -- struct bch_hash_info *hash_info, -- struct btree_iter *k_iter, struct bkey_s_c hash_k) --{ -- struct bch_fs *c = 
trans->c; -- struct btree_iter iter = { NULL }; -- struct printbuf buf = PRINTBUF; -- struct bkey_s_c k; -- u64 hash; -- int ret = 0; -- -- if (hash_k.k->type != desc.key_type) -- return 0; -- -- hash = desc.hash_bkey(hash_info, hash_k); -- -- if (likely(hash == hash_k.k->p.offset)) -- return 0; -- -- if (hash_k.k->p.offset < hash) -- goto bad_hash; -- -- for_each_btree_key_norestart(trans, iter, desc.btree_id, -- SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -- BTREE_ITER_slots, k, ret) { -- if (bkey_eq(k.k->p, hash_k.k->p)) -- break; -- -- if (k.k->type == desc.key_type && -- !desc.cmp_bkey(k, hash_k)) -- goto duplicate_entries; -- -- if (bkey_deleted(k.k)) { -- bch2_trans_iter_exit(trans, &iter); -- goto bad_hash; -- } -- } --out: -- bch2_trans_iter_exit(trans, &iter); -- printbuf_exit(&buf); -- return ret; --bad_hash: -- if (fsck_err(trans, hash_table_key_wrong_offset, -- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", -- bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { -- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); -- if (IS_ERR(new)) -- return PTR_ERR(new); -- -- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, -- (subvol_inum) { 0, hash_k.k->p.inode }, -- hash_k.k->p.snapshot, new, -- STR_HASH_must_create| -- BTREE_ITER_with_updates| -- BTREE_UPDATE_internal_snapshot_node); -- ret = bkey_err(k); -- if (ret) -- goto out; -- if (k.k) -- goto duplicate_entries; -- -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, -- BTREE_UPDATE_internal_snapshot_node) ?: -- fsck_update_backpointers(trans, s, desc, hash_info, new) ?: -- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -- -BCH_ERR_transaction_restart_nested; -- goto out; -- } --fsck_err: -- goto out; --duplicate_entries: -- ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); -- if (ret < 0) -- goto out; -- -- if (!fsck_err(trans, hash_table_key_duplicate, -- "duplicate hash table keys%s:\n%s", -- ret != 2 ? 
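The hash_check_key() logic being removed here (it moves to bch2_str_hash_check_key()) verifies that every hashed key sits at or after the slot its hash maps to, with no empty slot in between; otherwise lookups that start at the home slot would never reach it. A toy userspace rendition of that invariant over a flat, linearly probed table; hash32() and the fixed array are invented for the demo, the real code walks a btree:

/* Illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 16
static uint32_t table[TABLE_SIZE];          /* 0 == empty slot */

static uint32_t hash32(uint32_t k)          /* stand-in hash function */
{
    return (k * 2654435761u) % TABLE_SIZE;
}

/* Returns 0 if the entry at 'slot' is placed consistently, -1 otherwise. */
static int check_slot(uint32_t slot)
{
    uint32_t k = table[slot];
    if (!k)
        return 0;                           /* empty, nothing to check */

    uint32_t home = hash32(k);
    if (slot == home)
        return 0;                           /* in its home slot */
    if (slot < home)
        return -1;                          /* stored *before* where it hashes to */

    for (uint32_t i = home; i < slot; i++)
        if (!table[i])
            return -1;                      /* hole between home and entry */
    return 0;
}

int main(void)
{
    table[3] = 42;                          /* 42 hashes to slot 10 here */
    for (uint32_t i = 0; i < TABLE_SIZE; i++)
        if (check_slot(i))
            printf("slot %u: key %u at wrong offset (home %u)\n",
                   i, table[i], hash32(table[i]));
    return 0;
}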
"" : ", both point to valid inodes", -- (printbuf_reset(&buf), -- bch2_bkey_val_to_text(&buf, c, hash_k), -- prt_newline(&buf), -- bch2_bkey_val_to_text(&buf, c, k), -- buf.buf))) -- goto out; -- -- switch (ret) { -- case 0: -- ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -- break; -- case 1: -- ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); -- break; -- case 2: -- ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: -- bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); -- goto out; -- } -- -- ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: -- -BCH_ERR_transaction_restart_nested; -- goto out; --} -- --static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bpos pos) --{ -- return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); --} -- - static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, -@@ -1260,12 +1110,43 @@ static int get_snapshot_root_inode(struct btree_trans *trans, - goto err; - BUG(); - found_root: -- BUG_ON(bch2_inode_unpack(k, root)); -+ ret = bch2_inode_unpack(k, root); - err: - bch2_trans_iter_exit(trans, &iter); - return ret; - } - -+static int check_directory_size(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode_u, -+ struct bkey_s_c inode_k, bool *write_inode) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u64 new_size = 0; -+ int ret; -+ -+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, -+ SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot), -+ POS(inode_k.k->p.offset, U64_MAX), -+ 0, k, ret) { -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k); -+ struct qstr name = bch2_dirent_get_name(dirent); -+ -+ new_size += dirent_occupied_size(&name); -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (!ret && inode_u->bi_size != new_size) { -+ inode_u->bi_size = new_size; -+ *write_inode = true; -+ } -+ -+ return ret; -+} -+ - static int check_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, -@@ -1291,7 +1172,9 @@ static int check_inode(struct btree_trans *trans, - if (!bkey_is_inode(k.k)) - return 0; - -- BUG_ON(bch2_inode_unpack(k, &u)); -+ ret = bch2_inode_unpack(k, &u); -+ if (ret) -+ goto err; - - if (snapshot_root->bi_inum != u.bi_inum) { - ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum); -@@ -1302,7 +1185,7 @@ static int check_inode(struct btree_trans *trans, - if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed || - INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root), - trans, inode_snapshot_mismatch, -- "inodes in different snapshots don't match")) { -+ "inode hash info in different snapshots don't match")) { - u.bi_hash_seed = snapshot_root->bi_hash_seed; - SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root)); - do_update = true; -@@ -1392,7 +1275,7 @@ static int check_inode(struct btree_trans *trans, - - if (fsck_err_on(!ret, - trans, inode_unlinked_and_not_open, -- "inode %llu%u unlinked and not open", -+ "inode %llu:%u unlinked and not open", - u.bi_inum, u.bi_snapshot)) { - ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); - bch_err_msg(c, ret, "in fsck deleting inode"); -@@ -1415,7 +1298,7 @@ static int check_inode(struct btree_trans *trans, - if (u.bi_subvol) { - struct bch_subvolume s; - -- ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); -+ ret = 
bch2_subvolume_get(trans, u.bi_subvol, false, &s); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -@@ -1441,6 +1324,27 @@ static int check_inode(struct btree_trans *trans, - do_update = true; - } - } -+ -+ if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal), -+ trans, inode_journal_seq_in_future, -+ "inode journal seq in future (currently at %llu)\n%s", -+ journal_cur_seq(&c->journal), -+ (printbuf_reset(&buf), -+ bch2_inode_unpacked_to_text(&buf, &u), -+ buf.buf))) { -+ u.bi_journal_seq = journal_cur_seq(&c->journal); -+ do_update = true; -+ } -+ -+ if (S_ISDIR(u.bi_mode)) { -+ ret = check_directory_size(trans, &u, k, &do_update); -+ -+ fsck_err_on(ret, -+ trans, directory_size_mismatch, -+ "directory inode %llu:%u with the mismatch directory size", -+ u.bi_inum, k.k->p.snapshot); -+ ret = 0; -+ } - do_update: - if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u); -@@ -1502,7 +1406,9 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans, - break; - - struct bch_inode_unpacked parent_inode; -- bch2_inode_unpack(k, &parent_inode); -+ ret = bch2_inode_unpack(k, &parent_inode); -+ if (ret) -+ break; - - if (!inode_should_reattach(&parent_inode)) - break; -@@ -1525,7 +1431,9 @@ static int check_unreachable_inode(struct btree_trans *trans, - return 0; - - struct bch_inode_unpacked inode; -- BUG_ON(bch2_inode_unpack(k, &inode)); -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ return ret; - - if (!inode_should_reattach(&inode)) - return 0; -@@ -1649,7 +1557,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal - if (i->count != count2) { - bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->snapshot, i->count, count2); -- return -BCH_ERR_internal_fsck_err; -+ i->count = count2; - } - - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), -@@ -1753,7 +1661,7 @@ static int overlapping_extents_found(struct btree_trans *trans, - bch2_trans_iter_init(trans, &iter1, btree, pos1, - BTREE_ITER_all_snapshots| - BTREE_ITER_not_extents); -- k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); -+ k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k1); - if (ret) - goto err; -@@ -1778,7 +1686,7 @@ static int overlapping_extents_found(struct btree_trans *trans, - while (1) { - bch2_btree_iter_advance(&iter2); - -- k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); -+ k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k2); - if (ret) - goto err; -@@ -2156,7 +2064,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, - return __bch2_fsck_write_inode(trans, target); - } - -- if (bch2_inode_should_have_bp(target) && -+ if (bch2_inode_should_have_single_bp(target) && - !fsck_err(trans, inode_wrong_backpointer, - "dirent points to inode that does not point back:\n %s", - (bch2_bkey_val_to_text(&buf, c, d.s_c), -@@ -2480,7 +2388,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - dir->first_this_inode = false; - -- ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k); - if (ret < 0) - goto err; - if (ret) { -@@ -2519,6 +2427,30 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, - if (ret) - goto err; - } -+ -+ 
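check_directory_size(), added above, recomputes a directory's size by summing a per-entry footprint over its dirents and rewrites the inode when the recorded bi_size disagrees. A userspace sketch of the same recompute-and-compare pattern; the 8-byte record rounding below is an invented stand-in for dirent_occupied_size(), whose real formula is not shown in this hunk:

/* Illustrative only; the footprint formula is assumed, not bcachefs's. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

static uint64_t entry_footprint(const char *name)
{
    size_t header = 8;                      /* assumed fixed per-entry overhead */
    size_t len = header + strlen(name);
    return (len + 7) & ~7ULL;               /* round up to an 8-byte record */
}

int main(void)
{
    const char *entries[] = { "a", "subdir", "notes.txt" };
    uint64_t recorded_size = 512;           /* what the inode claims */
    uint64_t new_size = 0;
    bool write_inode = false;

    for (size_t i = 0; i < 3; i++)
        new_size += entry_footprint(entries[i]);

    if (recorded_size != new_size) {
        recorded_size = new_size;           /* fsck would update bi_size */
        write_inode = true;
    }
    printf("recomputed %llu, write_inode=%d\n",
           (unsigned long long)new_size, write_inode);
    return 0;
}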
darray_for_each(target->deletes, i) -+ if (fsck_err_on(!snapshot_list_has_id(&s->ids, *i), -+ trans, dirent_to_overwritten_inode, -+ "dirent points to inode overwritten in snapshot %u:\n%s", -+ *i, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), -+ buf.buf))) { -+ struct btree_iter delete_iter; -+ bch2_trans_iter_init(trans, &delete_iter, -+ BTREE_ID_dirents, -+ SPOS(k.k->p.inode, k.k->p.offset, *i), -+ BTREE_ITER_intent); -+ ret = bch2_btree_iter_traverse(&delete_iter) ?: -+ bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ hash_info, -+ &delete_iter, -+ BTREE_UPDATE_internal_snapshot_node); -+ bch2_trans_iter_exit(trans, &delete_iter); -+ if (ret) -+ goto err; -+ -+ } - } - - ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); -@@ -2594,7 +2526,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, - *hash_info = bch2_hash_info_init(c, &i->inode); - inode->first_this_inode = false; - -- ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); -+ ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k); - bch_err_fn(c, ret); - return ret; - } -@@ -2774,6 +2706,48 @@ struct pathbuf_entry { - - typedef DARRAY(struct pathbuf_entry) pathbuf; - -+static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, -+ u32 new_depth) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, p->inum, p->snapshot), 0); -+ -+ struct bch_inode_unpacked inode; -+ int ret = bkey_err(k) ?: -+ !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode -+ : bch2_inode_unpack(k, &inode); -+ if (ret) -+ goto err; -+ -+ if (inode.bi_depth != new_depth) { -+ inode.bi_depth = new_depth; -+ ret = __bch2_fsck_write_inode(trans, &inode) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) -+{ -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ darray_for_each_reverse(*path, i) { -+ ret = nested_lockrestart_do(trans, -+ bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); -+ bch_err_fn(trans->c, ret); -+ if (ret) -+ break; -+ -+ new_bi_depth++; -+ } -+ -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ - static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) - { - darray_for_each(*p, i) -@@ -2783,21 +2757,21 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) - return false; - } - --static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) -+static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) - { - struct bch_fs *c = trans->c; - struct btree_iter inode_iter = {}; -- struct bch_inode_unpacked inode; -+ pathbuf path = {}; - struct printbuf buf = PRINTBUF; - u32 snapshot = inode_k.k->p.snapshot; -+ bool redo_bi_depth = false; -+ u32 min_bi_depth = U32_MAX; - int ret = 0; - -- p->nr = 0; -- -- BUG_ON(bch2_inode_unpack(inode_k, &inode)); -- -- if (!S_ISDIR(inode.bi_mode)) -- return 0; -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(inode_k, &inode); -+ if (ret) -+ return ret; - - while (!inode.bi_subvol) { - struct btree_iter dirent_iter; -@@ -2807,7 +2781,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); - ret = bkey_err(d.s_c); - if (ret && !bch2_err_matches(ret, 
ENOENT)) -- break; -+ goto out; - - if (!ret && (ret = dirent_points_to_inode(c, d, &inode))) - bch2_trans_iter_exit(trans, &dirent_iter); -@@ -2822,7 +2796,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - - bch2_trans_iter_exit(trans, &dirent_iter); - -- ret = darray_push(p, ((struct pathbuf_entry) { -+ ret = darray_push(&path, ((struct pathbuf_entry) { - .inum = inode.bi_inum, - .snapshot = snapshot, - })); -@@ -2834,22 +2808,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - bch2_trans_iter_exit(trans, &inode_iter); - inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, - SPOS(0, inode.bi_dir, snapshot), 0); -+ -+ struct bch_inode_unpacked parent_inode; - ret = bkey_err(inode_k) ?: - !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode -- : bch2_inode_unpack(inode_k, &inode); -+ : bch2_inode_unpack(inode_k, &parent_inode); - if (ret) { - /* Should have been caught in dirents pass */ - bch_err_msg(c, ret, "error looking up parent directory"); -- break; -+ goto out; - } - -+ min_bi_depth = parent_inode.bi_depth; -+ -+ if (parent_inode.bi_depth < inode.bi_depth && -+ min_bi_depth < U16_MAX) -+ break; -+ -+ inode = parent_inode; - snapshot = inode_k.k->p.snapshot; -+ redo_bi_depth = true; - -- if (path_is_dup(p, inode.bi_inum, snapshot)) { -+ if (path_is_dup(&path, inode.bi_inum, snapshot)) { - /* XXX print path */ - bch_err(c, "directory structure loop"); - -- darray_for_each(*p, i) -+ darray_for_each(path, i) - pr_err("%llu:%u", i->inum, i->snapshot); - pr_err("%llu:%u", inode.bi_inum, snapshot); - -@@ -2862,12 +2846,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - ret = reattach_inode(trans, &inode); - bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); - } -- break; -+ -+ goto out; - } - } -+ -+ if (inode.bi_subvol) -+ min_bi_depth = 0; -+ -+ if (redo_bi_depth) -+ ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); - out: - fsck_err: - bch2_trans_iter_exit(trans, &inode_iter); -+ darray_exit(&path); - printbuf_exit(&buf); - bch_err_fn(c, ret); - return ret; -@@ -2879,24 +2871,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino - */ - int bch2_check_directory_structure(struct bch_fs *c) - { -- pathbuf path = { 0, }; -- int ret; -- -- ret = bch2_trans_run(c, -+ int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_intent| - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -- if (!bkey_is_inode(k.k)) -+ if (!S_ISDIR(bkey_inode_mode(k))) - continue; - - if (bch2_inode_flags(k) & BCH_INODE_unlinked) - continue; - -- check_path(trans, &path, k); -+ check_path_loop(trans, k); - }))); -- darray_exit(&path); - - bch_err_fn(c, ret); - return ret; -@@ -2994,7 +2982,9 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, - - /* Should never fail, checked by bch2_inode_invalid: */ - struct bch_inode_unpacked u; -- BUG_ON(bch2_inode_unpack(k, &u)); -+ _ret3 = bch2_inode_unpack(k, &u); -+ if (_ret3) -+ break; - - /* - * Backpointer and directory structure checks are sufficient for -@@ -3072,7 +3062,9 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite - if (!bkey_is_inode(k.k)) - return 0; - -- BUG_ON(bch2_inode_unpack(k, &u)); -+ ret = bch2_inode_unpack(k, &u); -+ if (ret) -+ return ret; - - if (S_ISDIR(u.bi_mode)) - return 0; -@@ -3194,3 +3186,223 @@ int bch2_fix_reflink_p(struct bch_fs 
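check_path_loop(), reworked above, walks an inode's parent chain while recording every (inum, snapshot) pair already visited; seeing one twice means the directory graph contains a cycle, which fsck then reports and repairs by reattaching the inode. A minimal userspace model of that walk, with a fixed parent[] table replacing the btree lookups:

/* Illustrative only. */
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

#define NR_INODES 8
static uint32_t parent[NR_INODES];          /* parent[i] == i means "root" */

static bool path_has_loop(uint32_t inum)
{
    uint32_t seen[NR_INODES];
    unsigned nr = 0;

    while (parent[inum] != inum) {
        for (unsigned i = 0; i < nr; i++)
            if (seen[i] == inum)
                return true;                /* already walked through this inode */
        seen[nr++] = inum;
        inum = parent[inum];
    }
    return false;                           /* reached the root cleanly */
}

int main(void)
{
    parent[0] = 0;                          /* root */
    parent[1] = 0;
    parent[2] = 3;                          /* 2 -> 3 -> 2: a loop */
    parent[3] = 2;

    printf("inode 1 loop: %d\n", path_has_loop(1));   /* 0 */
    printf("inode 2 loop: %d\n", path_has_loop(2));   /* 1 */
    return 0;
}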
*c) - bch_err_fn(c, ret); - return ret; - } -+ -+#ifndef NO_BCACHEFS_CHARDEV -+ -+struct fsck_thread { -+ struct thread_with_stdio thr; -+ struct bch_fs *c; -+ struct bch_opts opts; -+}; -+ -+static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) -+{ -+ struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); -+ kfree(thr); -+} -+ -+static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) -+{ -+ struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -+ struct bch_fs *c = thr->c; -+ -+ int ret = PTR_ERR_OR_ZERO(c); -+ if (ret) -+ return ret; -+ -+ ret = bch2_fs_start(thr->c); -+ if (ret) -+ goto err; -+ -+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { -+ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name); -+ ret |= 1; -+ } -+ if (test_bit(BCH_FS_error, &c->flags)) { -+ bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name); -+ ret |= 4; -+ } -+err: -+ bch2_fs_stop(c); -+ return ret; -+} -+ -+static const struct thread_with_stdio_ops bch2_offline_fsck_ops = { -+ .exit = bch2_fsck_thread_exit, -+ .fn = bch2_fsck_offline_thread_fn, -+}; -+ -+long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) -+{ -+ struct bch_ioctl_fsck_offline arg; -+ struct fsck_thread *thr = NULL; -+ darray_str(devs) = {}; -+ long ret = 0; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags) -+ return -EINVAL; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ for (size_t i = 0; i < arg.nr_devs; i++) { -+ u64 dev_u64; -+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64)); -+ if (ret) -+ goto err; -+ -+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX); -+ ret = PTR_ERR_OR_ZERO(dev_str); -+ if (ret) -+ goto err; -+ -+ ret = darray_push(&devs, dev_str); -+ if (ret) { -+ kfree(dev_str); -+ goto err; -+ } -+ } -+ -+ thr = kzalloc(sizeof(*thr), GFP_KERNEL); -+ if (!thr) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ thr->opts = bch2_opts_empty(); -+ -+ if (arg.opts) { -+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -+ ret = PTR_ERR_OR_ZERO(optstr) ?: -+ bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr); -+ if (!IS_ERR(optstr)) -+ kfree(optstr); -+ -+ if (ret) -+ goto err; -+ } -+ -+ opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); -+ opt_set(thr->opts, read_only, 1); -+ opt_set(thr->opts, ratelimit_errors, 0); -+ -+ /* We need request_key() to be called before we punt to kthread: */ -+ opt_set(thr->opts, nostart, true); -+ -+ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); -+ -+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); -+ -+ if (!IS_ERR(thr->c) && -+ thr->c->opts.errors == BCH_ON_ERROR_panic) -+ thr->c->opts.errors = BCH_ON_ERROR_ro; -+ -+ ret = __bch2_run_thread_with_stdio(&thr->thr); -+out: -+ darray_for_each(devs, i) -+ kfree(*i); -+ darray_exit(&devs); -+ return ret; -+err: -+ if (thr) -+ bch2_fsck_thread_exit(&thr->thr); -+ pr_err("ret %s", bch2_err_str(ret)); -+ goto out; -+} -+ -+static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio) -+{ -+ struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr); -+ struct bch_fs *c = thr->c; -+ -+ c->stdio_filter = current; -+ c->stdio = &thr->thr.stdio; -+ -+ /* -+ * XXX: can we figure out a way to do this without mucking with c->opts? 
-+ */ -+ unsigned old_fix_errors = c->opts.fix_errors; -+ if (opt_defined(thr->opts, fix_errors)) -+ c->opts.fix_errors = thr->opts.fix_errors; -+ else -+ c->opts.fix_errors = FSCK_FIX_ask; -+ -+ c->opts.fsck = true; -+ set_bit(BCH_FS_fsck_running, &c->flags); -+ -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; -+ int ret = bch2_run_online_recovery_passes(c); -+ -+ clear_bit(BCH_FS_fsck_running, &c->flags); -+ bch_err_fn(c, ret); -+ -+ c->stdio = NULL; -+ c->stdio_filter = NULL; -+ c->opts.fix_errors = old_fix_errors; -+ -+ up(&c->online_fsck_mutex); -+ bch2_ro_ref_put(c); -+ return ret; -+} -+ -+static const struct thread_with_stdio_ops bch2_online_fsck_ops = { -+ .exit = bch2_fsck_thread_exit, -+ .fn = bch2_fsck_online_thread_fn, -+}; -+ -+long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg) -+{ -+ struct fsck_thread *thr = NULL; -+ long ret = 0; -+ -+ if (arg.flags) -+ return -EINVAL; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (!bch2_ro_ref_tryget(c)) -+ return -EROFS; -+ -+ if (down_trylock(&c->online_fsck_mutex)) { -+ bch2_ro_ref_put(c); -+ return -EAGAIN; -+ } -+ -+ thr = kzalloc(sizeof(*thr), GFP_KERNEL); -+ if (!thr) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ thr->c = c; -+ thr->opts = bch2_opts_empty(); -+ -+ if (arg.opts) { -+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); -+ -+ ret = PTR_ERR_OR_ZERO(optstr) ?: -+ bch2_parse_mount_opts(c, &thr->opts, NULL, optstr); -+ if (!IS_ERR(optstr)) -+ kfree(optstr); -+ -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops); -+err: -+ if (ret < 0) { -+ bch_err_fn(c, ret); -+ if (thr) -+ bch2_fsck_thread_exit(&thr->thr); -+ up(&c->online_fsck_mutex); -+ bch2_ro_ref_put(c); -+ } -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_CHARDEV */ -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -index 1cca31011530..574948278cd4 100644 ---- a/fs/bcachefs/fsck.h -+++ b/fs/bcachefs/fsck.h -@@ -2,6 +2,14 @@ - #ifndef _BCACHEFS_FSCK_H - #define _BCACHEFS_FSCK_H - -+#include "str_hash.h" -+ -+int bch2_fsck_update_backpointers(struct btree_trans *, -+ struct snapshots_seen *, -+ const struct bch_hash_desc, -+ struct bch_hash_info *, -+ struct bkey_i *); -+ - int bch2_check_inodes(struct bch_fs *); - int bch2_check_extents(struct bch_fs *); - int bch2_check_indirect_extents(struct bch_fs *); -@@ -14,4 +22,7 @@ int bch2_check_directory_structure(struct bch_fs *); - int bch2_check_nlinks(struct bch_fs *); - int bch2_fix_reflink_p(struct bch_fs *); - -+long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *); -+long bch2_ioctl_fsck_online(struct bch_fs *, struct bch_ioctl_fsck_online); -+ - #endif /* _BCACHEFS_FSCK_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -index 039cb7a22244..04ec05206f8c 100644 ---- a/fs/bcachefs/inode.c -+++ b/fs/bcachefs/inode.c -@@ -14,6 +14,7 @@ - #include "extent_update.h" - #include "fs.h" - #include "inode.h" -+#include "opts.h" - #include "str_hash.h" - #include "snapshot.h" - #include "subvolume.h" -@@ -47,10 +48,10 @@ static int inode_decode_field(const u8 *in, const u8 *end, - u8 *p; - - if (in >= end) -- return -1; -+ return -BCH_ERR_inode_unpack_error; - - if (!*in) -- return -1; -+ return -BCH_ERR_inode_unpack_error; - - /* - * position of highest set bit indicates number of bytes: -@@ -60,7 +61,7 @@ static int inode_decode_field(const u8 *in, const u8 *end, - bytes = byte_table[shift - 1]; - - if (in + bytes > end) -- return -1; -+ return 
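inode_decode_field() now returns -BCH_ERR_inode_unpack_error instead of a bare -1 for every malformed case; the field width itself comes from the position of the highest set bit of the first byte, looked up in byte_table. A simplified userspace decoder in the same spirit: the byte_table values and the big-endian payload here are invented, so this is not the real on-disk encoding:

/* Illustrative only; encoding details are assumptions, not bcachefs's. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static const uint8_t byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; /* made up */

static int decode_field(const uint8_t *in, const uint8_t *end, uint64_t *out)
{
    if (in >= end || !*in)
        return -EINVAL;                     /* truncated input or zero tag byte */

    unsigned shift = 0;                     /* position of highest set bit, 1..8 */
    for (uint8_t v = *in; v; v >>= 1)
        shift++;

    unsigned bytes = byte_table[shift - 1];
    if (in + bytes > end)
        return -EOVERFLOW;                  /* field runs past the buffer */

    uint64_t v = 0;
    for (unsigned i = 0; i < bytes && i < 8; i++)
        v = (v << 8) | in[i];
    *out = v;
    return bytes;                           /* consumed length on success */
}

int main(void)
{
    const uint8_t buf[] = { 0x03, 0xff };   /* highest bit at position 2 -> 2 bytes */
    uint64_t v;
    int ret = decode_field(buf, buf + sizeof(buf), &v);
    printf("ret=%d value=0x%llx\n", ret, (unsigned long long)v);
    return 0;
}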
-BCH_ERR_inode_unpack_error; - - p = (u8 *) be + 16 - bytes; - memcpy(p, in, bytes); -@@ -176,7 +177,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, - return ret; \ - \ - if (field_bits > sizeof(unpacked->_name) * 8) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - \ - unpacked->_name = field[1]; \ - in += ret; -@@ -217,7 +218,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, - \ - unpacked->_name = v[0]; \ - if (v[1] || v[0] != unpacked->_name) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - fieldnr++; - - BCH_INODE_FIELDS_v2() -@@ -268,7 +269,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k, - \ - unpacked->_name = v[0]; \ - if (v[1] || v[0] != unpacked->_name) \ -- return -1; \ -+ return -BCH_ERR_inode_unpack_error; \ - fieldnr++; - - BCH_INODE_FIELDS_v3() -@@ -428,7 +429,7 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) - } - - static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bch_inode_unpacked unpacked; - int ret = 0; -@@ -468,7 +469,7 @@ static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - } - - int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); - int ret = 0; -@@ -478,13 +479,13 @@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } - - int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); - int ret = 0; -@@ -494,13 +495,13 @@ int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } - - int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); - int ret = 0; -@@ -518,7 +519,7 @@ int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, - "invalid str hash type (%llu >= %u)", - INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - -- ret = __bch2_inode_validate(c, k, flags); -+ ret = __bch2_inode_validate(c, k, from); - fsck_err: - return ret; - } -@@ -617,7 +618,7 @@ bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter - struct bkey_s_c k; - int ret = 0; - -- for_each_btree_key_upto_norestart(trans, *iter, btree, -+ for_each_btree_key_max_norestart(trans, *iter, btree, - bpos_successor(pos), - SPOS(pos.inode, pos.offset, U32_MAX), - flags|BTREE_ITER_all_snapshots, k, ret) -@@ -652,7 +653,7 @@ int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) - struct bkey_s_c k; - int ret = 0; - -- for_each_btree_key_upto_norestart(trans, iter, -+ for_each_btree_key_max_norestart(trans, iter, - BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos), - BTREE_ITER_all_snapshots| - BTREE_ITER_with_updates, k, ret) -@@ -779,7 +780,7 @@ int bch2_trigger_inode(struct 
btree_trans *trans, - } - - int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -798,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, - prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); - } - -+int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k, -+ struct bkey_validate_context from) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors, -+ c, inode_alloc_cursor_inode_bad, -+ "k.p.inode bad"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k); -+ -+ prt_printf(out, "idx %llu generation %llu", -+ le64_to_cpu(i.v->idx), -+ le64_to_cpu(i.v->gen)); -+} -+ - void bch2_inode_init_early(struct bch_fs *c, - struct bch_inode_unpacked *inode_u) - { -@@ -858,43 +881,78 @@ static inline u32 bkey_generation(struct bkey_s_c k) - } - } - --/* -- * This just finds an empty slot: -- */ --int bch2_inode_create(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bch_inode_unpacked *inode_u, -- u32 snapshot, u64 cpu) -+static struct bkey_i_inode_alloc_cursor * -+bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c k; -- u64 min, max, start, pos, *hint; -- int ret = 0; -- unsigned bits = (c->opts.inodes_32bit ? 31 : 63); - -- if (c->opts.shard_inode_numbers) { -- bits -= c->inode_shard_bits; -+ u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1; - -- min = (cpu << bits); -- max = (cpu << bits) | ~(ULLONG_MAX << bits); -+ cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits); - -- min = max_t(u64, min, BLOCKDEV_INODE_MAX); -- hint = c->unused_inode_hints + cpu; -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, -+ BTREE_ID_logged_ops, -+ POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx), -+ BTREE_ITER_cached); -+ int ret = bkey_err(k); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ struct bkey_i_inode_alloc_cursor *cursor = -+ k.k->type == KEY_TYPE_inode_alloc_cursor -+ ? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor) -+ : bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor); -+ ret = PTR_ERR_OR_ZERO(cursor); -+ if (ret) -+ goto err; -+ -+ if (c->opts.inodes_32bit) { -+ *min = BLOCKDEV_INODE_MAX; -+ *max = INT_MAX; - } else { -- min = BLOCKDEV_INODE_MAX; -- max = ~(ULLONG_MAX << bits); -- hint = c->unused_inode_hints; -+ cursor->v.bits = c->opts.shard_inode_numbers_bits; -+ -+ unsigned bits = 63 - c->opts.shard_inode_numbers_bits; -+ -+ *min = max(cpu << bits, (u64) INT_MAX + 1); -+ *max = (cpu << bits) | ~(ULLONG_MAX << bits); - } - -- start = READ_ONCE(*hint); -+ if (le64_to_cpu(cursor->v.idx) < *min) -+ cursor->v.idx = cpu_to_le64(*min); - -- if (start >= max || start < min) -- start = min; -+ if (le64_to_cpu(cursor->v.idx) >= *max) { -+ cursor->v.idx = cpu_to_le64(*min); -+ le32_add_cpu(&cursor->v.gen, 1); -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret ? 
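bch2_inode_alloc_cursor_get(), added above, gives each shard a contiguous range of inode numbers derived from the shard id and shard_inode_numbers_bits, persists a cursor into that range, and bumps a generation counter whenever allocation wraps back to the start. A userspace sketch of the range arithmetic and wrap handling; SHARD_BITS and the absence of a minimum inode number are simplifications:

/* Illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define SHARD_BITS  3                       /* assumed: 8 shards */

struct alloc_cursor { uint64_t idx; uint32_t gen; };

static void shard_range(uint64_t shard, uint64_t *min, uint64_t *max)
{
    unsigned bits = 63 - SHARD_BITS;
    *min = shard << bits;
    *max = (shard << bits) | ~(~0ULL << bits);
}

static uint64_t alloc_inum(struct alloc_cursor *c, uint64_t shard)
{
    uint64_t min, max;
    shard_range(shard, &min, &max);

    if (c->idx < min || c->idx > max) {     /* first use, or wrapped past the range */
        c->idx = min;
        c->gen++;                           /* generation distinguishes reused numbers */
    }
    return c->idx++;
}

int main(void)
{
    struct alloc_cursor cur = { 0 };
    uint64_t a = alloc_inum(&cur, 2);
    printf("first:  %llu (gen %u)\n", (unsigned long long)a, cur.gen);
    uint64_t b = alloc_inum(&cur, 2);
    printf("second: %llu (gen %u)\n", (unsigned long long)b, cur.gen);
    return 0;
}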
ERR_PTR(ret) : cursor; -+} -+ -+/* -+ * This just finds an empty slot: -+ */ -+int bch2_inode_create(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode_u, -+ u32 snapshot, u64 cpu) -+{ -+ u64 min, max; -+ struct bkey_i_inode_alloc_cursor *cursor = -+ bch2_inode_alloc_cursor_get(trans, cpu, &min, &max); -+ int ret = PTR_ERR_OR_ZERO(cursor); -+ if (ret) -+ return ret; -+ -+ u64 start = le64_to_cpu(cursor->v.idx); -+ u64 pos = start; - -- pos = start; - bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), - BTREE_ITER_all_snapshots| - BTREE_ITER_intent); -+ struct bkey_s_c k; - again: - while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = bkey_err(k)) && -@@ -924,6 +982,7 @@ int bch2_inode_create(struct btree_trans *trans, - /* Retry from start */ - pos = start = min; - bch2_btree_iter_set_pos(iter, POS(0, pos)); -+ le32_add_cpu(&cursor->v.gen, 1); - goto again; - found_slot: - bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); -@@ -934,9 +993,9 @@ int bch2_inode_create(struct btree_trans *trans, - return ret; - } - -- *hint = k.k->p.offset; - inode_u->bi_inum = k.k->p.offset; -- inode_u->bi_generation = bkey_generation(k); -+ inode_u->bi_generation = le64_to_cpu(cursor->v.gen); -+ cursor->v.idx = cpu_to_le64(k.k->p.offset + 1); - return 0; - } - -@@ -966,7 +1025,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, - - bch2_btree_iter_set_snapshot(&iter, snapshot); - -- k = bch2_btree_iter_peek_upto(&iter, end); -+ k = bch2_btree_iter_peek_max(&iter, end); - ret = bkey_err(k); - if (ret) - goto err; -@@ -998,8 +1057,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; -- struct bkey_i_inode_generation delete; -- struct bch_inode_unpacked inode_u; - struct bkey_s_c k; - u32 snapshot; - int ret; -@@ -1039,13 +1096,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) - goto err; - } - -- bch2_inode_unpack(k, &inode_u); -- -- bkey_inode_generation_init(&delete.k_i); -- delete.k.p = iter.pos; -- delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); -- -- ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: -+ ret = bch2_btree_delete_at(trans, &iter, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc); - err: -@@ -1141,12 +1192,17 @@ struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) - void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, - struct bch_inode_unpacked *inode) - { --#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); -+#define x(_name, _bits) \ -+ if ((inode)->bi_##_name) { \ -+ opts->_name = inode->bi_##_name - 1; \ -+ opts->_name##_from_inode = true; \ -+ } else { \ -+ opts->_name = c->opts._name; \ -+ } - BCH_INODE_OPTS() - #undef x - -- if (opts->nocow) -- opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; -+ bch2_io_opts_fixups(opts); - } - - int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) -@@ -1380,7 +1436,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); - if (ret > 0) { -- bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); -+ bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", -+ k.k->p.offset, k.k->p.snapshot); - - ret = bch2_inode_rm_snapshot(trans, 
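The new bch2_inode_opts_get() macro body treats a per-inode option field of 0 as "inherit the filesystem default" and stores value + 1 otherwise, so an explicit per-inode value of 0 stays representable and the _from_inode flag can be reported. A tiny model of that encoding with a single placeholder option name:

/* Illustrative only; "compression" is just a placeholder field. */
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

struct fs_opts    { uint8_t compression; };
struct inode_opts { uint8_t compression; };  /* 0 = unset, else value + 1 */

struct effective {
    uint8_t compression;
    bool    compression_from_inode;
};

static struct effective resolve(const struct fs_opts *fs, const struct inode_opts *ino)
{
    struct effective e = { .compression = fs->compression };

    if (ino->compression) {
        e.compression = ino->compression - 1;   /* undo the +1 bias */
        e.compression_from_inode = true;
    }
    return e;
}

int main(void)
{
    struct fs_opts fs = { .compression = 2 };
    struct inode_opts unset = { 0 }, zeroed = { .compression = 1 /* stores 0 */ };

    struct effective a = resolve(&fs, &unset);
    struct effective b = resolve(&fs, &zeroed);
    printf("inherit: %u (from_inode=%d)\n", a.compression, a.compression_from_inode);
    printf("override to 0: %u (from_inode=%d)\n", b.compression, b.compression_from_inode);
    return 0;
}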
k.k->p.offset, k.k->p.snapshot); - /* -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -index eab82b5eb897..d2e134528f0e 100644 ---- a/fs/bcachefs/inode.h -+++ b/fs/bcachefs/inode.h -@@ -7,15 +7,14 @@ - #include "opts.h" - #include "snapshot.h" - --enum bch_validate_flags; - extern const char * const bch2_inode_opts[]; - - int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - int __bch2_inode_has_child_snapshots(struct btree_trans *, struct bpos); -@@ -60,7 +59,7 @@ static inline bool bkey_is_inode(const struct bkey *k) - } - - int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ -@@ -69,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk - .min_val_size = 8, \ - }) - -+int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); -+void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) { \ -+ .key_validate = bch2_inode_alloc_cursor_validate, \ -+ .val_to_text = bch2_inode_alloc_cursor_to_text, \ -+ .min_val_size = 16, \ -+}) -+ - #if 0 - typedef struct { - u64 lo; -@@ -220,6 +229,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k) - } - } - -+static inline unsigned bkey_inode_mode(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode); -+ case KEY_TYPE_inode_v2: -+ return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode); -+ case KEY_TYPE_inode_v3: -+ return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v); -+ default: -+ return 0; -+ } -+} -+ - /* i_nlink: */ - - static inline unsigned nlink_bias(umode_t mode) -@@ -249,7 +272,7 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, - int bch2_inode_nlink_inc(struct bch_inode_unpacked *); - void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); - --static inline bool bch2_inode_should_have_bp(struct bch_inode_unpacked *inode) -+static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode) - { - bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset; - -@@ -262,6 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, - struct bch_inode_unpacked *); - int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); - -+static inline struct bch_extent_rebalance -+bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) -+{ -+ struct bch_io_opts io_opts; -+ bch2_inode_opts_get(&io_opts, c, inode); -+ return io_opts_to_rebalance_opts(&io_opts); -+} -+ - int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); - int bch2_delete_dead_inodes(struct bch_fs *); - -diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h -index 7928d0c6954f..b99a5bf1a75e 100644 ---- a/fs/bcachefs/inode_format.h -+++ 
b/fs/bcachefs/inode_format.h -@@ -101,7 +101,9 @@ struct bch_inode_generation { - x(bi_dir_offset, 64) \ - x(bi_subvol, 32) \ - x(bi_parent_subvol, 32) \ -- x(bi_nocow, 8) -+ x(bi_nocow, 8) \ -+ x(bi_depth, 32) \ -+ x(bi_inodes_32bit, 8) - - /* subset of BCH_INODE_FIELDS */ - #define BCH_INODE_OPTS() \ -@@ -114,7 +116,8 @@ struct bch_inode_generation { - x(foreground_target, 16) \ - x(background_target, 16) \ - x(erasure_code, 16) \ -- x(nocow, 8) -+ x(nocow, 8) \ -+ x(inodes_32bit, 8) - - enum inode_opt_id { - #define x(name, ...) \ -@@ -164,4 +167,12 @@ LE64_BITMASK(INODEv3_FIELDS_START, - struct bch_inode_v3, bi_flags, 31, 36); - LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); - -+struct bch_inode_alloc_cursor { -+ struct bch_val v; -+ __u8 bits; -+ __u8 pad; -+ __le32 gen; -+ __le64 idx; -+}; -+ - #endif /* _BCACHEFS_INODE_FORMAT_H */ -diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c -index f283051758d6..5353979117b0 100644 ---- a/fs/bcachefs/io_misc.c -+++ b/fs/bcachefs/io_misc.c -@@ -113,11 +113,13 @@ int bch2_extent_fallocate(struct btree_trans *trans, - err: - if (!ret && sectors_allocated) - bch2_increment_clock(c, sectors_allocated, WRITE); -- if (should_print_err(ret)) -- bch_err_inum_offset_ratelimited(c, -- inum.inum, -- iter->pos.offset << 9, -- "%s(): error: %s", __func__, bch2_err_str(ret)); -+ if (should_print_err(ret)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); -+ prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } - err_noprint: - bch2_open_buckets_put(c, &open_buckets); - bch2_disk_reservation_put(c, &disk_res); -@@ -164,9 +166,9 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, - bch2_btree_iter_set_snapshot(iter, snapshot); - - /* -- * peek_upto() doesn't have ideal semantics for extents: -+ * peek_max() doesn't have ideal semantics for extents: - */ -- k = bch2_btree_iter_peek_upto(iter, end_pos); -+ k = bch2_btree_iter_peek_max(iter, end_pos); - if (!k.k) - break; - -@@ -426,8 +428,8 @@ case LOGGED_OP_FINSERT_shift_extents: - bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); - - k = insert -- ? bch2_btree_iter_peek_prev(&iter) -- : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); -+ ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) -+ : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); - if ((ret = bkey_err(k))) - goto btree_err; - -@@ -461,7 +463,7 @@ case LOGGED_OP_FINSERT_shift_extents: - - op->v.pos = cpu_to_le64(insert ? 
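The on-disk bch_inode_alloc_cursor added above keeps its gen and idx fields as __le32/__le64 and converts with cpu_to_le64() and le64_to_cpu() on every access, so the stored bytes are identical on big- and little-endian hosts. A userspace sketch of the same discipline with hand-rolled little-endian helpers; the cursor_disk layout here is illustrative, not the real key format:

/* Illustrative only. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct cursor_disk { uint8_t bits, pad, gen[4], idx[8]; };  /* fixed LE layout */
struct cursor      { uint8_t bits; uint32_t gen; uint64_t idx; };

static void put_le64(uint8_t *p, uint64_t v)
{
    for (int i = 0; i < 8; i++)
        p[i] = v >> (8 * i);                /* least significant byte first */
}

static uint64_t get_le64(const uint8_t *p)
{
    uint64_t v = 0;
    for (int i = 0; i < 8; i++)
        v |= (uint64_t)p[i] << (8 * i);
    return v;
}

static void cursor_encode(struct cursor_disk *d, const struct cursor *c)
{
    memset(d, 0, sizeof(*d));
    d->bits = c->bits;
    for (int i = 0; i < 4; i++)
        d->gen[i] = c->gen >> (8 * i);
    put_le64(d->idx, c->idx);
}

static void cursor_decode(struct cursor *c, const struct cursor_disk *d)
{
    c->bits = d->bits;
    c->gen = 0;
    for (int i = 0; i < 4; i++)
        c->gen |= (uint32_t)d->gen[i] << (8 * i);
    c->idx = get_le64(d->idx);
}

int main(void)
{
    struct cursor c = { .bits = 60, .gen = 3, .idx = 0x123456789abcdefULL };
    struct cursor_disk d;
    struct cursor back;

    cursor_encode(&d, &c);
    cursor_decode(&back, &d);
    printf("roundtrip idx=0x%llx gen=%u bits=%u\n",
           (unsigned long long)back.idx, back.gen, back.bits);
    return 0;
}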
bkey_start_offset(&delete.k) : delete.k.p.offset); - -- ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: - bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: - bch2_logged_op_update(trans, &op->k_i) ?: -diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c -index b3b934a87c6d..6276f375dbc9 100644 ---- a/fs/bcachefs/io_read.c -+++ b/fs/bcachefs/io_read.c -@@ -21,6 +21,7 @@ - #include "io_read.h" - #include "io_misc.h" - #include "io_write.h" -+#include "reflink.h" - #include "subvolume.h" - #include "trace.h" - -@@ -79,6 +80,7 @@ struct promote_op { - struct rhash_head hash; - struct bpos pos; - -+ struct work_struct work; - struct data_update write; - struct bio_vec bi_inline_vecs[]; /* must be last */ - }; -@@ -90,16 +92,41 @@ static const struct rhashtable_params bch_promote_params = { - .automatic_shrinking = true, - }; - -+static inline bool have_io_error(struct bch_io_failures *failed) -+{ -+ return failed && failed->nr; -+} -+ -+static bool ptr_being_rewritten(struct bch_read_bio *orig, -+ unsigned dev, -+ unsigned flags) -+{ -+ if (!(flags & BCH_READ_data_update)) -+ return false; -+ -+ struct data_update *u = container_of(orig, struct data_update, rbio); -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(u->k.k)); -+ unsigned i = 0; -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (ptr->dev == dev && -+ u->data_opts.rewrite_ptrs & BIT(i)) -+ return true; -+ i++; -+ } -+ -+ return false; -+} -+ - static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, - struct bpos pos, - struct bch_io_opts opts, - unsigned flags, - struct bch_io_failures *failed) - { -- if (!failed) { -+ if (!have_io_error(failed)) { - BUG_ON(!opts.promote_target); - -- if (!(flags & BCH_READ_MAY_PROMOTE)) -+ if (!(flags & BCH_READ_may_promote)) - return -BCH_ERR_nopromote_may_not; - - if (bch2_bkey_has_target(c, k, opts.promote_target)) -@@ -119,98 +146,94 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, - return 0; - } - --static void promote_free(struct bch_fs *c, struct promote_op *op) -+static noinline void promote_free(struct bch_read_bio *rbio) - { -- int ret; -+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio); -+ struct bch_fs *c = rbio->c; -+ -+ int ret = rhashtable_remove_fast(&c->promote_table, &op->hash, -+ bch_promote_params); -+ BUG_ON(ret); - - bch2_data_update_exit(&op->write); - -- ret = rhashtable_remove_fast(&c->promote_table, &op->hash, -- bch_promote_params); -- BUG_ON(ret); - bch2_write_ref_put(c, BCH_WRITE_REF_promote); - kfree_rcu(op, rcu); - } - - static void promote_done(struct bch_write_op *wop) - { -- struct promote_op *op = -- container_of(wop, struct promote_op, write.op); -- struct bch_fs *c = op->write.op.c; -+ struct promote_op *op = container_of(wop, struct promote_op, write.op); -+ struct bch_fs *c = op->write.rbio.c; - -- bch2_time_stats_update(&c->times[BCH_TIME_data_promote], -- op->start_time); -- promote_free(c, op); -+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], op->start_time); -+ promote_free(&op->write.rbio); - } - --static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) -+static void promote_start_work(struct work_struct *work) - { -- struct bio *bio = &op->write.op.wbio.bio; -+ struct promote_op *op = container_of(work, struct promote_op, work); - -- trace_and_count(op->write.op.c, read_promote, &rbio->bio); -+ 
bch2_data_update_read_done(&op->write); -+} - -- /* we now own pages: */ -- BUG_ON(!rbio->bounce); -- BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); -+static noinline void promote_start(struct bch_read_bio *rbio) -+{ -+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio); - -- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, -- sizeof(struct bio_vec) * rbio->bio.bi_vcnt); -- swap(bio->bi_vcnt, rbio->bio.bi_vcnt); -+ trace_and_count(op->write.op.c, read_promote, &rbio->bio); - -- bch2_data_update_read_done(&op->write, rbio->pick.crc); -+ INIT_WORK(&op->work, promote_start_work); -+ queue_work(rbio->c->write_ref_wq, &op->work); - } - --static struct promote_op *__promote_alloc(struct btree_trans *trans, -- enum btree_id btree_id, -- struct bkey_s_c k, -- struct bpos pos, -- struct extent_ptr_decoded *pick, -- struct bch_io_opts opts, -- unsigned sectors, -- struct bch_read_bio **rbio, -- struct bch_io_failures *failed) -+static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bkey_s_c k, -+ struct bpos pos, -+ struct extent_ptr_decoded *pick, -+ unsigned sectors, -+ unsigned flags, -+ struct bch_read_bio *orig, -+ struct bch_io_failures *failed) - { - struct bch_fs *c = trans->c; -- struct promote_op *op = NULL; -- struct bio *bio; -- unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); - int ret; - -- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) -- return ERR_PTR(-BCH_ERR_nopromote_no_writes); -+ struct data_update_opts update_opts = { .write_flags = BCH_WRITE_alloc_nowait }; - -- op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL); -- if (!op) { -- ret = -BCH_ERR_nopromote_enomem; -- goto err; -- } -+ if (!have_io_error(failed)) { -+ update_opts.target = orig->opts.promote_target; -+ update_opts.extra_replicas = 1; -+ update_opts.write_flags |= BCH_WRITE_cached; -+ update_opts.write_flags |= BCH_WRITE_only_specified_devs; -+ } else { -+ update_opts.target = orig->opts.foreground_target; - -- op->start_time = local_clock(); -- op->pos = pos; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ unsigned ptr_bit = 1; -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (bch2_dev_io_failures(failed, ptr->dev) && -+ !ptr_being_rewritten(orig, ptr->dev, flags)) -+ update_opts.rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } - -- /* -- * We don't use the mempool here because extents that aren't -- * checksummed or compressed can be too big for the mempool: -- */ -- *rbio = kzalloc(sizeof(struct bch_read_bio) + -- sizeof(struct bio_vec) * pages, -- GFP_KERNEL); -- if (!*rbio) { -- ret = -BCH_ERR_nopromote_enomem; -- goto err; -+ if (!update_opts.rewrite_ptrs) -+ return NULL; - } - -- rbio_init(&(*rbio)->bio, opts); -- bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) -+ return ERR_PTR(-BCH_ERR_nopromote_no_writes); - -- if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) { -+ struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL); -+ if (!op) { - ret = -BCH_ERR_nopromote_enomem; -- goto err; -+ goto err_put; - } - -- (*rbio)->bounce = true; -- (*rbio)->split = true; -- (*rbio)->kmalloc = true; -+ op->start_time = local_clock(); -+ op->pos = pos; - - if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, - bch_promote_params)) { -@@ -218,64 +241,43 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, - goto err; - } - -- bio = &op->write.op.wbio.bio; -- bio_init(bio, NULL, bio->bi_inline_vecs, pages, 
0); -- -- struct data_update_opts update_opts = {}; -- -- if (!failed) { -- update_opts.target = opts.promote_target; -- update_opts.extra_replicas = 1; -- update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED; -- } else { -- update_opts.target = opts.foreground_target; -- -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- unsigned i = 0; -- bkey_for_each_ptr(ptrs, ptr) { -- if (bch2_dev_io_failures(failed, ptr->dev)) -- update_opts.rewrite_ptrs |= BIT(i); -- i++; -- } -- } -- - ret = bch2_data_update_init(trans, NULL, NULL, &op->write, - writepoint_hashed((unsigned long) current), -- opts, -+ orig->opts, - update_opts, - btree_id, k); - /* - * possible errors: -BCH_ERR_nocow_lock_blocked, - * -BCH_ERR_ENOSPC_disk_reservation: - */ -- if (ret) { -- BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, -- bch_promote_params)); -- goto err; -- } -+ if (ret) -+ goto err_remove_hash; - -+ rbio_init_fragment(&op->write.rbio.bio, orig); -+ op->write.rbio.bounce = true; -+ op->write.rbio.promote = true; - op->write.op.end_io = promote_done; - -- return op; -+ return &op->write.rbio; -+err_remove_hash: -+ BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, -+ bch_promote_params)); - err: -- if (*rbio) -- bio_free_pages(&(*rbio)->bio); -- kfree(*rbio); -- *rbio = NULL; -+ bio_free_pages(&op->write.op.wbio.bio); - /* We may have added to the rhashtable and thus need rcu freeing: */ - kfree_rcu(op, rcu); -+err_put: - bch2_write_ref_put(c, BCH_WRITE_REF_promote); - return ERR_PTR(ret); - } - - noinline --static struct promote_op *promote_alloc(struct btree_trans *trans, -+static struct bch_read_bio *promote_alloc(struct btree_trans *trans, - struct bvec_iter iter, - struct bkey_s_c k, - struct extent_ptr_decoded *pick, -- struct bch_io_opts opts, - unsigned flags, -- struct bch_read_bio **rbio, -+ struct bch_read_bio *orig, - bool *bounce, - bool *read_full, - struct bch_io_failures *failed) -@@ -285,7 +287,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, - * if failed != NULL we're not actually doing a promote, we're - * recovering from an io/checksum error - */ -- bool promote_full = (failed || -+ bool promote_full = (have_io_error(failed) || - *read_full || - READ_ONCE(c->opts.promote_whole_extents)); - /* data might have to be decompressed in the write path: */ -@@ -295,18 +297,21 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, - struct bpos pos = promote_full - ? bkey_start_pos(k.k) - : POS(k.k->p.inode, iter.bi_sector); -- struct promote_op *promote; - int ret; - -- ret = should_promote(c, k, pos, opts, flags, failed); -+ ret = should_promote(c, k, pos, orig->opts, flags, failed); - if (ret) - goto nopromote; - -- promote = __promote_alloc(trans, -- k.k->type == KEY_TYPE_reflink_v -- ? BTREE_ID_reflink -- : BTREE_ID_extents, -- k, pos, pick, opts, sectors, rbio, failed); -+ struct bch_read_bio *promote = -+ __promote_alloc(trans, -+ k.k->type == KEY_TYPE_reflink_v -+ ? 
BTREE_ID_reflink -+ : BTREE_ID_extents, -+ k, pos, pick, sectors, flags, orig, failed); -+ if (!promote) -+ return NULL; -+ - ret = PTR_ERR_OR_ZERO(promote); - if (ret) - goto nopromote; -@@ -321,6 +326,20 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, - - /* Read */ - -+static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, -+ struct bch_read_bio *rbio, struct bpos read_pos) -+{ -+ return bch2_inum_offset_err_msg_trans(trans, out, -+ (subvol_inum) { rbio->subvol, read_pos.inode }, -+ read_pos.offset << 9); -+} -+ -+static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, -+ struct bch_read_bio *rbio, struct bpos read_pos) -+{ -+ bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); -+} -+ - #define READ_RETRY_AVOID 1 - #define READ_RETRY 2 - #define READ_ERR 3 -@@ -355,20 +374,20 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) - { - BUG_ON(rbio->bounce && !rbio->split); - -- if (rbio->promote) -- promote_free(rbio->c, rbio->promote); -- rbio->promote = NULL; -- -- if (rbio->bounce) -- bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -- - if (rbio->split) { - struct bch_read_bio *parent = rbio->parent; - -- if (rbio->kmalloc) -- kfree(rbio); -- else -+ if (unlikely(rbio->promote)) { -+ if (!rbio->bio.bi_status) -+ promote_start(rbio); -+ else -+ promote_free(rbio); -+ } else { -+ if (rbio->bounce) -+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio); -+ - bio_put(&rbio->bio); -+ } - - rbio = parent; - } -@@ -388,61 +407,47 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) - bio_endio(&rbio->bio); - } - --static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, -+static noinline void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, - struct bvec_iter bvec_iter, - struct bch_io_failures *failed, - unsigned flags) - { -+ struct data_update *u = container_of(rbio, struct data_update, rbio); - struct btree_trans *trans = bch2_trans_get(c); -- struct btree_iter iter; -- struct bkey_buf sk; -- struct bkey_s_c k; -- int ret; -- -- flags &= ~BCH_READ_LAST_FRAGMENT; -- flags |= BCH_READ_MUST_CLONE; -- -- bch2_bkey_buf_init(&sk); -- -- bch2_trans_iter_init(trans, &iter, rbio->data_btree, -- rbio->read_pos, BTREE_ITER_slots); - retry: - bch2_trans_begin(trans); -- rbio->bio.bi_status = 0; - -- ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = lockrestart_do(trans, -+ bkey_err(k = bch2_bkey_get_iter(trans, &iter, -+ u->btree_id, bkey_start_pos(&u->k.k->k), -+ 0))); - if (ret) - goto err; - -- bch2_bkey_buf_reassemble(&sk, c, k); -- k = bkey_i_to_s_c(sk.k); -- -- if (!bch2_bkey_matches_ptr(c, k, -- rbio->pick.ptr, -- rbio->data_pos.offset - -- rbio->pick.crc.offset)) { -+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) { - /* extent we wanted to read no longer exists: */ - rbio->hole = true; -- goto out; -+ goto err; - } - - ret = __bch2_read_extent(trans, rbio, bvec_iter, -- rbio->read_pos, -- rbio->data_btree, -- k, 0, failed, flags); -+ bkey_start_pos(&u->k.k->k), -+ u->btree_id, -+ bkey_i_to_s_c(u->k.k), -+ 0, failed, flags); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ - if (ret == READ_RETRY) - goto retry; - if (ret) -- goto err; --out: -+ rbio->bio.bi_status = BLK_STS_IOERR; -+ -+ BUG_ON(atomic_read(&rbio->bio.__bi_remaining) != 1); - bch2_rbio_done(rbio); -- bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); -- bch2_bkey_buf_exit(&sk, 
c); -- return; --err: -- rbio->bio.bi_status = BLK_STS_IOERR; -- goto out; - } - - static void bch2_rbio_retry(struct work_struct *work) -@@ -463,21 +468,20 @@ static void bch2_rbio_retry(struct work_struct *work) - if (rbio->retry == READ_RETRY_AVOID) - bch2_mark_io_failure(&failed, &rbio->pick); - -- rbio->bio.bi_status = 0; -+ if (!rbio->split) -+ rbio->bio.bi_status = 0; - - rbio = bch2_rbio_free(rbio); - -- flags |= BCH_READ_IN_RETRY; -- flags &= ~BCH_READ_MAY_PROMOTE; -+ flags |= BCH_READ_in_retry; -+ flags &= ~BCH_READ_may_promote; -+ flags &= ~BCH_READ_last_fragment; -+ flags |= BCH_READ_must_clone; - -- if (flags & BCH_READ_NODECODE) { -+ if (flags & BCH_READ_data_update) - bch2_read_retry_nodecode(c, rbio, iter, &failed, flags); -- } else { -- flags &= ~BCH_READ_LAST_FRAGMENT; -- flags |= BCH_READ_MUST_CLONE; -- -+ else - __bch2_read(c, rbio, iter, inum, &failed, flags); -- } - } - - static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, -@@ -485,7 +489,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, - { - rbio->retry = retry; - -- if (rbio->flags & BCH_READ_IN_RETRY) -+ if (rbio->flags & BCH_READ_in_retry) - return; - - if (retry == READ_ERR) { -@@ -499,6 +503,29 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, - } - } - -+static void bch2_read_io_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bio *bio = &rbio->bio; -+ struct bch_fs *c = rbio->c; -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); -+ -+ if (ca) { -+ bch2_io_error(ca, BCH_MEMBER_ERROR_read); -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ } else { -+ bch_err_ratelimited(c, "%s", buf.buf); -+ } -+ -+ printbuf_exit(&buf); -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+} -+ - static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, - struct bch_read_bio *rbio) - { -@@ -562,6 +589,73 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) - __bch2_rbio_narrow_crcs(trans, rbio)); - } - -+static void bch2_read_csum_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct bio *src = &rbio->bio; -+ struct bch_extent_crc_unpacked crc = rbio->pick.crc; -+ struct nonce nonce = extent_nonce(rbio->version, crc); -+ struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "data "); -+ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); -+ -+ struct bch_dev *ca = rbio->have_ioref ? 
bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) { -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ } else { -+ bch_err_ratelimited(c, "%s", buf.buf); -+ } -+ -+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ -+static void bch2_read_decompress_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "decompression error"); -+ -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ else -+ bch_err_ratelimited(c, "%s", buf.buf); -+ -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ -+static void bch2_read_decrypt_err(struct work_struct *work) -+{ -+ struct bch_read_bio *rbio = -+ container_of(work, struct bch_read_bio, work); -+ struct bch_fs *c = rbio->c; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); -+ prt_str(&buf, "decrypt error"); -+ -+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -+ if (ca) -+ bch_err_ratelimited(ca, "%s", buf.buf); -+ else -+ bch_err_ratelimited(c, "%s", buf.buf); -+ -+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ printbuf_exit(&buf); -+} -+ - /* Inner part that may run in process context */ - static void __bch2_read_endio(struct work_struct *work) - { -@@ -602,32 +696,40 @@ static void __bch2_read_endio(struct work_struct *work) - if (unlikely(rbio->narrow_crcs)) - bch2_rbio_narrow_crcs(rbio); - -- if (rbio->flags & BCH_READ_NODECODE) -- goto nodecode; -+ if (likely(!(rbio->flags & BCH_READ_data_update))) { -+ /* Adjust crc to point to subset of data we want: */ -+ crc.offset += rbio->offset_into_extent; -+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter); - -- /* Adjust crc to point to subset of data we want: */ -- crc.offset += rbio->offset_into_extent; -- crc.live_size = bvec_iter_sectors(rbio->bvec_iter); -+ if (crc_is_compressed(crc)) { -+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ if (ret) -+ goto decrypt_err; - -- if (crc_is_compressed(crc)) { -- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); -- if (ret) -- goto decrypt_err; -+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && -+ !c->opts.no_data_io) -+ goto decompression_err; -+ } else { -+ /* don't need to decrypt the entire bio: */ -+ nonce = nonce_add(nonce, crc.offset << 9); -+ bio_advance(src, crc.offset << 9); - -- if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && -- !c->opts.no_data_io) -- goto decompression_err; -- } else { -- /* don't need to decrypt the entire bio: */ -- nonce = nonce_add(nonce, crc.offset << 9); -- bio_advance(src, crc.offset << 9); -+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); -+ src->bi_iter.bi_size = dst_iter.bi_size; - -- BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); -- src->bi_iter.bi_size = dst_iter.bi_size; -+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); -+ if (ret) -+ goto decrypt_err; - -- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); -- if (ret) -- goto decrypt_err; -+ if (rbio->bounce) { -+ struct bvec_iter src_iter = src->bi_iter; -+ -+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter); -+ } -+ } -+ } else { -+ if (rbio->split) -+ rbio->parent->pick = rbio->pick; - - if (rbio->bounce) { - struct bvec_iter 
src_iter = src->bi_iter; -@@ -644,12 +746,9 @@ static void __bch2_read_endio(struct work_struct *work) - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); - if (ret) - goto decrypt_err; -- -- promote_start(rbio->promote, rbio); -- rbio->promote = NULL; - } --nodecode: -- if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { -+ -+ if (likely(!(rbio->flags & BCH_READ_in_retry))) { - rbio = bch2_rbio_free(rbio); - bch2_rbio_done(rbio); - } -@@ -662,39 +761,19 @@ static void __bch2_read_endio(struct work_struct *work) - * reading into buffers owned by userspace (that userspace can - * scribble over) - retry the read, bouncing it this time: - */ -- if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { -- rbio->flags |= BCH_READ_MUST_BOUNCE; -+ if (!rbio->bounce && (rbio->flags & BCH_READ_user_mapped)) { -+ rbio->flags |= BCH_READ_must_bounce; - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); - goto out; - } - -- struct printbuf buf = PRINTBUF; -- buf.atomic++; -- prt_str(&buf, "data "); -- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); -- -- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; -- if (ca) { -- bch_err_inum_offset_ratelimited(ca, -- rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "data %s", buf.buf); -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -- } -- printbuf_exit(&buf); -- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - decompression_err: -- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "decompression error"); -- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - decrypt_err: -- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, -- rbio->read_pos.offset << 9, -- "decrypt error"); -- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); -+ bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - goto out; - } - -@@ -715,24 +794,16 @@ static void bch2_read_endio(struct bio *bio) - if (!rbio->split) - rbio->bio.bi_end_io = rbio->end_io; - -- if (bio->bi_status) { -- if (ca) { -- bch_err_inum_offset_ratelimited(ca, -- rbio->read_pos.inode, -- rbio->read_pos.offset, -- "data read error: %s", -- bch2_blk_status_to_str(bio->bi_status)); -- bch2_io_error(ca, BCH_MEMBER_ERROR_read); -- } -- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); -+ if (unlikely(bio->bi_status)) { -+ bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); - return; - } - -- if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || -+ if (((rbio->flags & BCH_READ_retry_if_stale) && race_fault()) || - (ca && dev_ptr_stale(ca, &rbio->pick.ptr))) { - trace_and_count(c, read_reuse_race, &rbio->bio); - -- if (rbio->flags & BCH_READ_RETRY_IF_STALE) -+ if (rbio->flags & BCH_READ_retry_if_stale) - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); - else - bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN); -@@ -750,45 +821,6 @@ static void bch2_read_endio(struct bio *bio) - bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); - } - --int __bch2_read_indirect_extent(struct btree_trans *trans, -- unsigned *offset_into_extent, -- struct bkey_buf *orig_k) --{ -- struct btree_iter iter; -- struct bkey_s_c k; -- u64 reflink_offset; -- int ret; -- -- reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + -- 
*offset_into_extent; -- -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, -- POS(0, reflink_offset), 0); -- ret = bkey_err(k); -- if (ret) -- goto err; -- -- if (k.k->type != KEY_TYPE_reflink_v && -- k.k->type != KEY_TYPE_indirect_inline_data) { -- bch_err_inum_offset_ratelimited(trans->c, -- orig_k->k->k.p.inode, -- orig_k->k->k.p.offset << 9, -- "%llu len %u points to nonexistent indirect extent %llu", -- orig_k->k->k.p.offset, -- orig_k->k->k.size, -- reflink_offset); -- bch2_inconsistent_error(trans->c); -- ret = -BCH_ERR_missing_indirect_extent; -- goto err; -- } -- -- *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -- bch2_bkey_buf_reassemble(orig_k, trans->c, k); --err: -- bch2_trans_iter_exit(trans, &iter); -- return ret; --} -- - static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - struct bch_dev *ca, - struct bkey_s_c k, -@@ -845,7 +877,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - struct bch_fs *c = trans->c; - struct extent_ptr_decoded pick; - struct bch_read_bio *rbio = NULL; -- struct promote_op *promote = NULL; - bool bounce = false, read_full = false, narrow_crcs = false; - struct bpos data_pos = bkey_start_pos(k.k); - int pick_ret; -@@ -868,15 +899,24 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - if (!pick_ret) - goto hole; - -- if (pick_ret < 0) { -+ if (unlikely(pick_ret < 0)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); -+ prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret)); -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ goto err; -+ } -+ -+ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { - struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos); -+ prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); - bch2_bkey_val_to_text(&buf, c, k); - -- bch_err_inum_offset_ratelimited(c, -- read_pos.inode, read_pos.offset << 9, -- "no device to read from: %s\n %s", -- bch2_err_str(pick_ret), -- buf.buf); -+ bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - goto err; - } -@@ -889,7 +929,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - * retry path, don't check here, it'll be caught in bch2_read_endio() - * and we'll end up in the retry path: - */ -- if ((flags & BCH_READ_IN_RETRY) && -+ if ((flags & BCH_READ_in_retry) && - !pick.ptr.cached && - ca && - unlikely(dev_ptr_stale(ca, &pick.ptr))) { -@@ -903,48 +943,52 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ -- bch2_trans_unlock(trans); -+ if (!(flags & BCH_READ_in_retry)) -+ bch2_trans_unlock(trans); -+ else -+ bch2_trans_unlock_long(trans); -+ -+ if (!(flags & BCH_READ_data_update)) { -+ if (!(flags & BCH_READ_last_fragment) || -+ bio_flagged(&orig->bio, BIO_CHAIN)) -+ flags |= BCH_READ_must_clone; -+ -+ narrow_crcs = !(flags & BCH_READ_in_retry) && -+ bch2_can_narrow_extent_crcs(k, pick.crc); -+ -+ if (narrow_crcs && (flags & BCH_READ_user_mapped)) -+ flags |= BCH_READ_must_bounce; - -- if (flags & BCH_READ_NODECODE) { -+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); -+ -+ if (crc_is_compressed(pick.crc) || -+ (pick.crc.csum_type != BCH_CSUM_none && -+ (bvec_iter_sectors(iter) != 
pick.crc.uncompressed_size || -+ (bch2_csum_type_is_encryption(pick.crc.csum_type) && -+ (flags & BCH_READ_user_mapped)) || -+ (flags & BCH_READ_must_bounce)))) { -+ read_full = true; -+ bounce = true; -+ } -+ } else { -+ read_full = true; - /* - * can happen if we retry, and the extent we were going to read - * has been merged in the meantime: - */ -- if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) { -+ struct data_update *u = container_of(orig, struct data_update, rbio); -+ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { - if (ca) - percpu_ref_put(&ca->io_ref); - goto hole; - } - - iter.bi_size = pick.crc.compressed_size << 9; -- goto get_bio; -- } -- -- if (!(flags & BCH_READ_LAST_FRAGMENT) || -- bio_flagged(&orig->bio, BIO_CHAIN)) -- flags |= BCH_READ_MUST_CLONE; -- -- narrow_crcs = !(flags & BCH_READ_IN_RETRY) && -- bch2_can_narrow_extent_crcs(k, pick.crc); -- -- if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) -- flags |= BCH_READ_MUST_BOUNCE; -- -- EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); -- -- if (crc_is_compressed(pick.crc) || -- (pick.crc.csum_type != BCH_CSUM_none && -- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || -- (bch2_csum_type_is_encryption(pick.crc.csum_type) && -- (flags & BCH_READ_USER_MAPPED)) || -- (flags & BCH_READ_MUST_BOUNCE)))) { -- read_full = true; -- bounce = true; - } - -- if (orig->opts.promote_target)// || failed) -- promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, -- &rbio, &bounce, &read_full, failed); -+ if (orig->opts.promote_target || have_io_error(failed)) -+ rbio = promote_alloc(trans, iter, k, &pick, flags, orig, -+ &bounce, &read_full, failed); - - if (!read_full) { - EBUG_ON(crc_is_compressed(pick.crc)); -@@ -963,7 +1007,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - pick.crc.offset = 0; - pick.crc.live_size = bvec_iter_sectors(iter); - } --get_bio: -+ - if (rbio) { - /* - * promote already allocated bounce rbio: -@@ -978,17 +1022,16 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - } else if (bounce) { - unsigned sectors = pick.crc.compressed_size; - -- rbio = rbio_init(bio_alloc_bioset(NULL, -+ rbio = rbio_init_fragment(bio_alloc_bioset(NULL, - DIV_ROUND_UP(sectors, PAGE_SECTORS), - 0, - GFP_NOFS, - &c->bio_read_split), -- orig->opts); -+ orig); - - bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); - rbio->bounce = true; -- rbio->split = true; -- } else if (flags & BCH_READ_MUST_CLONE) { -+ } else if (flags & BCH_READ_must_clone) { - /* - * Have to clone if there were any splits, due to error - * reporting issues (if a split errored, and retrying didn't -@@ -997,11 +1040,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - * from the whole bio, in which case we don't want to retry and - * lose the error) - */ -- rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, -+ rbio = rbio_init_fragment(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, - &c->bio_read_split), -- orig->opts); -+ orig); - rbio->bio.bi_iter = iter; -- rbio->split = true; - } else { - rbio = orig; - rbio->bio.bi_iter = iter; -@@ -1010,11 +1052,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - - EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); - -- rbio->c = c; - rbio->submit_time = local_clock(); -- if (rbio->split) -- rbio->parent = orig; -- else -+ if (!rbio->split) - rbio->end_io = orig->bio.bi_end_io; - rbio->bvec_iter = iter; - 
rbio->offset_into_extent= offset_into_extent; -@@ -1024,20 +1063,14 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - rbio->hole = 0; - rbio->retry = 0; - rbio->context = 0; -- /* XXX: only initialize this if needed */ -- rbio->devs_have = bch2_bkey_devs(k); - rbio->pick = pick; - rbio->subvol = orig->subvol; - rbio->read_pos = read_pos; - rbio->data_btree = data_btree; - rbio->data_pos = data_pos; - rbio->version = k.k->bversion; -- rbio->promote = promote; - INIT_WORK(&rbio->work, NULL); - -- if (flags & BCH_READ_NODECODE) -- orig->pick = pick; -- - rbio->bio.bi_opf = orig->bio.bi_opf; - rbio->bio.bi_iter.bi_sector = pick.ptr.offset; - rbio->bio.bi_end_io = bch2_read_endio; -@@ -1052,21 +1085,25 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - * If it's being moved internally, we don't want to flag it as a cache - * hit: - */ -- if (ca && pick.ptr.cached && !(flags & BCH_READ_NODECODE)) -+ if (ca && pick.ptr.cached && !(flags & BCH_READ_data_update)) - bch2_bucket_io_time_reset(trans, pick.ptr.dev, - PTR_BUCKET_NR(ca, &pick.ptr), READ); - -- if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { -+ if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) { - bio_inc_remaining(&orig->bio); - trace_and_count(c, read_split, &orig->bio); - } - - if (!rbio->pick.idx) { -- if (!rbio->have_ioref) { -- bch_err_inum_offset_ratelimited(c, -- read_pos.inode, -- read_pos.offset << 9, -- "no device to read from"); -+ if (unlikely(!rbio->have_ioref)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); -+ prt_printf(&buf, "no device to read from:\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); - goto out; - } -@@ -1076,10 +1113,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - bio_set_dev(&rbio->bio, ca->disk_sb.bdev); - - if (unlikely(c->opts.no_data_io)) { -- if (likely(!(flags & BCH_READ_IN_RETRY))) -+ if (likely(!(flags & BCH_READ_in_retry))) - bio_endio(&rbio->bio); - } else { -- if (likely(!(flags & BCH_READ_IN_RETRY))) -+ if (likely(!(flags & BCH_READ_in_retry))) - submit_bio(&rbio->bio); - else - submit_bio_wait(&rbio->bio); -@@ -1097,11 +1134,11 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - goto out; - } - -- if (likely(!(flags & BCH_READ_IN_RETRY))) -+ if (likely(!(flags & BCH_READ_in_retry))) - bio_endio(&rbio->bio); - } - out: -- if (likely(!(flags & BCH_READ_IN_RETRY))) { -+ if (likely(!(flags & BCH_READ_in_retry))) { - return 0; - } else { - int ret; -@@ -1124,7 +1161,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - } - - err: -- if (flags & BCH_READ_IN_RETRY) -+ if (flags & BCH_READ_in_retry) - return READ_ERR; - - orig->bio.bi_status = BLK_STS_IOERR; -@@ -1132,16 +1169,16 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - - hole: - /* -- * won't normally happen in the BCH_READ_NODECODE -+ * won't normally happen in the BCH_READ_data_update - * (bch2_move_extent()) path, but if we retry and the extent we wanted - * to read no longer exists we have to signal that: - */ -- if (flags & BCH_READ_NODECODE) -+ if (flags & BCH_READ_data_update) - orig->hole = true; - - zero_fill_bio_iter(&orig->bio, iter); - out_read_done: -- if (flags & BCH_READ_LAST_FRAGMENT) -+ if (flags & BCH_READ_last_fragment) - bch2_rbio_done(orig); 
- return 0; - } -@@ -1156,7 +1193,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - struct bkey_s_c k; - int ret; - -- BUG_ON(flags & BCH_READ_NODECODE); -+ BUG_ON(flags & BCH_READ_data_update); - - bch2_bkey_buf_init(&sk); - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -@@ -1164,7 +1201,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - BTREE_ITER_slots); - - while (1) { -- unsigned bytes, sectors, offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - bch2_trans_begin(trans); -@@ -1184,9 +1220,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - if (ret) - goto err; - -- offset_into_extent = iter.pos.offset - -+ s64 offset_into_extent = iter.pos.offset - - bkey_start_offset(k.k); -- sectors = k.k->size - offset_into_extent; -+ unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&sk, c, k); - -@@ -1201,13 +1237,13 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - * With indirect extents, the amount of data to read is the min - * of the original extent and the indirect extent: - */ -- sectors = min(sectors, k.k->size - offset_into_extent); -+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - -- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; -+ unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; - swap(bvec_iter.bi_size, bytes); - - if (bvec_iter.bi_size == bytes) -- flags |= BCH_READ_LAST_FRAGMENT; -+ flags |= BCH_READ_last_fragment; - - ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos, - data_btree, k, -@@ -1215,7 +1251,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - if (ret) - goto err; - -- if (flags & BCH_READ_LAST_FRAGMENT) -+ if (flags & BCH_READ_last_fragment) - break; - - swap(bvec_iter.bi_size, bytes); -@@ -1229,16 +1265,20 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - } - - bch2_trans_iter_exit(trans, &iter); -- bch2_trans_put(trans); -- bch2_bkey_buf_exit(&sk, c); - - if (ret) { -- bch_err_inum_offset_ratelimited(c, inum.inum, -- bvec_iter.bi_sector << 9, -- "read error %i from btree lookup", ret); -+ struct printbuf buf = PRINTBUF; -+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9); -+ prt_printf(&buf, "read error %i from btree lookup", ret); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ - rbio->bio.bi_status = BLK_STS_IOERR; - bch2_rbio_done(rbio); - } -+ -+ bch2_trans_put(trans); -+ bch2_bkey_buf_exit(&sk, c); - } - - void bch2_fs_io_read_exit(struct bch_fs *c) -diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h -index d9c18bb7d403..f54c9943e34a 100644 ---- a/fs/bcachefs/io_read.h -+++ b/fs/bcachefs/io_read.h -@@ -3,6 +3,7 @@ - #define _BCACHEFS_IO_READ_H - - #include "bkey_buf.h" -+#include "reflink.h" - - struct bch_read_bio { - struct bch_fs *c; -@@ -34,9 +35,9 @@ struct bch_read_bio { - u16 flags; - union { - struct { -- u16 bounce:1, -+ u16 promote:1, -+ bounce:1, - split:1, -- kmalloc:1, - have_ioref:1, - narrow_crcs:1, - hole:1, -@@ -46,8 +47,6 @@ struct bch_read_bio { - u16 _state; - }; - -- struct bch_devs_list devs_have; -- - struct extent_ptr_decoded pick; - - /* -@@ -64,8 +63,6 @@ struct bch_read_bio { - struct bpos data_pos; - struct bversion version; - -- struct promote_op *promote; -- - struct bch_io_opts opts; - - struct work_struct work; -@@ -79,32 +76,54 @@ struct bch_devs_mask; - struct cache_promote_op; - struct extent_ptr_decoded; - --int __bch2_read_indirect_extent(struct btree_trans 
*, unsigned *, -- struct bkey_buf *); -- - static inline int bch2_read_indirect_extent(struct btree_trans *trans, - enum btree_id *data_btree, -- unsigned *offset_into_extent, -- struct bkey_buf *k) -+ s64 *offset_into_extent, -+ struct bkey_buf *extent) - { -- if (k->k->k.type != KEY_TYPE_reflink_p) -+ if (extent->k->k.type != KEY_TYPE_reflink_p) - return 0; - - *data_btree = BTREE_ID_reflink; -- return __bch2_read_indirect_extent(trans, offset_into_extent, k); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, -+ offset_into_extent, -+ bkey_i_to_s_c_reflink_p(extent->k), -+ true, 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (bkey_deleted(k.k)) { -+ bch2_trans_iter_exit(trans, &iter); -+ return -BCH_ERR_missing_indirect_extent; -+ } -+ -+ bch2_bkey_buf_reassemble(extent, trans->c, k); -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; - } - -+#define BCH_READ_FLAGS() \ -+ x(retry_if_stale) \ -+ x(may_promote) \ -+ x(user_mapped) \ -+ x(data_update) \ -+ x(last_fragment) \ -+ x(must_bounce) \ -+ x(must_clone) \ -+ x(in_retry) -+ -+enum __bch_read_flags { -+#define x(n) __BCH_READ_##n, -+ BCH_READ_FLAGS() -+#undef x -+}; -+ - enum bch_read_flags { -- BCH_READ_RETRY_IF_STALE = 1 << 0, -- BCH_READ_MAY_PROMOTE = 1 << 1, -- BCH_READ_USER_MAPPED = 1 << 2, -- BCH_READ_NODECODE = 1 << 3, -- BCH_READ_LAST_FRAGMENT = 1 << 4, -- -- /* internal: */ -- BCH_READ_MUST_BOUNCE = 1 << 5, -- BCH_READ_MUST_CLONE = 1 << 6, -- BCH_READ_IN_RETRY = 1 << 7, -+#define x(n) BCH_READ_##n = BIT(__BCH_READ_##n), -+ BCH_READ_FLAGS() -+#undef x - }; - - int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, -@@ -131,24 +150,39 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - - BUG_ON(rbio->_state); - -- rbio->c = c; -- rbio->start_time = local_clock(); - rbio->subvol = inum.subvol; - - __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, -- BCH_READ_RETRY_IF_STALE| -- BCH_READ_MAY_PROMOTE| -- BCH_READ_USER_MAPPED); -+ BCH_READ_retry_if_stale| -+ BCH_READ_may_promote| -+ BCH_READ_user_mapped); - } - --static inline struct bch_read_bio *rbio_init(struct bio *bio, -- struct bch_io_opts opts) -+static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, -+ struct bch_read_bio *orig) - { - struct bch_read_bio *rbio = to_rbio(bio); - -+ rbio->c = orig->c; - rbio->_state = 0; -- rbio->promote = NULL; -- rbio->opts = opts; -+ rbio->split = true; -+ rbio->parent = orig; -+ rbio->opts = orig->opts; -+ return rbio; -+} -+ -+static inline struct bch_read_bio *rbio_init(struct bio *bio, -+ struct bch_fs *c, -+ struct bch_io_opts opts, -+ bio_end_io_t end_io) -+{ -+ struct bch_read_bio *rbio = to_rbio(bio); -+ -+ rbio->start_time = local_clock(); -+ rbio->c = c; -+ rbio->_state = 0; -+ rbio->opts = opts; -+ rbio->bio.bi_end_io = end_io; - return rbio; - } - -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -index 96720adcfee0..92abc239599d 100644 ---- a/fs/bcachefs/io_write.c -+++ b/fs/bcachefs/io_write.c -@@ -164,7 +164,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, - - bch2_trans_copy_iter(&iter, extent_iter); - -- for_each_btree_key_upto_continue_norestart(iter, -+ for_each_btree_key_max_continue_norestart(iter, - new->k.p, BTREE_ITER_slots, old, ret) { - s64 sectors = min(new->k.p.offset, old.k->p.offset) - - max(bkey_start_offset(&new->k), -@@ -216,6 +216,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - SPOS(0, - extent_iter->pos.inode, - 
extent_iter->snapshot), -+ BTREE_ITER_intent| - BTREE_ITER_cached); - int ret = bkey_err(k); - if (unlikely(ret)) -@@ -369,11 +370,11 @@ static int bch2_write_index_default(struct bch_write_op *op) - bkey_start_pos(&sk.k->k), - BTREE_ITER_slots|BTREE_ITER_intent); - -- ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &op->opts, sk.k) ?: - bch2_extent_update(trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, -- op->flags & BCH_WRITE_CHECK_ENOSPC); -+ op->flags & BCH_WRITE_check_enospc); - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -@@ -395,6 +396,21 @@ static int bch2_write_index_default(struct bch_write_op *op) - - /* Writes */ - -+static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, -+ u64 offset) -+{ -+ bch2_inum_offset_err_msg(op->c, out, -+ (subvol_inum) { op->subvol, op->pos.inode, }, -+ offset << 9); -+ prt_printf(out, "write error%s: ", -+ op->flags & BCH_WRITE_move ? "(internal move)" : ""); -+} -+ -+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) -+{ -+ __bch2_write_op_error(out, op, op->pos.offset); -+} -+ - void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, - enum bch_data_type type, - const struct bkey_i *k, -@@ -467,7 +483,7 @@ static void bch2_write_done(struct closure *cl) - bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); - bch2_disk_reservation_put(c, &op->res); - -- if (!(op->flags & BCH_WRITE_MOVE)) -+ if (!(op->flags & BCH_WRITE_move)) - bch2_write_ref_put(c, BCH_WRITE_REF_write); - bch2_keylist_free(&op->insert_keys, op->inline_keys); - -@@ -513,7 +529,7 @@ static void __bch2_write_index(struct bch_write_op *op) - unsigned dev; - int ret = 0; - -- if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { -+ if (unlikely(op->flags & BCH_WRITE_io_error)) { - ret = bch2_write_drop_io_error_ptrs(op); - if (ret) - goto err; -@@ -522,7 +538,7 @@ static void __bch2_write_index(struct bch_write_op *op) - if (!bch2_keylist_empty(keys)) { - u64 sectors_start = keylist_sectors(keys); - -- ret = !(op->flags & BCH_WRITE_MOVE) -+ ret = !(op->flags & BCH_WRITE_move) - ? bch2_write_index_default(op) - : bch2_data_update_index_update(op); - -@@ -531,14 +547,14 @@ static void __bch2_write_index(struct bch_write_op *op) - - op->written += sectors_start - keylist_sectors(keys); - -- if (ret && !bch2_err_matches(ret, EROFS)) { -+ if (unlikely(ret && !bch2_err_matches(ret, EROFS))) { - struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - -- bch_err_inum_offset_ratelimited(c, -- insert->k.p.inode, insert->k.p.offset << 9, -- "%s write error while doing btree update: %s", -- op->flags & BCH_WRITE_MOVE ? 
"move" : "user", -- bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); -+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - } - - if (ret) -@@ -554,7 +570,7 @@ static void __bch2_write_index(struct bch_write_op *op) - err: - keys->top = keys->keys; - op->error = ret; -- op->flags |= BCH_WRITE_SUBMITTED; -+ op->flags |= BCH_WRITE_submitted; - goto out; - } - -@@ -589,8 +605,8 @@ static CLOSURE_CALLBACK(bch2_write_index) - struct workqueue_struct *wq = index_update_wq(op); - unsigned long flags; - -- if ((op->flags & BCH_WRITE_SUBMITTED) && -- (op->flags & BCH_WRITE_MOVE)) -+ if ((op->flags & BCH_WRITE_submitted) && -+ (op->flags & BCH_WRITE_move)) - bch2_bio_free_pages_pool(op->c, &op->wbio.bio); - - spin_lock_irqsave(&wp->writes_lock, flags); -@@ -621,20 +637,18 @@ void bch2_write_point_do_index_updates(struct work_struct *work) - - while (1) { - spin_lock_irq(&wp->writes_lock); -- op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); -- if (op) -- list_del(&op->wp_list); -+ op = list_pop_entry(&wp->writes, struct bch_write_op, wp_list); - wp_update_state(wp, op != NULL); - spin_unlock_irq(&wp->writes_lock); - - if (!op) - break; - -- op->flags |= BCH_WRITE_IN_WORKER; -+ op->flags |= BCH_WRITE_in_worker; - - __bch2_write_index(op); - -- if (!(op->flags & BCH_WRITE_SUBMITTED)) -+ if (!(op->flags & BCH_WRITE_submitted)) - __bch2_write(op); - else - bch2_write_done(&op->cl); -@@ -658,7 +672,7 @@ static void bch2_write_endio(struct bio *bio) - "data write error: %s", - bch2_blk_status_to_str(bio->bi_status))) { - set_bit(wbio->dev, op->failed.d); -- op->flags |= BCH_WRITE_IO_ERROR; -+ op->flags |= BCH_WRITE_io_error; - } - - if (wbio->nocow) { -@@ -705,7 +719,7 @@ static void init_append_extent(struct bch_write_op *op, - bch2_extent_crc_append(&e->k_i, crc); - - bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, -- op->flags & BCH_WRITE_CACHED); -+ op->flags & BCH_WRITE_cached); - - bch2_keylist_push(&op->insert_keys); - } -@@ -822,7 +836,7 @@ static enum prep_encoded_ret { - struct bch_fs *c = op->c; - struct bio *bio = &op->wbio.bio; - -- if (!(op->flags & BCH_WRITE_DATA_ENCODED)) -+ if (!(op->flags & BCH_WRITE_data_encoded)) - return PREP_ENCODED_OK; - - BUG_ON(bio_sectors(bio) != op->crc.compressed_size); -@@ -859,7 +873,7 @@ static enum prep_encoded_ret { - if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; - -- if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) -+ if (bch2_bio_uncompress_inplace(op, bio)) - return PREP_ENCODED_ERR; - } - -@@ -930,9 +944,9 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - if (ec_buf || - op->compression_opt || - (op->csum_type && -- !(op->flags & BCH_WRITE_PAGES_STABLE)) || -+ !(op->flags & BCH_WRITE_pages_stable)) || - (bch2_csum_type_is_encryption(op->csum_type) && -- !(op->flags & BCH_WRITE_PAGES_OWNED))) { -+ !(op->flags & BCH_WRITE_pages_owned))) { - dst = bch2_write_bio_alloc(c, wp, src, - &page_alloc_failed, - ec_buf); -@@ -952,7 +966,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - break; - - BUG_ON(op->compression_opt && -- (op->flags & BCH_WRITE_DATA_ENCODED) && -+ (op->flags & BCH_WRITE_data_encoded) && - bch2_csum_type_is_encryption(op->crc.csum_type)); - BUG_ON(op->compression_opt && !bounce); - -@@ -990,7 +1004,7 @@ static int 
bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - } - } - -- if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ if ((op->flags & BCH_WRITE_data_encoded) && - !crc_is_compressed(crc) && - bch2_csum_type_is_encryption(op->crc.csum_type) == - bch2_csum_type_is_encryption(op->csum_type)) { -@@ -1022,7 +1036,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - crc.compression_type = compression_type; - crc.nonce = nonce; - } else { -- if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ if ((op->flags & BCH_WRITE_data_encoded) && - bch2_rechecksum_bio(c, src, version, op->crc, - NULL, &op->crc, - src_len >> 9, -@@ -1080,11 +1094,14 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - *_dst = dst; - return more; - csum_err: -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)", -- op->flags & BCH_WRITE_MOVE ? "move" : "user"); -+ { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)"); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ - ret = -EIO; - err: - if (to_wbio(dst)->bounce) -@@ -1165,7 +1182,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - struct btree_trans *trans = bch2_trans_get(c); - - for_each_keylist_key(&op->insert_keys, orig) { -- int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, -+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, - bkey_start_pos(&orig->k), orig->k.p, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ -@@ -1175,11 +1192,11 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - -- bch_err_inum_offset_ratelimited(c, -- insert->k.p.inode, insert->k.p.offset << 9, -- "%s write error while doing btree update: %s", -- op->flags & BCH_WRITE_MOVE ? 
"move" : "user", -- bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); -+ prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - } - - if (ret) { -@@ -1193,9 +1210,9 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) - - static void __bch2_nocow_write_done(struct bch_write_op *op) - { -- if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { -+ if (unlikely(op->flags & BCH_WRITE_io_error)) { - op->error = -EIO; -- } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN)) -+ } else if (unlikely(op->flags & BCH_WRITE_convert_unwritten)) - bch2_nocow_write_convert_unwritten(op); - } - -@@ -1224,7 +1241,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - struct bucket_to_lock *stale_at; - int stale, ret; - -- if (op->flags & BCH_WRITE_MOVE) -+ if (op->flags & BCH_WRITE_move) - return; - - darray_init(&buckets); -@@ -1282,7 +1299,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - }), GFP_KERNEL|__GFP_NOFAIL); - - if (ptr->unwritten) -- op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; -+ op->flags |= BCH_WRITE_convert_unwritten; - } - - /* Unlock before taking nocow locks, doing IO: */ -@@ -1290,7 +1307,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - bch2_trans_unlock(trans); - - bch2_cut_front(op->pos, op->insert_keys.top); -- if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) -+ if (op->flags & BCH_WRITE_convert_unwritten) - bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); - - darray_for_each(buckets, i) { -@@ -1315,7 +1332,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - wbio_init(bio)->put_bio = true; - bio->bi_opf = op->wbio.bio.bi_opf; - } else { -- op->flags |= BCH_WRITE_SUBMITTED; -+ op->flags |= BCH_WRITE_submitted; - } - - op->pos.offset += bio_sectors(bio); -@@ -1329,7 +1346,7 @@ static void bch2_nocow_write(struct bch_write_op *op) - op->insert_keys.top, true); - - bch2_keylist_push(&op->insert_keys); -- if (op->flags & BCH_WRITE_SUBMITTED) -+ if (op->flags & BCH_WRITE_submitted) - break; - bch2_btree_iter_advance(&iter); - } -@@ -1339,23 +1356,25 @@ static void bch2_nocow_write(struct bch_write_op *op) - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - -+ bch2_trans_put(trans); -+ darray_exit(&buckets); -+ - if (ret) { -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, op->pos.offset << 9, -- "%s: btree lookup error %s", __func__, bch2_err_str(ret)); -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); - op->error = ret; -- op->flags |= BCH_WRITE_SUBMITTED; -+ op->flags |= BCH_WRITE_submitted; - } - -- bch2_trans_put(trans); -- darray_exit(&buckets); -- - /* fallback to cow write path? 
*/ -- if (!(op->flags & BCH_WRITE_SUBMITTED)) { -+ if (!(op->flags & BCH_WRITE_submitted)) { - closure_sync(&op->cl); - __bch2_nocow_write_done(op); - op->insert_keys.top = op->insert_keys.keys; -- } else if (op->flags & BCH_WRITE_SYNC) { -+ } else if (op->flags & BCH_WRITE_sync) { - closure_sync(&op->cl); - bch2_nocow_write_done(&op->cl.work); - } else { -@@ -1407,7 +1426,7 @@ static void __bch2_write(struct bch_write_op *op) - - if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) { - bch2_nocow_write(op); -- if (op->flags & BCH_WRITE_SUBMITTED) -+ if (op->flags & BCH_WRITE_submitted) - goto out_nofs_restore; - } - again: -@@ -1437,7 +1456,7 @@ static void __bch2_write(struct bch_write_op *op) - ret = bch2_trans_run(c, lockrestart_do(trans, - bch2_alloc_sectors_start_trans(trans, - op->target, -- op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), -+ op->opts.erasure_code && !(op->flags & BCH_WRITE_cached), - op->write_point, - &op->devs_have, - op->nr_replicas, -@@ -1460,16 +1479,16 @@ static void __bch2_write(struct bch_write_op *op) - bch2_alloc_sectors_done_inlined(c, wp); - err: - if (ret <= 0) { -- op->flags |= BCH_WRITE_SUBMITTED; -- -- if (ret < 0) { -- if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s(): %s error: %s", __func__, -- op->flags & BCH_WRITE_MOVE ? "move" : "user", -- bch2_err_str(ret)); -+ op->flags |= BCH_WRITE_submitted; -+ -+ if (unlikely(ret < 0)) { -+ if (!(op->flags & BCH_WRITE_alloc_nowait)) { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret)); -+ bch_err_ratelimited(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } - op->error = ret; - break; - } -@@ -1495,14 +1514,14 @@ static void __bch2_write(struct bch_write_op *op) - * synchronously here if we weren't able to submit all of the IO at - * once, as that signals backpressure to the caller. - */ -- if ((op->flags & BCH_WRITE_SYNC) || -- (!(op->flags & BCH_WRITE_SUBMITTED) && -- !(op->flags & BCH_WRITE_IN_WORKER))) { -+ if ((op->flags & BCH_WRITE_sync) || -+ (!(op->flags & BCH_WRITE_submitted) && -+ !(op->flags & BCH_WRITE_in_worker))) { - bch2_wait_on_allocator(c, &op->cl); - - __bch2_write_index(op); - -- if (!(op->flags & BCH_WRITE_SUBMITTED)) -+ if (!(op->flags & BCH_WRITE_submitted)) - goto again; - bch2_write_done(&op->cl); - } else { -@@ -1523,8 +1542,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) - - memset(&op->failed, 0, sizeof(op->failed)); - -- op->flags |= BCH_WRITE_WROTE_DATA_INLINE; -- op->flags |= BCH_WRITE_SUBMITTED; -+ op->flags |= BCH_WRITE_wrote_data_inline; -+ op->flags |= BCH_WRITE_submitted; - - bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); - -@@ -1587,20 +1606,19 @@ CLOSURE_CALLBACK(bch2_write) - BUG_ON(!op->write_point.v); - BUG_ON(bkey_eq(op->pos, POS_MAX)); - -- if (op->flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) -- op->flags |= BCH_WRITE_ALLOC_NOWAIT; -+ if (op->flags & BCH_WRITE_only_specified_devs) -+ op->flags |= BCH_WRITE_alloc_nowait; - - op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas); - op->start_time = local_clock(); - bch2_keylist_init(&op->insert_keys, op->inline_keys); - wbio_init(bio)->put_bio = false; - -- if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { -- bch_err_inum_offset_ratelimited(c, -- op->pos.inode, -- op->pos.offset << 9, -- "%s write error: misaligned write", -- op->flags & BCH_WRITE_MOVE ? 
"move" : "user"); -+ if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { -+ struct printbuf buf = PRINTBUF; -+ bch2_write_op_error(&buf, op); -+ prt_printf(&buf, "misaligned write"); -+ printbuf_exit(&buf); - op->error = -EIO; - goto err; - } -@@ -1610,7 +1628,7 @@ CLOSURE_CALLBACK(bch2_write) - goto err; - } - -- if (!(op->flags & BCH_WRITE_MOVE) && -+ if (!(op->flags & BCH_WRITE_move) && - !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { - op->error = -BCH_ERR_erofs_no_writes; - goto err; -diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h -index 5400ce94ee57..02cca52be0bd 100644 ---- a/fs/bcachefs/io_write.h -+++ b/fs/bcachefs/io_write.h -@@ -20,22 +20,23 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw - void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, - enum bch_data_type, const struct bkey_i *, bool); - -+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op); -+ - #define BCH_WRITE_FLAGS() \ -- x(ALLOC_NOWAIT) \ -- x(CACHED) \ -- x(DATA_ENCODED) \ -- x(PAGES_STABLE) \ -- x(PAGES_OWNED) \ -- x(ONLY_SPECIFIED_DEVS) \ -- x(WROTE_DATA_INLINE) \ -- x(FROM_INTERNAL) \ -- x(CHECK_ENOSPC) \ -- x(SYNC) \ -- x(MOVE) \ -- x(IN_WORKER) \ -- x(SUBMITTED) \ -- x(IO_ERROR) \ -- x(CONVERT_UNWRITTEN) -+ x(alloc_nowait) \ -+ x(cached) \ -+ x(data_encoded) \ -+ x(pages_stable) \ -+ x(pages_owned) \ -+ x(only_specified_devs) \ -+ x(wrote_data_inline) \ -+ x(check_enospc) \ -+ x(sync) \ -+ x(move) \ -+ x(in_worker) \ -+ x(submitted) \ -+ x(io_error) \ -+ x(convert_unwritten) - - enum __bch_write_flags { - #define x(f) __BCH_WRITE_##f, -diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h -index 6e878a6f2f0b..3ef6df9145ef 100644 ---- a/fs/bcachefs/io_write_types.h -+++ b/fs/bcachefs/io_write_types.h -@@ -64,7 +64,7 @@ struct bch_write_op { - struct bpos pos; - struct bversion version; - -- /* For BCH_WRITE_DATA_ENCODED: */ -+ /* For BCH_WRITE_data_encoded: */ - struct bch_extent_crc_unpacked crc; - - struct write_point_specifier write_point; -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -index 2dc0d60c1745..cb2c3722f674 100644 ---- a/fs/bcachefs/journal.c -+++ b/fs/bcachefs/journal.c -@@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq) - - static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) - { -- unsigned i; -- -- for (i = 0; i < ARRAY_SIZE(p->list); i++) -- INIT_LIST_HEAD(&p->list[i]); -- INIT_LIST_HEAD(&p->flushed); -+ for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++) -+ INIT_LIST_HEAD(&p->unflushed[i]); -+ for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++) -+ INIT_LIST_HEAD(&p->flushed[i]); - atomic_set(&p->count, count); - p->devs.nr = 0; - } -@@ -217,6 +216,12 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq) - if (__bch2_journal_pin_put(j, seq)) - bch2_journal_reclaim_fast(j); - bch2_journal_do_writes(j); -+ -+ /* -+ * for __bch2_next_write_buffer_flush_journal_buf(), when quiescing an -+ * open journal entry -+ */ -+ wake_up(&j->wait); - } - - /* -@@ -251,6 +256,9 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t - if (!__journal_entry_is_open(old)) - return; - -+ if (old.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) -+ old.cur_entry_offset = j->cur_entry_offset_if_blocked; -+ - /* Close out old buffer: */ - buf->data->u64s = cpu_to_le32(old.cur_entry_offset); - -@@ -373,6 +381,10 @@ static int journal_entry_open(struct journal *j) - if 
(nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) - return JOURNAL_ERR_max_in_flight; - -+ if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, -+ c, "cannot start: journal seq overflow")) -+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */ -+ - BUG_ON(!j->cur_entry_sectors); - - buf->expires = -@@ -588,6 +600,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, - : -BCH_ERR_journal_res_get_blocked; - } - -+static unsigned max_dev_latency(struct bch_fs *c) -+{ -+ u64 nsecs = 0; -+ -+ for_each_rw_member(c, ca) -+ nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration); -+ -+ return nsecs_to_jiffies(nsecs); -+} -+ - /* - * Essentially the entry function to the journaling code. When bcachefs is doing - * a btree insert, it calls this function to get the current journal write. -@@ -599,17 +621,31 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, - * btree node write locks. - */ - int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -- unsigned flags) -+ unsigned flags, -+ struct btree_trans *trans) - { - int ret; - - if (closure_wait_event_timeout(&j->async_wait, - (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || - (flags & JOURNAL_RES_GET_NONBLOCK), -- HZ * 10)) -+ HZ)) - return ret; - -+ if (trans) -+ bch2_trans_unlock_long(trans); -+ - struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10); -+ -+ remaining_wait = max(0, remaining_wait - HZ); -+ -+ if (closure_wait_event_timeout(&j->async_wait, -+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || -+ (flags & JOURNAL_RES_GET_NONBLOCK), -+ remaining_wait)) -+ return ret; -+ - struct printbuf buf = PRINTBUF; - bch2_journal_debug_to_text(&buf, j); - bch_err(c, "Journal stuck? 
Waited for 10 seconds...\n%s", -@@ -664,7 +700,7 @@ void bch2_journal_entry_res_resize(struct journal *j, - * @seq: seq to flush - * @parent: closure object to wait with - * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed, -- * -EIO if @seq will never be flushed -+ * -BCH_ERR_journal_flush_err if @seq will never be flushed - * - * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if - * necessary -@@ -687,7 +723,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, - - /* Recheck under lock: */ - if (j->err_seq && seq >= j->err_seq) { -- ret = -EIO; -+ ret = -BCH_ERR_journal_flush_err; - goto out; - } - -@@ -714,7 +750,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, - * livelock: - */ - sched_annotate_sleep(); -- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); - if (ret) - return ret; - -@@ -794,10 +830,11 @@ int bch2_journal_flush(struct journal *j) - } - - /* -- * bch2_journal_noflush_seq - tell the journal not to issue any flushes before -+ * bch2_journal_noflush_seq - ask the journal not to issue any flushes in the -+ * range [start, end) - * @seq - */ --bool bch2_journal_noflush_seq(struct journal *j, u64 seq) -+bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - u64 unwritten_seq; -@@ -806,15 +843,15 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq) - if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) - return false; - -- if (seq <= c->journal.flushed_seq_ondisk) -+ if (c->journal.flushed_seq_ondisk >= start) - return false; - - spin_lock(&j->lock); -- if (seq <= c->journal.flushed_seq_ondisk) -+ if (c->journal.flushed_seq_ondisk >= start) - goto out; - - for (unwritten_seq = journal_last_unwritten_seq(j); -- unwritten_seq < seq; -+ unwritten_seq < end; - unwritten_seq++) { - struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq); - -@@ -831,19 +868,14 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq) - return ret; - } - --int bch2_journal_meta(struct journal *j) -+static int __bch2_journal_meta(struct journal *j) - { -- struct journal_buf *buf; -- struct journal_res res; -- int ret; -- -- memset(&res, 0, sizeof(res)); -- -- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ struct journal_res res = {}; -+ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); - if (ret) - return ret; - -- buf = j->buf + (res.seq & JOURNAL_BUF_MASK); -+ struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK); - buf->must_flush = true; - - if (!buf->flush_time) { -@@ -856,27 +888,70 @@ int bch2_journal_meta(struct journal *j) - return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE); - } - -+int bch2_journal_meta(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) -+ return -EROFS; -+ -+ int ret = __bch2_journal_meta(j); -+ bch2_write_ref_put(c, BCH_WRITE_REF_journal); -+ return ret; -+} -+ - /* block/unlock the journal: */ - - void bch2_journal_unblock(struct journal *j) - { - spin_lock(&j->lock); -- j->blocked--; -+ if (!--j->blocked && -+ j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && -+ j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { -+ union journal_res_state old, new; -+ -+ old.v = atomic64_read(&j->reservations.counter); -+ do { -+ new.v = old.v; -+ new.cur_entry_offset 
= j->cur_entry_offset_if_blocked; -+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ } - spin_unlock(&j->lock); - - journal_wake(j); - } - -+static void __bch2_journal_block(struct journal *j) -+{ -+ if (!j->blocked++) { -+ union journal_res_state old, new; -+ -+ old.v = atomic64_read(&j->reservations.counter); -+ do { -+ j->cur_entry_offset_if_blocked = old.cur_entry_offset; -+ -+ if (j->cur_entry_offset_if_blocked >= JOURNAL_ENTRY_CLOSED_VAL) -+ break; -+ -+ new.v = old.v; -+ new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL; -+ } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); -+ -+ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset); -+ } -+} -+ - void bch2_journal_block(struct journal *j) - { - spin_lock(&j->lock); -- j->blocked++; -+ __bch2_journal_block(j); - spin_unlock(&j->lock); - - journal_quiesce(j); - } - --static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) -+static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, -+ u64 max_seq, bool *blocked) - { - struct journal_buf *ret = NULL; - -@@ -893,13 +968,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - struct journal_buf *buf = j->buf + idx; - - if (buf->need_flush_to_write_buffer) { -- if (seq == journal_cur_seq(j)) -- __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); -- - union journal_res_state s; - s.v = atomic64_read_acquire(&j->reservations.counter); - -- ret = journal_state_count(s, idx) -+ unsigned open = seq == journal_cur_seq(j) && __journal_entry_is_open(s); -+ -+ if (open && !*blocked) { -+ __bch2_journal_block(j); -+ *blocked = true; -+ } -+ -+ ret = journal_state_count(s, idx) > open - ? ERR_PTR(-EAGAIN) - : buf; - break; -@@ -912,11 +991,17 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou - return ret; - } - --struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq) -+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, -+ u64 max_seq, bool *blocked) - { - struct journal_buf *ret; -+ *blocked = false; -+ -+ wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, -+ max_seq, blocked)) != ERR_PTR(-EAGAIN)); -+ if (IS_ERR_OR_NULL(ret) && *blocked) -+ bch2_journal_unblock(j); - -- wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN)); - return ret; - } - -@@ -945,19 +1030,17 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - } - - for (nr_got = 0; nr_got < nr_want; nr_got++) { -- if (new_fs) { -- bu[nr_got] = bch2_bucket_alloc_new_fs(ca); -- if (bu[nr_got] < 0) { -- ret = -BCH_ERR_ENOSPC_bucket_alloc; -- break; -- } -- } else { -- ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, -- BCH_DATA_journal, cl); -- ret = PTR_ERR_OR_ZERO(ob[nr_got]); -- if (ret) -- break; -+ enum bch_watermark watermark = new_fs -+ ? 
BCH_WATERMARK_btree -+ : BCH_WATERMARK_normal; -+ -+ ob[nr_got] = bch2_bucket_alloc(c, ca, watermark, -+ BCH_DATA_journal, cl); -+ ret = PTR_ERR_OR_ZERO(ob[nr_got]); -+ if (ret) -+ break; - -+ if (!new_fs) { - ret = bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, - ob[nr_got]->bucket, BCH_DATA_journal, -@@ -967,9 +1050,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bch_err_msg(c, ret, "marking new journal buckets"); - break; - } -- -- bu[nr_got] = ob[nr_got]->bucket; - } -+ -+ bu[nr_got] = ob[nr_got]->bucket; - } - - if (!nr_got) -@@ -1009,8 +1092,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - if (ret) - goto err_unblock; - -- if (!new_fs) -- bch2_write_super(c); -+ bch2_write_super(c); - - /* Commit: */ - if (c) -@@ -1044,9 +1126,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, - bu[i], BCH_DATA_free, 0, - BTREE_TRIGGER_transactional)); - err_free: -- if (!new_fs) -- for (i = 0; i < nr_got; i++) -- bch2_open_bucket_put(c, ob[i]); -+ for (i = 0; i < nr_got; i++) -+ bch2_open_bucket_put(c, ob[i]); - - kfree(new_bucket_seq); - kfree(new_buckets); -@@ -1193,7 +1274,7 @@ void bch2_fs_journal_stop(struct journal *j) - * Always write a new journal entry, to make sure the clock hands are up - * to date (and match the superblock) - */ -- bch2_journal_meta(j); -+ __bch2_journal_meta(j); - - journal_quiesce(j); - cancel_delayed_work_sync(&j->write_work); -@@ -1217,6 +1298,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) - bool had_entries = false; - u64 last_seq = cur_seq, nr, seq; - -+ if (cur_seq >= JOURNAL_SEQ_MAX) { -+ bch_err(c, "cannot start: journal seq overflow"); -+ return -EINVAL; -+ } -+ - genradix_for_each_reverse(&c->journal_entries, iter, _i) { - i = *_i; - -@@ -1474,6 +1560,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - case JOURNAL_ENTRY_CLOSED_VAL: - prt_printf(out, "closed\n"); - break; -+ case JOURNAL_ENTRY_BLOCKED_VAL: -+ prt_printf(out, "blocked\n"); -+ break; - default: - prt_printf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s); - break; -@@ -1499,6 +1588,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - printbuf_indent_sub(out, 2); - - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { -+ if (!ca->mi.durability) -+ continue; -+ - struct journal_device *ja = &ca->journal; - - if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) -@@ -1508,6 +1600,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - continue; - - prt_printf(out, "dev %u:\n", ca->dev_idx); -+ prt_printf(out, "durability %u:\n", ca->mi.durability); - printbuf_indent_add(out, 2); - prt_printf(out, "nr\t%u\n", ja->nr); - prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size); -@@ -1519,6 +1612,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - printbuf_indent_sub(out, 2); - } - -+ prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); -+ - rcu_read_unlock(); - - --out->atomic; -@@ -1530,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) - __bch2_journal_debug_to_text(out, j); - spin_unlock(&j->lock); - } -- --bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) --{ -- struct journal_entry_pin_list *pin_list; -- struct journal_entry_pin *pin; -- -- spin_lock(&j->lock); -- if (!test_bit(JOURNAL_running, 
&j->flags)) { -- spin_unlock(&j->lock); -- return true; -- } -- -- *seq = max(*seq, j->pin.front); -- -- if (*seq >= j->pin.back) { -- spin_unlock(&j->lock); -- return true; -- } -- -- out->atomic++; -- -- pin_list = journal_seq_pin(j, *seq); -- -- prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); -- printbuf_indent_add(out, 2); -- -- for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++) -- list_for_each_entry(pin, &pin_list->list[i], list) -- prt_printf(out, "\t%px %ps\n", pin, pin->flush); -- -- if (!list_empty(&pin_list->flushed)) -- prt_printf(out, "flushed:\n"); -- -- list_for_each_entry(pin, &pin_list->flushed, list) -- prt_printf(out, "\t%px %ps\n", pin, pin->flush); -- -- printbuf_indent_sub(out, 2); -- -- --out->atomic; -- spin_unlock(&j->lock); -- -- return false; --} -- --void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) --{ -- u64 seq = 0; -- -- while (!bch2_journal_seq_pins_to_text(out, j, &seq)) -- seq++; --} -diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h -index 2762be6f9814..dccddd5420ad 100644 ---- a/fs/bcachefs/journal.h -+++ b/fs/bcachefs/journal.h -@@ -285,7 +285,8 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq - spin_lock(&j->lock); - bch2_journal_buf_put_final(j, seq); - spin_unlock(&j->lock); -- } -+ } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) -+ wake_up(&j->wait); - } - - /* -@@ -311,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j, - } - - int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, -- unsigned); -+ unsigned, struct btree_trans *); - - /* First bits for BCH_WATERMARK: */ - enum journal_res_flags { -@@ -367,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j, - } - - static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, -- unsigned u64s, unsigned flags) -+ unsigned u64s, unsigned flags, -+ struct btree_trans *trans) - { - int ret; - -@@ -379,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re - if (journal_res_get_fast(j, res, flags)) - goto out; - -- ret = bch2_journal_res_get_slowpath(j, res, flags); -+ ret = bch2_journal_res_get_slowpath(j, res, flags, trans); - if (ret) - return ret; - out: -@@ -403,7 +405,7 @@ void bch2_journal_flush_async(struct journal *, struct closure *); - - int bch2_journal_flush_seq(struct journal *, u64, unsigned); - int bch2_journal_flush(struct journal *); --bool bch2_journal_noflush_seq(struct journal *, u64); -+bool bch2_journal_noflush_seq(struct journal *, u64, u64); - int bch2_journal_meta(struct journal *); - - void bch2_journal_halt(struct journal *); -@@ -411,7 +413,7 @@ void bch2_journal_halt(struct journal *); - static inline int bch2_journal_error(struct journal *j) - { - return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -- ? -EIO : 0; -+ ? 
-BCH_ERR_journal_shutdown : 0; - } - - struct bch_dev; -@@ -424,12 +426,10 @@ static inline void bch2_journal_set_replay_done(struct journal *j) - - void bch2_journal_unblock(struct journal *); - void bch2_journal_block(struct journal *); --struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq); -+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); - - void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); - void bch2_journal_debug_to_text(struct printbuf *, struct journal *); --void bch2_journal_pins_to_text(struct printbuf *, struct journal *); --bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); - - int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, - unsigned nr); -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -index fb35dd336331..b89d77717de4 100644 ---- a/fs/bcachefs/journal_io.c -+++ b/fs/bcachefs/journal_io.c -@@ -17,6 +17,8 @@ - #include "sb-clean.h" - #include "trace.h" - -+#include -+ - void bch2_journal_pos_from_member_info_set(struct bch_fs *c) - { - lockdep_assert_held(&c->sb_lock); -@@ -299,7 +301,7 @@ static void journal_entry_err_msg(struct printbuf *out, - journal_entry_err_msg(&_buf, version, jset, entry); \ - prt_printf(&_buf, msg, ##__VA_ARGS__); \ - \ -- switch (flags & BCH_VALIDATE_write) { \ -+ switch (from.flags & BCH_VALIDATE_write) { \ - case READ: \ - mustfix_fsck_err(c, _err, "%s", _buf.buf); \ - break; \ -@@ -325,11 +327,11 @@ static void journal_entry_err_msg(struct printbuf *out, - static int journal_validate_key(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, -- unsigned level, enum btree_id btree_id, - struct bkey_i *k, -- unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from, -+ unsigned version, int big_endian) - { -+ enum bch_validate_flags flags = from.flags; - int write = flags & BCH_VALIDATE_write; - void *next = vstruct_next(entry); - int ret = 0; -@@ -364,11 +366,10 @@ static int journal_validate_key(struct bch_fs *c, - } - - if (!write) -- bch2_bkey_compat(level, btree_id, version, big_endian, -+ bch2_bkey_compat(from.level, from.btree, version, big_endian, - write, NULL, bkey_to_packed(k)); - -- ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), -- __btree_node_type(level, btree_id), write); -+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from); - if (ret == -BCH_ERR_fsck_delete_bkey) { - le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); - memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -@@ -379,7 +380,7 @@ static int journal_validate_key(struct bch_fs *c, - goto fsck_err; - - if (write) -- bch2_bkey_compat(level, btree_id, version, big_endian, -+ bch2_bkey_compat(from.level, from.btree, version, big_endian, - write, NULL, bkey_to_packed(k)); - fsck_err: - return ret; -@@ -389,16 +390,15 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_i *k = entry->start; - -+ from.level = entry->level; -+ from.btree = entry->btree_id; -+ - while (k != vstruct_last(entry)) { -- int ret = journal_validate_key(c, jset, entry, -- entry->level, -- entry->btree_id, -- k, version, big_endian, -- flags|BCH_VALIDATE_journal); -+ int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); - if (ret == FSCK_DELETED_KEY) - continue; - else 
if (ret) -@@ -421,7 +421,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs - bch2_prt_jset_entry_type(out, entry->type); - prt_str(out, ": "); - } -- prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); -+ bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); -+ prt_char(out, ' '); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); - first = false; - } -@@ -431,11 +432,15 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_i *k = entry->start; - int ret = 0; - -+ from.root = true; -+ from.level = entry->level + 1; -+ from.btree = entry->btree_id; -+ - if (journal_entry_err_on(!entry->u64s || - le16_to_cpu(entry->u64s) != k->k.u64s, - c, version, jset, entry, -@@ -452,8 +457,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, - return 0; - } - -- ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -- version, big_endian, flags); -+ ret = journal_validate_key(c, jset, entry, k, from, version, big_endian); - if (ret == FSCK_DELETED_KEY) - ret = 0; - fsck_err: -@@ -470,7 +474,7 @@ static int journal_entry_prio_ptrs_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - /* obsolete, don't care: */ - return 0; -@@ -485,7 +489,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -512,7 +516,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_blacklist_v2 *bl_entry; - int ret = 0; -@@ -554,7 +558,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_usage *u = - container_of(entry, struct jset_entry_usage, entry); -@@ -588,7 +592,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_data_usage *u = - container_of(entry, struct jset_entry_data_usage, entry); -@@ -632,7 +636,7 @@ static int journal_entry_clock_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_clock *clock = - container_of(entry, struct jset_entry_clock, entry); -@@ -665,14 +669,14 @@ static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c, - struct jset_entry_clock *clock = - container_of(entry, struct jset_entry_clock, entry); - -- prt_printf(out, "%s=%llu", clock->rw ? 
"write" : "read", le64_to_cpu(clock->time)); -+ prt_printf(out, "%s=%llu", str_write_read(clock->rw), le64_to_cpu(clock->time)); - } - - static int journal_entry_dev_usage_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct jset_entry_dev_usage *u = - container_of(entry, struct jset_entry_dev_usage, entry); -@@ -729,7 +733,7 @@ static int journal_entry_log_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return 0; - } -@@ -738,19 +742,19 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, - struct jset_entry *entry) - { - struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); -- unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); - -- prt_printf(out, "%.*s", bytes, l->d); -+ prt_printf(out, "%.*s", jset_entry_log_msg_bytes(l), l->d); - } - - static int journal_entry_overwrite_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { -+ from.flags = 0; - return journal_entry_btree_keys_validate(c, jset, entry, -- version, big_endian, READ); -+ version, big_endian, from); - } - - static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c, -@@ -763,10 +767,10 @@ static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return journal_entry_btree_keys_validate(c, jset, entry, -- version, big_endian, READ); -+ version, big_endian, from); - } - - static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c, -@@ -779,7 +783,7 @@ static int journal_entry_datetime_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - unsigned bytes = vstruct_bytes(entry); - unsigned expected = 16; -@@ -809,7 +813,7 @@ static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs * - struct jset_entry_ops { - int (*validate)(struct bch_fs *, struct jset *, - struct jset_entry *, unsigned, int, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *); - }; - -@@ -827,11 +831,11 @@ int bch2_journal_entry_validate(struct bch_fs *c, - struct jset *jset, - struct jset_entry *entry, - unsigned version, int big_endian, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - return entry->type < BCH_JSET_ENTRY_NR - ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, entry, -- version, big_endian, flags) -+ version, big_endian, from) - : 0; - } - -@@ -849,10 +853,18 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, - static int jset_validate_entries(struct bch_fs *c, struct jset *jset, - enum bch_validate_flags flags) - { -+ struct bkey_validate_context from = { -+ .flags = flags, -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; -+ - unsigned version = le32_to_cpu(jset->version); - int ret = 0; - - vstruct_for_each(jset, entry) { -+ from.journal_offset = (u64 *) entry - jset->_data; -+ - if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), - c, version, jset, entry, - journal_entry_past_jset_end, -@@ -861,8 +873,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, - break; - } - -- ret = bch2_journal_entry_validate(c, jset, entry, -- version, JSET_BIG_ENDIAN(jset), flags); -+ ret = bch2_journal_entry_validate(c, jset, entry, version, -+ JSET_BIG_ENDIAN(jset), from); - if (ret) - break; - } -@@ -875,13 +887,17 @@ static int jset_validate(struct bch_fs *c, - struct jset *jset, u64 sector, - enum bch_validate_flags flags) - { -- unsigned version; -+ struct bkey_validate_context from = { -+ .flags = flags, -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; - int ret = 0; - - if (le64_to_cpu(jset->magic) != jset_magic(c)) - return JOURNAL_ENTRY_NONE; - -- version = le32_to_cpu(jset->version); -+ unsigned version = le32_to_cpu(jset->version); - if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, - jset_unsupported_version, -@@ -926,15 +942,16 @@ static int jset_validate_early(struct bch_fs *c, - unsigned bucket_sectors_left, - unsigned sectors_read) - { -- size_t bytes = vstruct_bytes(jset); -- unsigned version; -- enum bch_validate_flags flags = BCH_VALIDATE_journal; -+ struct bkey_validate_context from = { -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(jset->seq), -+ }; - int ret = 0; - - if (le64_to_cpu(jset->magic) != jset_magic(c)) - return JOURNAL_ENTRY_NONE; - -- version = le32_to_cpu(jset->version); -+ unsigned version = le32_to_cpu(jset->version); - if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, - jset_unsupported_version, -@@ -947,6 +964,7 @@ static int jset_validate_early(struct bch_fs *c, - return -EINVAL; - } - -+ size_t bytes = vstruct_bytes(jset); - if (bytes > (sectors_read << 9) && - sectors_read < bucket_sectors_left) - return JOURNAL_ENTRY_REREAD; -@@ -1231,8 +1249,6 @@ int bch2_journal_read(struct bch_fs *c, - * those entries will be blacklisted: - */ - genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { -- enum bch_validate_flags flags = BCH_VALIDATE_journal; -- - i = *_i; - - if (journal_replay_ignore(i)) -@@ -1252,6 +1268,10 @@ int bch2_journal_read(struct bch_fs *c, - continue; - } - -+ struct bkey_validate_context from = { -+ .from = BKEY_VALIDATE_journal, -+ .journal_seq = le64_to_cpu(i->j.seq), -+ }; - if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), - c, le32_to_cpu(i->j.version), &i->j, NULL, - jset_last_seq_newer_than_seq, -@@ -1411,27 +1431,50 @@ int bch2_journal_read(struct bch_fs *c, - - /* journal write: */ - -+static void journal_advance_devs_to_next_bucket(struct journal *j, -+ struct dev_alloc_list *devs, -+ unsigned sectors, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ -+ 
darray_for_each(*devs, i) { -+ struct bch_dev *ca = rcu_dereference(c->devs[*i]); -+ if (!ca) -+ continue; -+ -+ struct journal_device *ja = &ca->journal; -+ -+ if (sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq); -+ } -+ } -+} -+ - static void __journal_write_alloc(struct journal *j, - struct journal_buf *w, -- struct dev_alloc_list *devs_sorted, -+ struct dev_alloc_list *devs, - unsigned sectors, - unsigned *replicas, - unsigned replicas_want) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); -- struct journal_device *ja; -- struct bch_dev *ca; -- unsigned i; - -- if (*replicas >= replicas_want) -- return; -- -- for (i = 0; i < devs_sorted->nr; i++) { -- ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ darray_for_each(*devs, i) { -+ struct bch_dev *ca = rcu_dereference(c->devs[*i]); - if (!ca) - continue; - -- ja = &ca->journal; -+ struct journal_device *ja = &ca->journal; - - /* - * Check that we can use this device, and aren't already using -@@ -1477,65 +1520,53 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) - { - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_devs_mask devs; -- struct journal_device *ja; -- struct bch_dev *ca; - struct dev_alloc_list devs_sorted; - unsigned sectors = vstruct_sectors(w->data, c->block_bits); - unsigned target = c->opts.metadata_target ?: - c->opts.foreground_target; -- unsigned i, replicas = 0, replicas_want = -+ unsigned replicas = 0, replicas_want = - READ_ONCE(c->opts.metadata_replicas); - unsigned replicas_need = min_t(unsigned, replicas_want, - READ_ONCE(c->opts.metadata_replicas_required)); -+ bool advance_done = false; - - rcu_read_lock(); --retry: -- devs = target_rw_devs(c, BCH_DATA_journal, target); - -- devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); -+ /* We might run more than once if we have to stop and do discards: */ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key)); -+ bkey_for_each_ptr(ptrs, p) { -+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev); -+ if (ca) -+ replicas += ca->mi.durability; -+ } - -- __journal_write_alloc(j, w, &devs_sorted, -- sectors, &replicas, replicas_want); -+retry_target: -+ devs = target_rw_devs(c, BCH_DATA_journal, target); -+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); -+retry_alloc: -+ __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want); - -- if (replicas >= replicas_want) -+ if (likely(replicas >= replicas_want)) - goto done; - -- for (i = 0; i < devs_sorted.nr; i++) { -- ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -- if (!ca) -- continue; -- -- ja = &ca->journal; -- -- if (sectors > ja->sectors_free && -- sectors <= ca->mi.bucket_size && -- bch2_journal_dev_buckets_available(j, ja, -- journal_space_discarded)) { -- ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -- ja->sectors_free = ca->mi.bucket_size; -- -- /* -- * ja->bucket_seq[ja->cur_idx] must always have -- * something sensible: -- */ -- ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -- } -+ if (!advance_done) { -+ journal_advance_devs_to_next_bucket(j, &devs_sorted, sectors, w->data->seq); -+ advance_done = true; -+ goto retry_alloc; - } - -- __journal_write_alloc(j, 
w, &devs_sorted, -- sectors, &replicas, replicas_want); -- - if (replicas < replicas_want && target) { - /* Retry from all devices: */ - target = 0; -- goto retry; -+ advance_done = false; -+ goto retry_target; - } - done: - rcu_read_unlock(); - - BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); - -- return replicas >= replicas_need ? 0 : -EROFS; -+ return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices; - } - - static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -@@ -1732,6 +1763,7 @@ static CLOSURE_CALLBACK(journal_write_submit) - bio->bi_iter.bi_sector = ptr->offset; - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; -+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0); - - BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); - ca->prev_journal_sector = bio->bi_iter.bi_sector; -@@ -2023,19 +2055,21 @@ CLOSURE_CALLBACK(bch2_journal_write) - bch2_journal_do_discards(j); - } - -- if (ret) { -+ if (ret && !bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; - buf.atomic++; - -- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), -+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), - le64_to_cpu(w->data->seq), -+ vstruct_sectors(w->data, c->block_bits), - bch2_err_str(ret)); - __bch2_journal_debug_to_text(&buf, j); - spin_unlock(&j->lock); - bch2_print_string_as_lines(KERN_ERR, buf.buf); - printbuf_exit(&buf); -- goto err; - } -+ if (ret) -+ goto err; - - /* - * write is allocated, no longer need to account for it in -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -index 2ca9cde30ea8..12b39fcb4424 100644 ---- a/fs/bcachefs/journal_io.h -+++ b/fs/bcachefs/journal_io.h -@@ -63,7 +63,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, - - int bch2_journal_entry_validate(struct bch_fs *, struct jset *, - struct jset_entry *, unsigned, int, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, - struct jset_entry *); - -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -index ace291f175dd..6a9cefb635d6 100644 ---- a/fs/bcachefs/journal_reclaim.c -+++ b/fs/bcachefs/journal_reclaim.c -@@ -38,6 +38,9 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, - struct journal_device *ja, - enum journal_space_from from) - { -+ if (!ja->nr) -+ return 0; -+ - unsigned available = (journal_space_from(ja, from) - - ja->cur_idx - 1 + ja->nr) % ja->nr; - -@@ -137,14 +140,18 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - struct bch_fs *c = container_of(j, struct bch_fs, journal); - unsigned pos, nr_devs = 0; - struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; -+ unsigned min_bucket_size = U32_MAX; - - BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); - - rcu_read_lock(); - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) { -- if (!ca->journal.nr) -+ if (!ca->journal.nr || -+ !ca->mi.durability) - continue; - -+ min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); -+ - space = journal_dev_space_available(j, ca, from); - if (!space.next_entry) - continue; -@@ -164,7 +171,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne - * We sorted largest to smallest, and we want the smallest out of the - * @nr_devs_want largest devices: - */ -- return dev_space[nr_devs_want - 1]; -+ space = 
dev_space[nr_devs_want - 1]; -+ space.next_entry = min(space.next_entry, min_bucket_size); -+ return space; - } - - void bch2_journal_space_available(struct journal *j) -@@ -318,8 +327,10 @@ void bch2_journal_reclaim_fast(struct journal *j) - popped = true; - } - -- if (popped) -+ if (popped) { - bch2_journal_space_available(j); -+ __closure_wake_up(&j->reclaim_flush_wait); -+ } - } - - bool __bch2_journal_pin_put(struct journal *j, u64 seq) -@@ -353,6 +364,9 @@ static inline bool __journal_pin_drop(struct journal *j, - pin->seq = 0; - list_del_init(&pin->list); - -+ if (j->reclaim_flush_wait.list.first) -+ __closure_wake_up(&j->reclaim_flush_wait); -+ - /* - * Unpinning a journal entry may make journal_next_bucket() succeed, if - * writing a new last_seq will now make another bucket available: -@@ -374,11 +388,11 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) - { - if (fn == bch2_btree_node_flush0 || - fn == bch2_btree_node_flush1) -- return JOURNAL_PIN_btree; -+ return JOURNAL_PIN_TYPE_btree; - else if (fn == bch2_btree_key_cache_journal_flush) -- return JOURNAL_PIN_key_cache; -+ return JOURNAL_PIN_TYPE_key_cache; - else -- return JOURNAL_PIN_other; -+ return JOURNAL_PIN_TYPE_other; - } - - static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, -@@ -397,7 +411,12 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, - atomic_inc(&pin_list->count); - pin->seq = seq; - pin->flush = flush_fn; -- list_add(&pin->list, &pin_list->list[type]); -+ -+ if (list_empty(&pin_list->unflushed[type]) && -+ j->reclaim_flush_wait.list.first) -+ __closure_wake_up(&j->reclaim_flush_wait); -+ -+ list_add(&pin->list, &pin_list->unflushed[type]); - } - - void bch2_journal_pin_copy(struct journal *j, -@@ -490,16 +509,15 @@ journal_get_next_pin(struct journal *j, - { - struct journal_entry_pin_list *pin_list; - struct journal_entry_pin *ret = NULL; -- unsigned i; - - fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) { - if (*seq > seq_to_flush && !allowed_above_seq) - break; - -- for (i = 0; i < JOURNAL_PIN_NR; i++) -- if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) || -- ((1U << i) & allowed_above_seq)) { -- ret = list_first_entry_or_null(&pin_list->list[i], -+ for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) -+ if (((BIT(i) & allowed_below_seq) && *seq <= seq_to_flush) || -+ (BIT(i) & allowed_above_seq)) { -+ ret = list_first_entry_or_null(&pin_list->unflushed[i], - struct journal_entry_pin, list); - if (ret) - return ret; -@@ -535,8 +553,8 @@ static size_t journal_flush_pins(struct journal *j, - } - - if (min_key_cache) { -- allowed_above |= 1U << JOURNAL_PIN_key_cache; -- allowed_below |= 1U << JOURNAL_PIN_key_cache; -+ allowed_above |= BIT(JOURNAL_PIN_TYPE_key_cache); -+ allowed_below |= BIT(JOURNAL_PIN_TYPE_key_cache); - } - - cond_resched(); -@@ -544,7 +562,9 @@ static size_t journal_flush_pins(struct journal *j, - j->last_flushed = jiffies; - - spin_lock(&j->lock); -- pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq); -+ pin = journal_get_next_pin(j, seq_to_flush, -+ allowed_below, -+ allowed_above, &seq); - if (pin) { - BUG_ON(j->flush_in_progress); - j->flush_in_progress = pin; -@@ -567,7 +587,7 @@ static size_t journal_flush_pins(struct journal *j, - spin_lock(&j->lock); - /* Pin might have been dropped or rearmed: */ - if (likely(!err && !j->flush_in_progress_dropped)) -- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed); -+ list_move(&pin->list, &journal_seq_pin(j, 
seq)->flushed[journal_pin_type(flush_fn)]); - j->flush_in_progress = NULL; - j->flush_in_progress_dropped = false; - spin_unlock(&j->lock); -@@ -758,10 +778,12 @@ static int bch2_journal_reclaim_thread(void *arg) - journal_empty = fifo_empty(&j->pin); - spin_unlock(&j->lock); - -+ long timeout = j->next_reclaim - jiffies; -+ - if (journal_empty) - schedule(); -- else if (time_after(j->next_reclaim, jiffies)) -- schedule_timeout(j->next_reclaim - jiffies); -+ else if (timeout > 0) -+ schedule_timeout(timeout); - else - break; - } -@@ -805,10 +827,41 @@ int bch2_journal_reclaim_start(struct journal *j) - return 0; - } - -+static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, -+ unsigned types) -+{ -+ struct journal_entry_pin_list *pin_list; -+ u64 seq; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { -+ if (seq > seq_to_flush) -+ break; -+ -+ for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) -+ if ((BIT(i) & types) && -+ (!list_empty(&pin_list->unflushed[i]) || -+ !list_empty(&pin_list->flushed[i]))) { -+ spin_unlock(&j->lock); -+ return true; -+ } -+ } -+ spin_unlock(&j->lock); -+ -+ return false; -+} -+ -+static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_flush, -+ unsigned types) -+{ -+ return journal_flush_pins(j, seq_to_flush, types, 0, 0, 0) || -+ journal_pins_still_flushing(j, seq_to_flush, types); -+} -+ - static int journal_flush_done(struct journal *j, u64 seq_to_flush, - bool *did_work) - { -- int ret; -+ int ret = 0; - - ret = bch2_journal_error(j); - if (ret) -@@ -816,12 +869,18 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, - - mutex_lock(&j->reclaim_lock); - -- if (journal_flush_pins(j, seq_to_flush, -- (1U << JOURNAL_PIN_key_cache)| -- (1U << JOURNAL_PIN_other), 0, 0, 0) || -- journal_flush_pins(j, seq_to_flush, -- (1U << JOURNAL_PIN_btree), 0, 0, 0)) -+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush, -+ BIT(JOURNAL_PIN_TYPE_key_cache)| -+ BIT(JOURNAL_PIN_TYPE_other))) { - *did_work = true; -+ goto unlock; -+ } -+ -+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush, -+ BIT(JOURNAL_PIN_TYPE_btree))) { -+ *did_work = true; -+ goto unlock; -+ } - - if (seq_to_flush > journal_cur_seq(j)) - bch2_journal_entry_close(j); -@@ -836,6 +895,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, - !fifo_used(&j->pin); - - spin_unlock(&j->lock); -+unlock: - mutex_unlock(&j->reclaim_lock); - - return ret; -@@ -849,7 +909,7 @@ bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) - if (!test_bit(JOURNAL_running, &j->flags)) - return false; - -- closure_wait_event(&j->async_wait, -+ closure_wait_event(&j->reclaim_flush_wait, - journal_flush_done(j, seq_to_flush, &did_work)); - - return did_work; -@@ -915,3 +975,54 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) - - return ret; - } -+ -+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *pin; -+ -+ spin_lock(&j->lock); -+ if (!test_bit(JOURNAL_running, &j->flags)) { -+ spin_unlock(&j->lock); -+ return true; -+ } -+ -+ *seq = max(*seq, j->pin.front); -+ -+ if (*seq >= j->pin.back) { -+ spin_unlock(&j->lock); -+ return true; -+ } -+ -+ out->atomic++; -+ -+ pin_list = journal_seq_pin(j, *seq); -+ -+ prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); -+ printbuf_indent_add(out, 2); -+ -+ prt_printf(out, "unflushed:\n"); -+ for (unsigned i = 0; 
i < ARRAY_SIZE(pin_list->unflushed); i++) -+ list_for_each_entry(pin, &pin_list->unflushed[i], list) -+ prt_printf(out, "\t%px %ps\n", pin, pin->flush); -+ -+ prt_printf(out, "flushed:\n"); -+ for (unsigned i = 0; i < ARRAY_SIZE(pin_list->flushed); i++) -+ list_for_each_entry(pin, &pin_list->flushed[i], list) -+ prt_printf(out, "\t%px %ps\n", pin, pin->flush); -+ -+ printbuf_indent_sub(out, 2); -+ -+ --out->atomic; -+ spin_unlock(&j->lock); -+ -+ return false; -+} -+ -+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) -+{ -+ u64 seq = 0; -+ -+ while (!bch2_journal_seq_pins_to_text(out, j, &seq)) -+ seq++; -+} -diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h -index ec84c3345281..0a73d7134e1c 100644 ---- a/fs/bcachefs/journal_reclaim.h -+++ b/fs/bcachefs/journal_reclaim.h -@@ -78,4 +78,7 @@ static inline bool bch2_journal_flush_all_pins(struct journal *j) - - int bch2_journal_flush_device_pins(struct journal *, int); - -+void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); -+ - #endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -index 19183fcf7ad7..3ba433a48eb8 100644 ---- a/fs/bcachefs/journal_types.h -+++ b/fs/bcachefs/journal_types.h -@@ -9,6 +9,9 @@ - #include "super_types.h" - #include "fifo.h" - -+/* btree write buffer steals 8 bits for its own purposes: */ -+#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1) -+ - #define JOURNAL_BUF_BITS 2 - #define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS) - #define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1) -@@ -50,15 +53,15 @@ struct journal_buf { - */ - - enum journal_pin_type { -- JOURNAL_PIN_btree, -- JOURNAL_PIN_key_cache, -- JOURNAL_PIN_other, -- JOURNAL_PIN_NR, -+ JOURNAL_PIN_TYPE_btree, -+ JOURNAL_PIN_TYPE_key_cache, -+ JOURNAL_PIN_TYPE_other, -+ JOURNAL_PIN_TYPE_NR, - }; - - struct journal_entry_pin_list { -- struct list_head list[JOURNAL_PIN_NR]; -- struct list_head flushed; -+ struct list_head unflushed[JOURNAL_PIN_TYPE_NR]; -+ struct list_head flushed[JOURNAL_PIN_TYPE_NR]; - atomic_t count; - struct bch_devs_list devs; - }; -@@ -112,6 +115,7 @@ union journal_res_state { - */ - #define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) - -+#define JOURNAL_ENTRY_BLOCKED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 2) - #define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) - #define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) - -@@ -193,6 +197,7 @@ struct journal { - * insufficient devices: - */ - enum journal_errors cur_entry_error; -+ unsigned cur_entry_offset_if_blocked; - - unsigned buf_size_want; - /* -@@ -221,6 +226,7 @@ struct journal { - /* Used when waiting because the journal was full */ - wait_queue_head_t wait; - struct closure_waitlist async_wait; -+ struct closure_waitlist reclaim_flush_wait; - - struct delayed_work write_work; - struct workqueue_struct *wq; -diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c -index 60e00702d1a4..75f27ec26f85 100644 ---- a/fs/bcachefs/logged_ops.c -+++ b/fs/bcachefs/logged_ops.c -@@ -63,8 +63,10 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, - int bch2_resume_logged_ops(struct bch_fs *c) - { - int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, -- BTREE_ID_logged_ops, POS_MIN, -+ for_each_btree_key_max(trans, iter, -+ BTREE_ID_logged_ops, -+ POS(LOGGED_OPS_INUM_logged_ops, 0), -+ POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), - BTREE_ITER_prefetch, k, - 
resume_logged_op(trans, &iter, k))); - bch_err_fn(c, ret); -@@ -74,9 +76,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) - static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) - { - struct btree_iter iter; -- int ret; -- -- ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); -+ int ret = bch2_bkey_get_empty_slot(trans, &iter, -+ BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, U64_MAX)); - if (ret) - return ret; - -diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h -index 6a4bf7129dba..cfb67c95d4c8 100644 ---- a/fs/bcachefs/logged_ops_format.h -+++ b/fs/bcachefs/logged_ops_format.h -@@ -2,6 +2,11 @@ - #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H - #define _BCACHEFS_LOGGED_OPS_FORMAT_H - -+enum logged_ops_inums { -+ LOGGED_OPS_INUM_logged_ops, -+ LOGGED_OPS_INUM_inode_cursors, -+}; -+ - struct bch_logged_op_truncate { - struct bch_val v; - __le32 subvol; -diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c -index 10857eccdeaf..ce794d55818f 100644 ---- a/fs/bcachefs/lru.c -+++ b/fs/bcachefs/lru.c -@@ -12,7 +12,7 @@ - - /* KEY_TYPE_lru is obsolete: */ - int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -192,7 +192,7 @@ int bch2_check_lrus(struct bch_fs *c) - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, - BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, -- NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_lru_key(trans, &iter, k, &last_flushed))); - - bch2_bkey_buf_exit(&last_flushed, c); -diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h -index e6a7d8241bb8..f31a6cf1514c 100644 ---- a/fs/bcachefs/lru.h -+++ b/fs/bcachefs/lru.h -@@ -33,7 +33,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) - return BCH_LRU_read; - } - --int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context); - void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - void bch2_lru_pos_to_text(struct printbuf *, struct bpos); -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -index 0ef4a86850bb..ff787d3d50d2 100644 ---- a/fs/bcachefs/move.c -+++ b/fs/bcachefs/move.c -@@ -21,6 +21,8 @@ - #include "journal_reclaim.h" - #include "keylist.h" - #include "move.h" -+#include "rebalance.h" -+#include "reflink.h" - #include "replicas.h" - #include "snapshot.h" - #include "super-io.h" -@@ -72,11 +74,7 @@ struct moving_io { - unsigned read_sectors; - unsigned write_sectors; - -- struct bch_read_bio rbio; -- - struct data_update write; -- /* Must be last since it is variable size */ -- struct bio_vec bi_inline_vecs[]; - }; - - static void move_free(struct moving_io *io) -@@ -86,13 +84,12 @@ static void move_free(struct moving_io *io) - if (io->b) - atomic_dec(&io->b->count); - -- bch2_data_update_exit(&io->write); -- - mutex_lock(&ctxt->lock); - list_del(&io->io_list); - wake_up(&ctxt->wait); - mutex_unlock(&ctxt->lock); - -+ bch2_data_update_exit(&io->write); - kfree(io); - } - -@@ -112,7 +109,7 @@ static void move_write_done(struct bch_write_op *op) - - static void move_write(struct moving_io *io) - { -- if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { -+ if (unlikely(io->write.rbio.bio.bi_status || io->write.rbio.hole)) { - move_free(io); - return; - } -@@ -130,7 +127,7 @@ static void move_write(struct 
moving_io *io) - atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); - atomic_inc(&io->write.ctxt->write_ios); - -- bch2_data_update_read_done(&io->write, io->rbio.pick.crc); -+ bch2_data_update_read_done(&io->write); - } - - struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) -@@ -143,7 +140,7 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctx - - static void move_read_endio(struct bio *bio) - { -- struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); -+ struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio); - struct moving_context *ctxt = io->write.ctxt; - - atomic_sub(io->read_sectors, &ctxt->read_sectors); -@@ -196,6 +193,13 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) - list_del(&ctxt->list); - mutex_unlock(&c->moving_context_lock); - -+ /* -+ * Generally, releasing a transaction within a transaction restart means -+ * an unhandled transaction restart: but this can happen legitimately -+ * within the move code, e.g. when bch2_move_ratelimit() tells us to -+ * exit before we've retried -+ */ -+ bch2_trans_begin(ctxt->trans); - bch2_trans_put(ctxt->trans); - memset(ctxt, 0, sizeof(*ctxt)); - } -@@ -249,11 +253,6 @@ int bch2_move_extent(struct moving_context *ctxt, - { - struct btree_trans *trans = ctxt->trans; - struct bch_fs *c = trans->c; -- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -- struct moving_io *io; -- const union bch_extent_entry *entry; -- struct extent_ptr_decoded p; -- unsigned sectors = k.k->size, pages; - int ret = -ENOMEM; - - trace_move_extent2(c, k, &io_opts, &data_opts); -@@ -276,13 +275,7 @@ int bch2_move_extent(struct moving_context *ctxt, - */ - bch2_trans_unlock(trans); - -- /* write path might have to decompress data: */ -- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -- sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); -- -- pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -- io = kzalloc(sizeof(struct moving_io) + -- sizeof(struct bio_vec) * pages, GFP_KERNEL); -+ struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL); - if (!io) - goto err; - -@@ -291,29 +284,13 @@ int bch2_move_extent(struct moving_context *ctxt, - io->read_sectors = k.k->size; - io->write_sectors = k.k->size; - -- bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0); -- bio_set_prio(&io->write.op.wbio.bio, -- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -- -- if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, -- GFP_KERNEL)) -- goto err_free; -- -- io->rbio.c = c; -- io->rbio.opts = io_opts; -- bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0); -- io->rbio.bio.bi_vcnt = pages; -- bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -- io->rbio.bio.bi_iter.bi_size = sectors << 9; -- -- io->rbio.bio.bi_opf = REQ_OP_READ; -- io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -- io->rbio.bio.bi_end_io = move_read_endio; -- - ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, iter->btree_id, k); - if (ret) -- goto err_free_pages; -+ goto err_free; -+ -+ io->write.rbio.bio.bi_end_io = move_read_endio; -+ bio_set_prio(&io->write.rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); - - io->write.op.end_io = move_write_done; - -@@ -347,18 +324,16 @@ int bch2_move_extent(struct moving_context *ctxt, - * ctxt when doing wakeup - */ - closure_get(&ctxt->cl); -- bch2_read_extent(trans, &io->rbio, -+ bch2_read_extent(trans, &io->write.rbio, - 
bkey_start_pos(k.k), - iter->btree_id, k, 0, -- BCH_READ_NODECODE| -- BCH_READ_LAST_FRAGMENT); -+ BCH_READ_data_update| -+ BCH_READ_last_fragment); - return 0; --err_free_pages: -- bio_free_pages(&io->write.op.wbio.bio); - err_free: - kfree(io); - err: -- if (ret == -BCH_ERR_data_update_done) -+ if (bch2_err_matches(ret, BCH_ERR_data_update_done)) - return 0; - - if (bch2_err_matches(ret, EROFS) || -@@ -379,34 +354,42 @@ int bch2_move_extent(struct moving_context *ctxt, - return ret; - } - --struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, -+static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - struct per_snapshot_io_opts *io_opts, -+ struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ -+ struct btree_iter *extent_iter, - struct bkey_s_c extent_k) - { - struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; -+ struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; - int ret = 0; - -- if (io_opts->cur_inum != extent_k.k->p.inode) { -+ if (extent_k.k->type == KEY_TYPE_reflink_v) -+ goto out; -+ -+ if (io_opts->cur_inum != extent_pos.inode) { - io_opts->d.nr = 0; - -- ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), -+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), - BTREE_ITER_all_snapshots, k, ({ -- if (k.k->p.offset != extent_k.k->p.inode) -+ if (k.k->p.offset != extent_pos.inode) - break; - - if (!bkey_is_inode(k.k)) - continue; - - struct bch_inode_unpacked inode; -- BUG_ON(bch2_inode_unpack(k, &inode)); -+ _ret3 = bch2_inode_unpack(k, &inode); -+ if (_ret3) -+ break; - - struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; - bch2_inode_opts_get(&e.io_opts, trans->c, &inode); - - darray_push(&io_opts->d, e); - })); -- io_opts->cur_inum = extent_k.k->p.inode; -+ io_opts->cur_inum = extent_pos.inode; - } - - ret = ret ?: trans_was_restarted(trans, restart_count); -@@ -415,43 +398,46 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, - - if (extent_k.k->p.snapshot) - darray_for_each(io_opts->d, i) -- if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) -- return &i->io_opts; -- -- return &io_opts->fs_io_opts; -+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) { -+ opts_ret = &i->io_opts; -+ break; -+ } -+out: -+ ret = bch2_get_update_rebalance_opts(trans, opts_ret, extent_iter, extent_k); -+ if (ret) -+ return ERR_PTR(ret); -+ return opts_ret; - } - - int bch2_move_get_io_opts_one(struct btree_trans *trans, - struct bch_io_opts *io_opts, -+ struct btree_iter *extent_iter, - struct bkey_s_c extent_k) - { -- struct btree_iter iter; -- struct bkey_s_c k; -- int ret; -+ struct bch_fs *c = trans->c; -+ -+ *io_opts = bch2_opts_to_inode_opts(c->opts); - - /* reflink btree? 
*/ -- if (!extent_k.k->p.inode) { -- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -- return 0; -- } -+ if (!extent_k.k->p.inode) -+ goto out; - -- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ struct btree_iter inode_iter; -+ struct bkey_s_c inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, - SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), - BTREE_ITER_cached); -- ret = bkey_err(k); -+ int ret = bkey_err(inode_k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - -- if (!ret && bkey_is_inode(k.k)) { -+ if (!ret && bkey_is_inode(inode_k.k)) { - struct bch_inode_unpacked inode; -- bch2_inode_unpack(k, &inode); -- bch2_inode_opts_get(io_opts, trans->c, &inode); -- } else { -- *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -+ bch2_inode_unpack(inode_k, &inode); -+ bch2_inode_opts_get(io_opts, c, &inode); - } -- -- bch2_trans_iter_exit(trans, &iter); -- return 0; -+ bch2_trans_iter_exit(trans, &inode_iter); -+out: -+ return bch2_get_update_rebalance_opts(trans, io_opts, extent_iter, extent_k); - } - - int bch2_move_ratelimit(struct moving_context *ctxt) -@@ -509,9 +495,15 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - struct per_snapshot_io_opts snapshot_io_opts; - struct bch_io_opts *io_opts; - struct bkey_buf sk; -- struct btree_iter iter; -+ struct btree_iter iter, reflink_iter = {}; - struct bkey_s_c k; - struct data_update_opts data_opts; -+ /* -+ * If we're moving a single file, also process reflinked data it points -+ * to (this includes propagating changed io_opts from the inode to the -+ * extent): -+ */ -+ bool walk_indirect = start.inode == end.inode; - int ret = 0, ret2; - - per_snapshot_io_opts_init(&snapshot_io_opts, c); -@@ -531,6 +523,8 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_ratelimit_reset(ctxt->rate); - - while (!bch2_move_ratelimit(ctxt)) { -+ struct btree_iter *extent_iter = &iter; -+ - bch2_trans_begin(trans); - - k = bch2_btree_iter_peek(&iter); -@@ -549,10 +543,36 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - if (ctxt->stats) - ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); - -+ if (walk_indirect && -+ k.k->type == KEY_TYPE_reflink_p && -+ REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); -+ -+ bch2_trans_iter_exit(trans, &reflink_iter); -+ k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0); -+ ret = bkey_err(k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_deleted(k.k)) -+ goto next_nondata; -+ -+ /* -+ * XXX: reflink pointers may point to multiple indirect -+ * extents, so don't advance past the entire reflink -+ * pointer - need to fixup iter->k -+ */ -+ extent_iter = &reflink_iter; -+ } -+ - if (!bkey_extent_is_direct_data(k.k)) - goto next_nondata; - -- io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); -+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, -+ iter.pos, extent_iter, k); - ret = PTR_ERR_OR_ZERO(io_opts); - if (ret) - continue; -@@ -568,12 +588,12 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - -- ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); -+ ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); - if (ret2) { - if 
(bch2_err_matches(ret2, BCH_ERR_transaction_restart)) - continue; - -- if (ret2 == -ENOMEM) { -+ if (bch2_err_matches(ret2, ENOMEM)) { - /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt); - continue; -@@ -589,6 +609,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, - bch2_btree_iter_advance(&iter); - } - -+ bch2_trans_iter_exit(trans, &reflink_iter); - bch2_trans_iter_exit(trans, &iter); - bch2_bkey_buf_exit(&sk, c); - per_snapshot_io_opts_exit(&snapshot_io_opts); -@@ -654,16 +675,12 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - struct bch_fs *c = trans->c; - bool is_kthread = current->flags & PF_KTHREAD; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -- struct btree_iter iter; -+ struct btree_iter iter = {}, bp_iter = {}; - struct bkey_buf sk; -- struct bch_backpointer bp; -- struct bch_alloc_v4 a_convert; -- const struct bch_alloc_v4 *a; - struct bkey_s_c k; - struct data_update_opts data_opts; -- unsigned dirty_sectors, bucket_size; -- u64 fragmentation; -- struct bpos bp_pos = POS_MIN; -+ unsigned sectors_moved = 0; -+ struct bkey_buf last_flushed; - int ret = 0; - - struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); -@@ -672,6 +689,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - - trace_bucket_evacuate(c, &bucket); - -+ bch2_bkey_buf_init(&last_flushed); -+ bkey_init(&last_flushed.k->k); - bch2_bkey_buf_init(&sk); - - /* -@@ -679,21 +698,13 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - */ - bch2_trans_begin(trans); - -- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, -- bucket, BTREE_ITER_cached); -- ret = lockrestart_do(trans, -- bkey_err(k = bch2_btree_iter_peek_slot(&iter))); -- bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_start(ca, bucket), 0); - - bch_err_msg(c, ret, "looking up alloc key"); - if (ret) - goto err; - -- a = bch2_alloc_to_v4(k, &a_convert); -- dirty_sectors = bch2_bucket_sectors_dirty(*a); -- bucket_size = ca->mi.bucket_size; -- fragmentation = alloc_lru_idx_fragmentation(*a, ca); -- - ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); - if (ret) -@@ -705,18 +716,23 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- ret = bch2_get_next_backpointer(trans, ca, bucket, gen, -- &bp_pos, &bp, -- BTREE_ITER_cached); -+ k = bch2_btree_iter_peek(&bp_iter); -+ ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; -- if (bkey_eq(bp_pos, POS_MAX)) -+ -+ if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket))) - break; - -- if (!bp.level) { -- k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); -+ if (k.k->type != KEY_TYPE_backpointer) -+ goto next; -+ -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); -+ -+ if (!bp.v->level) { -+ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; -@@ -728,7 +744,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - -- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -+ ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); - if (ret) { - bch2_trans_iter_exit(trans, &iter); - continue; -@@ -738,14 +754,18 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - data_opts.target = 
io_opts.background_target; - data_opts.rewrite_ptrs = 0; - -+ unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */ - unsigned i = 0; -- bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { -- if (ptr->dev == bucket.inode) { -- data_opts.rewrite_ptrs |= 1U << i; -- if (ptr->cached) { -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { -+ if (p.ptr.dev == bucket.inode) { -+ if (p.ptr.cached) { - bch2_trans_iter_exit(trans, &iter); - goto next; - } -+ data_opts.rewrite_ptrs |= 1U << i; -+ break; - } - i++; - } -@@ -765,14 +785,15 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - goto err; - - if (ctxt->stats) -- atomic64_add(k.k->size, &ctxt->stats->sectors_seen); -+ atomic64_add(sectors, &ctxt->stats->sectors_seen); -+ sectors_moved += sectors; - } else { - struct btree *b; - -- b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp); -+ b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); - ret = PTR_ERR_OR_ZERO(b); - if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) -- continue; -+ goto next; - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) -@@ -796,15 +817,18 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, - atomic64_add(sectors, &ctxt->stats->sectors_seen); - atomic64_add(sectors, &ctxt->stats->sectors_moved); - } -+ sectors_moved += btree_sectors(c); - } - next: -- bp_pos = bpos_nosnap_successor(bp_pos); -+ bch2_btree_iter_advance(&bp_iter); - } - -- trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); -+ trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); - err: -+ bch2_trans_iter_exit(trans, &bp_iter); - bch2_dev_put(ca); - bch2_bkey_buf_exit(&sk, c); -+ bch2_bkey_buf_exit(&last_flushed, c); - return ret; - } - -@@ -1158,7 +1182,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str - - mutex_lock(&ctxt->lock); - list_for_each_entry(io, &ctxt->ios, io_list) -- bch2_write_op_to_text(out, &io->write.op); -+ bch2_data_update_inflight_to_text(out, &io->write); - mutex_unlock(&ctxt->lock); - - printbuf_indent_sub(out, 4); -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -index 9baf3093a678..51e0505a8156 100644 ---- a/fs/bcachefs/move.h -+++ b/fs/bcachefs/move.h -@@ -110,9 +110,8 @@ static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opt - darray_exit(&io_opts->d); - } - --struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, -- struct per_snapshot_io_opts *, struct bkey_s_c); --int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); -+int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, -+ struct btree_iter *, struct bkey_s_c); - - int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); - -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -index d658be90f737..21805509ab9e 100644 ---- a/fs/bcachefs/movinggc.c -+++ b/fs/bcachefs/movinggc.c -@@ -167,7 +167,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, - - bch2_trans_begin(trans); - -- ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, -+ ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru, - lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), - lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), - 0, k, ({ -@@ -215,7 +215,8 @@ static int bch2_copygc(struct moving_context *ctxt, - }; - move_buckets buckets = { 0 }; - struct 
move_bucket_in_flight *f; -- u64 moved = atomic64_read(&ctxt->stats->sectors_moved); -+ u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen); -+ u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved); - int ret = 0; - - ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); -@@ -245,7 +246,6 @@ static int bch2_copygc(struct moving_context *ctxt, - *did_work = true; - } - err: -- darray_exit(&buckets); - - /* no entries in LRU btree found, or got to end: */ - if (bch2_err_matches(ret, ENOENT)) -@@ -254,8 +254,11 @@ static int bch2_copygc(struct moving_context *ctxt, - if (ret < 0 && !bch2_err_matches(ret, EROFS)) - bch_err_msg(c, ret, "from bch2_move_data()"); - -- moved = atomic64_read(&ctxt->stats->sectors_moved) - moved; -- trace_and_count(c, copygc, c, moved, 0, 0, 0); -+ sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen; -+ sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved; -+ trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved); -+ -+ darray_exit(&buckets); - return ret; - } - -@@ -350,9 +353,9 @@ static int bch2_copygc_thread(void *arg) - bch2_trans_unlock_long(ctxt.trans); - cond_resched(); - -- if (!c->copy_gc_enabled) { -+ if (!c->opts.copygc_enabled) { - move_buckets_wait(&ctxt, buckets, true); -- kthread_wait_freezable(c->copy_gc_enabled || -+ kthread_wait_freezable(c->opts.copygc_enabled || - kthread_should_stop()); - } - -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -index 0e2ee262fbd4..6772faf385a5 100644 ---- a/fs/bcachefs/opts.c -+++ b/fs/bcachefs/opts.c -@@ -1,6 +1,7 @@ - // SPDX-License-Identifier: GPL-2.0 - - #include -+#include - - #include "bcachefs.h" - #include "compress.h" -@@ -48,12 +49,12 @@ static const char * const __bch2_csum_types[] = { - NULL - }; - --const char * const bch2_csum_opts[] = { -+const char * const __bch2_csum_opts[] = { - BCH_CSUM_OPTS() - NULL - }; - --static const char * const __bch2_compression_types[] = { -+const char * const __bch2_compression_types[] = { - BCH_COMPRESSION_TYPES() - NULL - }; -@@ -113,6 +114,7 @@ void bch2_prt_##name(struct printbuf *out, type t) \ - PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); - PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); - PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); -+PRT_STR_OPT_BOUNDSCHECKED(csum_opt, enum bch_csum_opt); - PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); - PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); - PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type); -@@ -333,17 +335,18 @@ int bch2_opt_parse(struct bch_fs *c, - switch (opt->type) { - case BCH_OPT_BOOL: - if (val) { -- ret = kstrtou64(val, 10, res); -+ ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool); -+ if (ret != -BCH_ERR_option_not_bool) { -+ *res = ret; -+ } else { -+ if (err) -+ prt_printf(err, "%s: must be bool", opt->attr.name); -+ return ret; -+ } - } else { -- ret = 0; - *res = 1; - } - -- if (ret < 0 || (*res != 0 && *res != 1)) { -- if (err) -- prt_printf(err, "%s: must be bool", opt->attr.name); -- return ret < 0 ? 
ret : -BCH_ERR_option_not_bool; -- } - break; - case BCH_OPT_UINT: - if (!val) { -@@ -710,11 +713,14 @@ void bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, - - struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) - { -- return (struct bch_io_opts) { -+ struct bch_io_opts opts = { - #define x(_name, _bits) ._name = src._name, - BCH_INODE_OPTS() - #undef x - }; -+ -+ bch2_io_opts_fixups(&opts); -+ return opts; - } - - bool bch2_opt_is_inode_opt(enum bch_opt_id id) -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -index 23dda014e331..a182b5d454ba 100644 ---- a/fs/bcachefs/opts.h -+++ b/fs/bcachefs/opts.h -@@ -16,7 +16,8 @@ extern const char * const bch2_version_upgrade_opts[]; - extern const char * const bch2_sb_features[]; - extern const char * const bch2_sb_compat[]; - extern const char * const __bch2_btree_ids[]; --extern const char * const bch2_csum_opts[]; -+extern const char * const __bch2_csum_opts[]; -+extern const char * const __bch2_compression_types[]; - extern const char * const bch2_compression_opts[]; - extern const char * const __bch2_str_hash_types[]; - extern const char * const bch2_str_hash_opts[]; -@@ -27,6 +28,7 @@ extern const char * const bch2_d_types[]; - void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); - void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); - void bch2_prt_data_type(struct printbuf *, enum bch_data_type); -+void bch2_prt_csum_opt(struct printbuf *, enum bch_csum_opt); - void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); - void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); - void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type); -@@ -171,12 +173,12 @@ enum fsck_err_opts { - "size", "Maximum size of checksummed/compressed extents")\ - x(metadata_checksum, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_STR(bch2_csum_opts), \ -+ OPT_STR(__bch2_csum_opts), \ - BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ - NULL, NULL) \ - x(data_checksum, u8, \ - OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_STR(bch2_csum_opts), \ -+ OPT_STR(__bch2_csum_opts), \ - BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ - NULL, NULL) \ - x(compression, u8, \ -@@ -220,14 +222,14 @@ enum fsck_err_opts { - BCH_SB_ERASURE_CODE, false, \ - NULL, "Enable erasure coding (DO NOT USE YET)") \ - x(inodes_32bit, u8, \ -- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_INODE_32BIT, true, \ - NULL, "Constrain inode numbers to 32 bits") \ -- x(shard_inode_numbers, u8, \ -- OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -- OPT_BOOL(), \ -- BCH_SB_SHARD_INUMS, true, \ -+ x(shard_inode_numbers_bits, u8, \ -+ OPT_FS|OPT_FORMAT, \ -+ OPT_UINT(0, 8), \ -+ BCH_SB_SHARD_INUMS_NBITS, 0, \ - NULL, "Shard new inode numbers by CPU id") \ - x(inodes_use_key_cache, u8, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -@@ -473,6 +475,18 @@ enum fsck_err_opts { - BCH2_NO_SB_OPT, true, \ - NULL, "Enable nocow mode: enables runtime locking in\n"\ - "data move path needed if nocow will ever be in use\n")\ -+ x(copygc_enabled, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable copygc: disable for debugging, or to\n"\ -+ "quiet the system when doing performance testing\n")\ -+ x(rebalance_enabled, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable rebalance: disable for debugging, or to\n"\ -+ "quiet the system 
when doing performance testing\n")\ - x(no_data_io, u8, \ - OPT_MOUNT, \ - OPT_BOOL(), \ -@@ -488,7 +502,7 @@ enum fsck_err_opts { - OPT_DEVICE, \ - OPT_UINT(0, S64_MAX), \ - BCH2_NO_SB_OPT, 0, \ -- "size", "Size of filesystem on device") \ -+ "size", "Specifies the bucket size; must be greater than the btree node size")\ - x(durability, u8, \ - OPT_DEVICE|OPT_SB_FIELD_ONE_BIAS, \ - OPT_UINT(0, BCH_REPLICAS_MAX), \ -@@ -624,14 +638,39 @@ struct bch_io_opts { - #define x(_name, _bits) u##_bits _name; - BCH_INODE_OPTS() - #undef x -+#define x(_name, _bits) u64 _name##_from_inode:1; -+ BCH_INODE_OPTS() -+#undef x - }; - --static inline unsigned background_compression(struct bch_io_opts opts) -+static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) - { -- return opts.background_compression ?: opts.compression; -+ if (!opts->background_target) -+ opts->background_target = opts->foreground_target; -+ if (!opts->background_compression) -+ opts->background_compression = opts->compression; -+ if (opts->nocow) { -+ opts->compression = opts->background_compression = 0; -+ opts->data_checksum = 0; -+ opts->erasure_code = 0; -+ } - } - - struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); - bool bch2_opt_is_inode_opt(enum bch_opt_id); - -+/* rebalance opts: */ -+ -+static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) -+{ -+ return (struct bch_extent_rebalance) { -+ .type = BIT(BCH_EXTENT_ENTRY_rebalance), -+#define x(_name) \ -+ ._name = opts->_name, \ -+ ._name##_from_inode = opts->_name##_from_inode, -+ BCH_REBALANCE_OPTS() -+#undef x -+ }; -+}; -+ - #endif /* _BCACHEFS_OPTS_H */ -diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h -index 1d570387b77f..d0dd398baa2b 100644 ---- a/fs/bcachefs/printbuf.h -+++ b/fs/bcachefs/printbuf.h -@@ -251,16 +251,23 @@ static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte) - printbuf_nul_terminate_reserved(out); - } - -+static inline void printbuf_reset_keep_tabstops(struct printbuf *buf) -+{ -+ buf->pos = 0; -+ buf->allocation_failure = 0; -+ buf->last_newline = 0; -+ buf->last_field = 0; -+ buf->indent = 0; -+ buf->cur_tabstop = 0; -+} -+ - /** - * printbuf_reset - re-use a printbuf without freeing and re-initializing it: - */ - static inline void printbuf_reset(struct printbuf *buf) - { -- buf->pos = 0; -- buf->allocation_failure = 0; -- buf->indent = 0; -+ printbuf_reset_keep_tabstops(buf); - buf->nr_tabstops = 0; -- buf->cur_tabstop = 0; - } - - /** -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -index 74f45a8162ad..8b857fc33244 100644 ---- a/fs/bcachefs/quota.c -+++ b/fs/bcachefs/quota.c -@@ -60,7 +60,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { - }; - - int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -index a62abcc5332a..1551800ff44c 100644 ---- a/fs/bcachefs/quota.h -+++ b/fs/bcachefs/quota.h -@@ -5,10 +5,10 @@ - #include "inode.h" - #include "quota_types.h" - --enum bch_validate_flags; - extern const struct bch_sb_field_ops bch_sb_field_ops_quota; - --int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - - #define bch2_bkey_ops_quota ((struct bkey_ops) { \ -diff --git 
a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c -index 40a20192eee8..bef2aa1b8bcd 100644 ---- a/fs/bcachefs/rcu_pending.c -+++ b/fs/bcachefs/rcu_pending.c -@@ -25,21 +25,37 @@ enum rcu_pending_special { - #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) - #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) - --static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp) -+#ifdef __KERNEL__ -+typedef unsigned long rcu_gp_poll_state_t; -+ -+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) -+{ -+ return l == r; -+} -+#else -+typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; -+ -+static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) -+{ -+ return l.grace_period_id == r.grace_period_id; -+} -+#endif -+ -+static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) - { - return ssp - ? get_state_synchronize_srcu(ssp) - : get_state_synchronize_rcu(); - } - --static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp) -+static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) - { - return ssp - ? start_poll_synchronize_srcu(ssp) - : start_poll_synchronize_rcu(); - } - --static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie) -+static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) - { - return ssp - ? poll_state_synchronize_srcu(ssp, cookie) -@@ -71,13 +87,13 @@ struct rcu_pending_seq { - GENRADIX(struct rcu_head *) objs; - size_t nr; - struct rcu_head **cursor; -- unsigned long seq; -+ rcu_gp_poll_state_t seq; - }; - - struct rcu_pending_list { - struct rcu_head *head; - struct rcu_head *tail; -- unsigned long seq; -+ rcu_gp_poll_state_t seq; - }; - - struct rcu_pending_pcpu { -@@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu) - } - - static __always_inline struct rcu_pending_seq * --get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) -+get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) - { - darray_for_each_reverse(p->objs, objs) -- if (objs->seq == seq) -+ if (rcu_gp_poll_cookie_eq(objs->seq, seq)) - return objs; - - if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) -@@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) - } - - static noinline bool --rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, -+rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, - struct rcu_head *head, void *ptr, - unsigned long *flags) - { -@@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, - again: - for (struct rcu_pending_list *i = p->lists; - i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { -- if (i->seq == seq) { -+ if (rcu_gp_poll_cookie_eq(i->seq, seq)) { - rcu_pending_list_add(i, head); - return false; - } -@@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - struct rcu_pending_pcpu *p; - struct rcu_pending_seq *objs; - struct genradix_node *new_node = NULL; -- unsigned long seq, flags; -+ unsigned long flags; - bool start_gp = false; - - BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); -@@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, - local_irq_save(flags); - 
p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); -- seq = __get_state_synchronize_rcu(pending->srcu); -+ rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); - restart: - if (may_sleep && - unlikely(process_finished_items(pending, p, flags))) -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -index cd6647374353..90dbf04c07a1 100644 ---- a/fs/bcachefs/rebalance.c -+++ b/fs/bcachefs/rebalance.c -@@ -24,6 +24,192 @@ - #include - #include - -+/* bch_extent_rebalance: */ -+ -+static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ -+ bkey_extent_entry_for_each(ptrs, entry) -+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) -+ return &entry->rebalance; -+ -+ return NULL; -+} -+ -+static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k, -+ struct bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_compression) -+ return 0; -+ -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -+ p.ptr.unwritten) -+ return 0; -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ -+static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_ptrs_c ptrs) -+{ -+ if (!opts->background_target || -+ !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) -+ return 0; -+ -+ unsigned ptr_bit = 1; -+ unsigned rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target)) -+ rewrite_ptrs |= ptr_bit; -+ ptr_bit <<= 1; -+ } -+ -+ return rewrite_ptrs; -+} -+ -+static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, -+ struct bch_io_opts *opts, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | -+ bch2_bkey_ptrs_need_move(c, opts, ptrs); -+} -+ -+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_extent_rebalance *opts = bch2_bkey_rebalance_opts(k); -+ if (!opts) -+ return 0; -+ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ u64 sectors = 0; -+ -+ if (opts->background_compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression); -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || -+ p.ptr.unwritten) { -+ sectors = 0; -+ goto incompressible; -+ } -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ sectors += p.crc.compressed_size; -+ } -+ } -+incompressible: -+ if (opts->background_target && -+ bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) -+ sectors += p.crc.compressed_size; -+ } -+ -+ return sectors; -+} -+ -+static bool bch2_bkey_rebalance_needs_update(struct 
bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_s_c k) -+{ -+ if (!bkey_extent_is_direct_data(k.k)) -+ return 0; -+ -+ const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { -+ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); -+ return old == NULL || memcmp(old, &new, sizeof(new)); -+ } else { -+ return old != NULL; -+ } -+} -+ -+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, -+ struct bkey_i *_k) -+{ -+ if (!bkey_extent_is_direct_data(&_k->k)) -+ return 0; -+ -+ struct bkey_s k = bkey_i_to_s(_k); -+ struct bch_extent_rebalance *old = -+ (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -+ -+ if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) { -+ if (!old) { -+ old = bkey_val_end(k); -+ k.k->u64s += sizeof(*old) / sizeof(u64); -+ } -+ -+ *old = io_opts_to_rebalance_opts(opts); -+ } else { -+ if (old) -+ extent_entry_drop(k, (union bch_extent_entry *) old); -+ } -+ -+ return 0; -+} -+ -+int bch2_get_update_rebalance_opts(struct btree_trans *trans, -+ struct bch_io_opts *io_opts, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ BUG_ON(iter->flags & BTREE_ITER_is_extents); -+ BUG_ON(iter->flags & BTREE_ITER_filter_snapshots); -+ -+ const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v -+ ? bch2_bkey_rebalance_opts(k) : NULL; -+ if (r) { -+#define x(_name) \ -+ if (r->_name##_from_inode) { \ -+ io_opts->_name = r->_name; \ -+ io_opts->_name##_from_inode = true; \ -+ } -+ BCH_REBALANCE_OPTS() -+#undef x -+ } -+ -+ if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k)) -+ return 0; -+ -+ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8); -+ int ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(n, k); -+ -+ /* On successfull transaction commit, @k was invalidated: */ -+ -+ return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?: -+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ -BCH_ERR_transaction_restart_nested; -+} -+ - #define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) - - static const char * const bch2_rebalance_state_strs[] = { -@@ -33,7 +219,7 @@ static const char * const bch2_rebalance_state_strs[] = { - #undef x - }; - --static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) -+int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) - { - struct btree_iter iter; - struct bkey_s_c k; -@@ -71,9 +257,8 @@ static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) - int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) - { - int ret = bch2_trans_commit_do(c, NULL, NULL, -- BCH_TRANS_COMMIT_no_enospc| -- BCH_TRANS_COMMIT_lazy_rw, -- __bch2_set_rebalance_needs_scan(trans, inum)); -+ BCH_TRANS_COMMIT_no_enospc, -+ bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); - return ret; - } -@@ -121,6 +306,9 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) - { -+ if (!bch2_bkey_rebalance_opts(k)) -+ return 0; -+ - struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); - int ret = PTR_ERR_OR_ZERO(n); - if (ret) -@@ -134,32 +322,28 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, - static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - 
struct bpos work_pos, - struct btree_iter *extent_iter, -+ struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) - { - struct bch_fs *c = trans->c; -- struct bkey_s_c k; - - bch2_trans_iter_exit(trans, extent_iter); - bch2_trans_iter_init(trans, extent_iter, - work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, - work_pos, - BTREE_ITER_all_snapshots); -- k = bch2_btree_iter_peek_slot(extent_iter); -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter); - if (bkey_err(k)) - return k; - -- const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; -- if (!r) { -- /* raced due to btree write buffer, nothing to do */ -- return bkey_s_c_null; -- } -+ int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k); -+ if (ret) -+ return bkey_s_c_err(ret); - - memset(data_opts, 0, sizeof(*data_opts)); -- -- data_opts->rewrite_ptrs = -- bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); -- data_opts->target = r->target; -- data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; -+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); -+ data_opts->target = io_opts->background_target; -+ data_opts->write_flags |= BCH_WRITE_only_specified_devs; - - if (!data_opts->rewrite_ptrs) { - /* -@@ -178,12 +362,28 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, - if (trace_rebalance_extent_enabled()) { - struct printbuf buf = PRINTBUF; - -- prt_str(&buf, "target="); -- bch2_target_to_text(&buf, c, r->target); -- prt_str(&buf, " compression="); -- bch2_compression_opt_to_text(&buf, r->compression); -- prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, k); -+ prt_newline(&buf); -+ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ -+ unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs); -+ if (p) { -+ prt_str(&buf, "compression="); -+ bch2_compression_opt_to_text(&buf, io_opts->background_compression); -+ prt_str(&buf, " "); -+ bch2_prt_u64_base2(&buf, p); -+ prt_newline(&buf); -+ } -+ -+ p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs); -+ if (p) { -+ prt_str(&buf, "move="); -+ bch2_target_to_text(&buf, c, io_opts->background_target); -+ prt_str(&buf, " "); -+ bch2_prt_u64_base2(&buf, p); -+ prt_newline(&buf); -+ } - - trace_rebalance_extent(c, buf.buf); - printbuf_exit(&buf); -@@ -212,14 +412,10 @@ static int do_rebalance_extent(struct moving_context *ctxt, - bch2_bkey_buf_init(&sk); - - ret = bkey_err(k = next_rebalance_extent(trans, work_pos, -- extent_iter, &data_opts)); -+ extent_iter, &io_opts, &data_opts)); - if (ret || !k.k) - goto out; - -- ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -- if (ret) -- goto out; -- - atomic64_add(k.k->size, &ctxt->stats->sectors_seen); - - /* -@@ -253,21 +449,9 @@ static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) - { -- unsigned target, compression; -- -- if (k.k->p.inode) { -- target = io_opts->background_target; -- compression = background_compression(*io_opts); -- } else { -- const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -- -- target = r ? r->target : io_opts->background_target; -- compression = r ? 
r->compression : background_compression(*io_opts); -- } -- -- data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); -- data_opts->target = target; -- data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; -+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k); -+ data_opts->target = io_opts->background_target; -+ data_opts->write_flags |= BCH_WRITE_only_specified_devs; - return data_opts->rewrite_ptrs != 0; - } - -@@ -338,9 +522,9 @@ static int do_rebalance(struct moving_context *ctxt) - BTREE_ITER_all_snapshots); - - while (!bch2_move_ratelimit(ctxt)) { -- if (!r->enabled) { -+ if (!c->opts.rebalance_enabled) { - bch2_moving_ctxt_flush_all(ctxt); -- kthread_wait_freezable(r->enabled || -+ kthread_wait_freezable(c->opts.rebalance_enabled || - kthread_should_stop()); - } - -diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h -index 28a52638f16c..0a0821ab895d 100644 ---- a/fs/bcachefs/rebalance.h -+++ b/fs/bcachefs/rebalance.h -@@ -2,8 +2,18 @@ - #ifndef _BCACHEFS_REBALANCE_H - #define _BCACHEFS_REBALANCE_H - -+#include "compress.h" -+#include "disk_groups.h" - #include "rebalance_types.h" - -+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); -+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); -+int bch2_get_update_rebalance_opts(struct btree_trans *, -+ struct bch_io_opts *, -+ struct btree_iter *, -+ struct bkey_s_c); -+ -+int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); - int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); - int bch2_set_fs_needs_rebalance(struct bch_fs *); - -diff --git a/fs/bcachefs/rebalance_format.h b/fs/bcachefs/rebalance_format.h -new file mode 100644 -index 000000000000..ff9a1342a22b ---- /dev/null -+++ b/fs/bcachefs/rebalance_format.h -@@ -0,0 +1,53 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_FORMAT_H -+#define _BCACHEFS_REBALANCE_FORMAT_H -+ -+struct bch_extent_rebalance { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:3, -+ -+ promote_target_from_inode:1, -+ erasure_code_from_inode:1, -+ data_checksum_from_inode:1, -+ background_compression_from_inode:1, -+ data_replicas_from_inode:1, -+ background_target_from_inode:1, -+ -+ promote_target:16, -+ erasure_code:1, -+ data_checksum:4, -+ data_replicas:4, -+ background_compression:8, /* enum bch_compression_opt */ -+ background_target:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 background_target:16, -+ background_compression:8, -+ data_replicas:4, -+ data_checksum:4, -+ erasure_code:1, -+ promote_target:16, -+ -+ background_target_from_inode:1, -+ data_replicas_from_inode:1, -+ background_compression_from_inode:1, -+ data_checksum_from_inode:1, -+ erasure_code_from_inode:1, -+ promote_target_from_inode:1, -+ -+ unused:3, -+ type:6; -+#endif -+}; -+ -+/* subset of BCH_INODE_OPTS */ -+#define BCH_REBALANCE_OPTS() \ -+ x(data_checksum) \ -+ x(background_compression) \ -+ x(data_replicas) \ -+ x(promote_target) \ -+ x(background_target) \ -+ x(erasure_code) -+ -+#endif /* _BCACHEFS_REBALANCE_FORMAT_H */ -+ -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -index 0fffb536c1d0..fe5098c17dfc 100644 ---- a/fs/bcachefs/rebalance_types.h -+++ b/fs/bcachefs/rebalance_types.h -@@ -30,8 +30,6 @@ struct bch_fs_rebalance { - struct bbpos scan_start; - struct bbpos scan_end; - struct bch_move_stats scan_stats; -- -- unsigned enabled:1; - }; - - #endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git 
a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -index 3c7f941dde39..98825437381c 100644 ---- a/fs/bcachefs/recovery.c -+++ b/fs/bcachefs/recovery.c -@@ -34,21 +34,83 @@ - - #define QSTR(n) { { { .len = strlen(n) } }, .name = n } - --void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) -+int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) - { -- if (btree >= BTREE_ID_NR_MAX) -- return; -- - u64 b = BIT_ULL(btree); -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (!(c->sb.btrees_lost_data & b)) { -- bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); -+ struct printbuf buf = PRINTBUF; -+ bch2_btree_id_to_text(&buf, btree); -+ bch_err(c, "flagging btree %s lost data", buf.buf); -+ printbuf_exit(&buf); -+ ext->btrees_lost_data |= cpu_to_le64(b); -+ } - -- mutex_lock(&c->sb_lock); -- bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); -- bch2_write_super(c); -- mutex_unlock(&c->sb_lock); -+ /* Once we have runtime self healing for topology errors we won't need this: */ -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; -+ -+ /* Btree node accounting will be off: */ -+ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ /* -+ * These are much more minor, and don't need to be corrected right away, -+ * but in debug mode we want the next fsck run to be clean: -+ */ -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; -+#endif -+ -+ switch (btree) { -+ case BTREE_ID_alloc: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); -+ goto out; -+ case BTREE_ID_backpointers: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; -+ goto out; -+ case BTREE_ID_need_discard: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_freespace: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_bucket_gens: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_lru: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; -+ goto out; -+ case BTREE_ID_accounting: -+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; -+ goto out; -+ default: -+ ret = 
bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; -+ goto out; - } -+out: -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+static void kill_btree(struct bch_fs *c, enum btree_id btree) -+{ -+ bch2_btree_id_root(c, btree)->alive = false; -+ bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - } - - /* for -o reconstruct_alloc: */ -@@ -79,6 +141,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) - __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent); - -+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); -+ - __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); -@@ -99,16 +163,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -- bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, -- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); -+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) -+ if (btree_id_is_alloc(i)) -+ kill_btree(c, i); - } - - /* -@@ -354,10 +411,13 @@ int bch2_journal_replay(struct bch_fs *c) - ? 
BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim - : 0), - bch2_journal_replay_key(trans, k)); -- bch_err_msg(c, ret, "while replaying key at btree %s level %u:", -- bch2_btree_id_str(k->btree_id), k->level); -- if (ret) -+ if (ret) { -+ struct printbuf buf = PRINTBUF; -+ bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); -+ bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); -+ printbuf_exit(&buf); - goto err; -+ } - - BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); - } -@@ -403,7 +463,9 @@ static int journal_replay_entry_early(struct bch_fs *c, - - switch (entry->type) { - case BCH_JSET_ENTRY_btree_root: { -- struct btree_root *r; -+ -+ if (unlikely(!entry->u64s)) -+ return 0; - - if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, - c, invalid_btree_id, -@@ -417,15 +479,11 @@ static int journal_replay_entry_early(struct bch_fs *c, - return ret; - } - -- r = bch2_btree_id_root(c, entry->btree_id); -+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - -- if (entry->u64s) { -- r->level = entry->level; -- bkey_copy(&r->key, (struct bkey_i *) entry->start); -- r->error = 0; -- } else { -- r->error = -BCH_ERR_btree_node_read_error; -- } -+ r->level = entry->level; -+ bkey_copy(&r->key, (struct bkey_i *) entry->start); -+ r->error = 0; - r->alive = true; - break; - } -@@ -505,6 +563,7 @@ static int journal_replay_early(struct bch_fs *c, - - static int read_btree_roots(struct bch_fs *c) - { -+ struct printbuf buf = PRINTBUF; - int ret = 0; - - for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { -@@ -513,33 +572,22 @@ static int read_btree_roots(struct bch_fs *c) - if (!r->alive) - continue; - -- if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) -- continue; -+ printbuf_reset(&buf); -+ bch2_btree_id_level_to_text(&buf, i, r->level); - - if (mustfix_fsck_err_on((ret = r->error), - c, btree_root_bkey_invalid, - "invalid btree root %s", -- bch2_btree_id_str(i)) || -+ buf.buf) || - mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), - c, btree_root_read_error, -- "error reading btree root %s l=%u: %s", -- bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { -- if (btree_id_is_alloc(i)) { -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); -- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ "error reading btree root %s: %s", -+ buf.buf, bch2_err_str(ret))) { -+ if (btree_id_is_alloc(i)) - r->error = 0; -- } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { -- bch_info(c, "will run btree node scan"); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); -- } - -- ret = 0; -- bch2_btree_lost_data(c, i); -+ ret = bch2_btree_lost_data(c, i); -+ BUG_ON(ret); - } - } - -@@ -553,6 +601,7 @@ static int read_btree_roots(struct bch_fs *c) - } - } - fsck_err: -+ printbuf_exit(&buf); - return ret; - } - -@@ -563,6 +612,7 @@ static bool check_version_upgrade(struct bch_fs *c) - bch2_latest_compatible_version(c->sb.version)); - unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; - unsigned new_version = 0; -+ bool ret = false; - - if 
(old_version < bcachefs_metadata_required_upgrade_below) { - if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || -@@ -618,14 +668,32 @@ static bool check_version_upgrade(struct bch_fs *c) - } - - bch_info(c, "%s", buf.buf); -+ printbuf_exit(&buf); - -- bch2_sb_upgrade(c, new_version); -+ ret = true; -+ } - -+ if (new_version > c->sb.version_incompat && -+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_str(&buf, "Now allowing incompatible features up to "); -+ bch2_version_to_text(&buf, new_version); -+ prt_str(&buf, ", previously allowed up to "); -+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed); -+ prt_newline(&buf); -+ -+ bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); -- return true; -+ -+ ret = true; - } - -- return false; -+ if (ret) -+ bch2_sb_upgrade(c, new_version, -+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); -+ -+ return ret; - } - - int bch2_fs_recovery(struct bch_fs *c) -@@ -660,8 +728,13 @@ int bch2_fs_recovery(struct bch_fs *c) - goto err; - } - -- if (c->opts.norecovery) -- c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; -+ if (c->opts.norecovery) { -+ c->opts.recovery_pass_last = c->opts.recovery_pass_last -+ ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) -+ : BCH_RECOVERY_PASS_snapshots_read; -+ c->opts.nochanges = true; -+ c->opts.read_only = true; -+ } - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -@@ -708,17 +781,20 @@ int bch2_fs_recovery(struct bch_fs *c) - - c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); - -+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { -+ SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); -+ write_sb = true; -+ } -+ - if (write_sb) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) -- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); -- - if (c->opts.fsck) - set_bit(BCH_FS_fsck_running, &c->flags); - if (c->sb.clean) - set_bit(BCH_FS_clean_recovery, &c->flags); -+ set_bit(BCH_FS_recovery_running, &c->flags); - - ret = bch2_blacklist_table_initialize(c); - if (ret) { -@@ -807,15 +883,15 @@ int bch2_fs_recovery(struct bch_fs *c) - c->journal_replay_seq_start = last_seq; - c->journal_replay_seq_end = blacklist_seq - 1; - -- if (c->opts.reconstruct_alloc) -- bch2_reconstruct_alloc(c); -- - zero_out_btree_mem_ptr(&c->journal_keys); - - ret = journal_replay_early(c, clean); - if (ret) - goto err; - -+ if (c->opts.reconstruct_alloc) -+ bch2_reconstruct_alloc(c); -+ - /* - * After an unclean shutdown, skip then next few journal sequence - * numbers as they may have been referenced by btree writes that -@@ -870,16 +946,17 @@ int bch2_fs_recovery(struct bch_fs *c) - */ - set_bit(BCH_FS_may_go_rw, &c->flags); - clear_bit(BCH_FS_fsck_running, &c->flags); -+ clear_bit(BCH_FS_recovery_running, &c->flags); - - /* in case we don't run journal replay, i.e. 
norecovery mode */ - set_bit(BCH_FS_accounting_replay_done, &c->flags); - -+ bch2_async_btree_node_rewrites_flush(c); -+ - /* fsync if we fixed errors */ -- if (test_bit(BCH_FS_errors_fixed, &c->flags) && -- bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { -+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { - bch2_journal_flush_all_pins(&c->journal); - bch2_journal_meta(&c->journal); -- bch2_write_ref_put(c, BCH_WRITE_REF_fsync); - } - - /* If we fixed errors, verify that fs is actually clean now: */ -@@ -1021,7 +1098,7 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_check_version_downgrade(c); - - if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { -- bch2_sb_upgrade(c, bcachefs_metadata_version_current); -+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); - bch2_write_super(c); - } -@@ -1035,7 +1112,6 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; - set_bit(BCH_FS_btree_running, &c->flags); - set_bit(BCH_FS_may_go_rw, &c->flags); - -@@ -1076,9 +1152,6 @@ int bch2_fs_initialize(struct bch_fs *c) - if (ret) - goto err; - -- for_each_online_member(c, ca) -- ca->new_fs_bucket_idx = 0; -- - ret = bch2_fs_freespace_init(c); - if (ret) - goto err; -@@ -1137,6 +1210,7 @@ int bch2_fs_initialize(struct bch_fs *c) - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; - return 0; - err: - bch_err_fn(c, ret); -diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -index 4bf818de1f2f..b0d55754b21b 100644 ---- a/fs/bcachefs/recovery.h -+++ b/fs/bcachefs/recovery.h -@@ -2,7 +2,7 @@ - #ifndef _BCACHEFS_RECOVERY_H - #define _BCACHEFS_RECOVERY_H - --void bch2_btree_lost_data(struct bch_fs *, enum btree_id); -+int bch2_btree_lost_data(struct bch_fs *, enum btree_id); - - int bch2_journal_replay(struct bch_fs *); - -diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c -index dff589ddc984..0b3c951c32da 100644 ---- a/fs/bcachefs/recovery_passes.c -+++ b/fs/bcachefs/recovery_passes.c -@@ -46,7 +46,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) - - set_bit(BCH_FS_may_go_rw, &c->flags); - -- if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) -+ if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes) - return bch2_fs_read_write_early(c); - return 0; - } -@@ -100,20 +100,34 @@ u64 bch2_recovery_passes_from_stable(u64 v) - /* - * For when we need to rewind recovery passes and run a pass we skipped: - */ --int bch2_run_explicit_recovery_pass(struct bch_fs *c, -- enum bch_recovery_pass pass) -+static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, -+ enum bch_recovery_pass pass) - { -- if (c->opts.recovery_passes & BIT_ULL(pass)) -+ if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) -+ return -BCH_ERR_not_in_recovery; -+ -+ if (c->recovery_passes_complete & BIT_ULL(pass)) - return 0; - -- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", -- bch2_recovery_passes[pass], pass, -- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); -+ bool print = !(c->opts.recovery_passes & BIT_ULL(pass)); -+ -+ if (pass < BCH_RECOVERY_PASS_set_may_go_rw && -+ c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { -+ if (print) -+ bch_info(c, "need recovery pass %s (%u), but already rw", -+ bch2_recovery_passes[pass], pass); 
-+ return -BCH_ERR_cannot_rewind_recovery; -+ } -+ -+ if (print) -+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", -+ bch2_recovery_passes[pass], pass, -+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); - - c->opts.recovery_passes |= BIT_ULL(pass); - -- if (c->curr_recovery_pass >= pass) { -- c->curr_recovery_pass = pass; -+ if (c->curr_recovery_pass > pass) { -+ c->next_recovery_pass = pass; - c->recovery_passes_complete &= (1ULL << pass) >> 1; - return -BCH_ERR_restart_recovery; - } else { -@@ -121,6 +135,27 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, - } - } - -+int bch2_run_explicit_recovery_pass(struct bch_fs *c, -+ enum bch_recovery_pass pass) -+{ -+ unsigned long flags; -+ spin_lock_irqsave(&c->recovery_pass_lock, flags); -+ int ret = __bch2_run_explicit_recovery_pass(c, pass); -+ spin_unlock_irqrestore(&c->recovery_pass_lock, flags); -+ return ret; -+} -+ -+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, -+ enum bch_recovery_pass pass) -+{ -+ lockdep_assert_held(&c->sb_lock); -+ -+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -+ __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); -+ -+ return bch2_run_explicit_recovery_pass(c, pass); -+} -+ - int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) - { -@@ -233,31 +268,48 @@ int bch2_run_recovery_passes(struct bch_fs *c) - */ - c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - -- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { -- if (c->opts.recovery_pass_last && -- c->curr_recovery_pass > c->opts.recovery_pass_last) -- break; -- -- if (should_run_recovery_pass(c, c->curr_recovery_pass)) { -- unsigned pass = c->curr_recovery_pass; -+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { -+ c->next_recovery_pass = c->curr_recovery_pass + 1; - -- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: -- bch2_journal_flush(&c->journal); -- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || -- (ret && c->curr_recovery_pass < pass)) -- continue; -- if (ret) -- break; -+ spin_lock_irq(&c->recovery_pass_lock); -+ unsigned pass = c->curr_recovery_pass; - -- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); -+ if (c->opts.recovery_pass_last && -+ c->curr_recovery_pass > c->opts.recovery_pass_last) { -+ spin_unlock_irq(&c->recovery_pass_lock); -+ break; - } - -- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); -- -- if (!test_bit(BCH_FS_error, &c->flags)) -- bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); -- -- c->curr_recovery_pass++; -+ if (!should_run_recovery_pass(c, pass)) { -+ c->curr_recovery_pass++; -+ c->recovery_pass_done = max(c->recovery_pass_done, pass); -+ spin_unlock_irq(&c->recovery_pass_lock); -+ continue; -+ } -+ spin_unlock_irq(&c->recovery_pass_lock); -+ -+ ret = bch2_run_recovery_pass(c, pass) ?: -+ bch2_journal_flush(&c->journal); -+ -+ if (!ret && !test_bit(BCH_FS_error, &c->flags)) -+ bch2_clear_recovery_pass_required(c, pass); -+ -+ spin_lock_irq(&c->recovery_pass_lock); -+ if (c->next_recovery_pass < c->curr_recovery_pass) { -+ /* -+ * bch2_run_explicit_recovery_pass() was called: we -+ * can't always catch -BCH_ERR_restart_recovery because -+ * it may have been called from another thread (btree -+ * node read completion) -+ */ -+ ret = 0; -+ c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); -+ } else { -+ 
c->recovery_passes_complete |= BIT_ULL(pass); -+ c->recovery_pass_done = max(c->recovery_pass_done, pass); -+ } -+ c->curr_recovery_pass = c->next_recovery_pass; -+ spin_unlock_irq(&c->recovery_pass_lock); - } - - return ret; -diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h -index 99b464e127b8..7d7339c8fa29 100644 ---- a/fs/bcachefs/recovery_passes.h -+++ b/fs/bcachefs/recovery_passes.h -@@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); - u64 bch2_fsck_recovery_passes(void); - - int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -+int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); - int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); - - int bch2_run_online_recovery_passes(struct bch_fs *); -diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h -index 94dc20ca2065..418557960ed6 100644 ---- a/fs/bcachefs/recovery_passes_types.h -+++ b/fs/bcachefs/recovery_passes_types.h -@@ -8,53 +8,59 @@ - #define PASS_ALWAYS BIT(3) - #define PASS_ONLINE BIT(4) - -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define PASS_FSCK_DEBUG BIT(1) -+#else -+#define PASS_FSCK_DEBUG 0 -+#endif -+ - /* - * Passes may be reordered, but the second field is a persistent identifier and - * must never change: - */ --#define BCH_RECOVERY_PASSES() \ -- x(recovery_pass_empty, 41, PASS_SILENT) \ -- x(scan_for_btree_nodes, 37, 0) \ -- x(check_topology, 4, 0) \ -- x(accounting_read, 39, PASS_ALWAYS) \ -- x(alloc_read, 0, PASS_ALWAYS) \ -- x(stripes_read, 1, PASS_ALWAYS) \ -- x(initialize_subvolumes, 2, 0) \ -- x(snapshots_read, 3, PASS_ALWAYS) \ -- x(check_allocations, 5, PASS_FSCK) \ -- x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ -- x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ -- x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ -- x(journal_replay, 9, PASS_ALWAYS) \ -- x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ -- x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ -- x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ -- x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \ -- x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ -- x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ -- x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ -- x(bucket_gens_init, 17, 0) \ -- x(reconstruct_snapshots, 38, 0) \ -- x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ -- x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ -- x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ -- x(fs_upgrade_for_subvolumes, 22, 0) \ -- x(check_inodes, 24, PASS_FSCK) \ -- x(check_extents, 25, PASS_FSCK) \ -- x(check_indirect_extents, 26, PASS_FSCK) \ -- x(check_dirents, 27, PASS_FSCK) \ -- x(check_xattrs, 28, PASS_FSCK) \ -- x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ -- x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ -- x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ -- x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ -- x(check_nlinks, 31, PASS_FSCK) \ -- x(resume_logged_ops, 23, PASS_ALWAYS) \ -- x(delete_dead_inodes, 32, PASS_ALWAYS) \ -- x(fix_reflink_p, 33, 0) \ -- x(set_fs_needs_rebalance, 34, 0) \ -+#define BCH_RECOVERY_PASSES() \ -+ x(recovery_pass_empty, 41, PASS_SILENT) \ -+ x(scan_for_btree_nodes, 37, 0) \ -+ x(check_topology, 4, 0) \ -+ x(accounting_read, 39, PASS_ALWAYS) \ -+ x(alloc_read, 0, PASS_ALWAYS) \ -+ 
x(stripes_read, 1, PASS_ALWAYS) \ -+ x(initialize_subvolumes, 2, 0) \ -+ x(snapshots_read, 3, PASS_ALWAYS) \ -+ x(check_allocations, 5, PASS_FSCK) \ -+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ -+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ -+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ -+ x(journal_replay, 9, PASS_ALWAYS) \ -+ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ -+ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ -+ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ -+ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ -+ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ -+ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ -+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ -+ x(bucket_gens_init, 17, 0) \ -+ x(reconstruct_snapshots, 38, 0) \ -+ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ -+ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ -+ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ -+ x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ -+ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ -+ x(fs_upgrade_for_subvolumes, 22, 0) \ -+ x(check_inodes, 24, PASS_FSCK) \ -+ x(check_extents, 25, PASS_FSCK) \ -+ x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \ -+ x(check_dirents, 27, PASS_FSCK) \ -+ x(check_xattrs, 28, PASS_FSCK) \ -+ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ -+ x(check_unreachable_inodes, 40, PASS_FSCK) \ -+ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ -+ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ -+ x(check_nlinks, 31, PASS_FSCK) \ -+ x(resume_logged_ops, 23, PASS_ALWAYS) \ -+ x(delete_dead_inodes, 32, PASS_ALWAYS) \ -+ x(fix_reflink_p, 33, 0) \ -+ x(set_fs_needs_rebalance, 34, 0) - - /* We normally enumerate recovery passes in the order we run them: */ - enum bch_recovery_pass { -diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c -index f457925fa362..93ba4f4e47ca 100644 ---- a/fs/bcachefs/reflink.c -+++ b/fs/bcachefs/reflink.c -@@ -15,6 +15,17 @@ - - #include - -+static inline bool bkey_extent_is_reflink_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_indirect_inline_data: -+ return true; -+ default: -+ return false; -+ } -+} -+ - static inline unsigned bkey_type_to_indirect(const struct bkey *k) - { - switch (k->type) { -@@ -30,15 +41,15 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) - /* reflink pointers */ - - int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - int ret = 0; - -- bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), -+ bkey_fsck_err_on(REFLINK_P_IDX(p.v) < le32_to_cpu(p.v->front_pad), - c, reflink_p_front_pad_bad, - "idx < front_pad (%llu < %u)", -- le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); -+ REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad)); - fsck_err: - return ret; - } -@@ -49,7 +60,7 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - - prt_printf(out, "idx %llu front_pad %u back_pad %u", -- le64_to_cpu(p.v->idx), -+ REFLINK_P_IDX(p.v), - le32_to_cpu(p.v->front_pad), - le32_to_cpu(p.v->back_pad)); - } -@@ -65,49 +76,250 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r - */ - return false; - -- if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ if 
(REFLINK_P_IDX(l.v) + l.k->size != REFLINK_P_IDX(r.v)) -+ return false; -+ -+ if (REFLINK_P_ERROR(l.v) != REFLINK_P_ERROR(r.v)) - return false; - - bch2_key_resize(l.k, l.k->size + r.k->size); - return true; - } - -+/* indirect extents */ -+ -+int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, -+ struct bkey_validate_context from) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, REFLINK_P_IDX_MAX)), -+ c, reflink_v_pos_bad, -+ "indirect extent above maximum position 0:%llu", -+ REFLINK_P_IDX_MAX); -+ -+ ret = bch2_bkey_ptrs_validate(c, k, from); -+fsck_err: -+ return ret; -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+#if 0 -+Currently disabled, needs to be debugged: -+ -+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); -+ -+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); -+} -+#endif -+ -+/* indirect inline data */ -+ -+int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -+ struct bkey_validate_context from) -+{ -+ return 0; -+} -+ -+void bch2_indirect_inline_data_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); -+ unsigned datalen = bkey_inline_data_bytes(k.k); -+ -+ prt_printf(out, "refcount %llu datalen %u: %*phN", -+ le64_to_cpu(d.v->refcount), datalen, -+ min(datalen, 32U), d.v->data); -+} -+ -+/* lookup */ -+ -+static int bch2_indirect_extent_not_missing(struct btree_trans *trans, struct bkey_s_c_reflink_p p, -+ bool should_commit) -+{ -+ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); -+ int ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ SET_REFLINK_P_ERROR(&new->v, false); -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); -+ if (ret) -+ return ret; -+ -+ if (!should_commit) -+ return 0; -+ -+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+} -+ -+static int bch2_indirect_extent_missing_error(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 missing_start, u64 missing_end, -+ bool should_commit) -+{ -+ if (REFLINK_P_ERROR(p.v)) -+ return -BCH_ERR_missing_indirect_extent; -+ -+ struct bch_fs *c = trans->c; -+ u64 live_start = REFLINK_P_IDX(p.v); -+ u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; -+ u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); -+ u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ BUG_ON(missing_start < refd_start); -+ BUG_ON(missing_end > refd_end); -+ -+ if (fsck_err(trans, reflink_p_to_missing_reflink_v, -+ "pointer to missing indirect extent\n" -+ " %s\n" -+ " missing range %llu-%llu", -+ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), -+ missing_start, missing_end)) { -+ struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto err; -+ -+ /* -+ * Is the missing range not actually needed? 
-+ * -+ * p.v->idx refers to the data that we actually want, but if the -+ * indirect extent we point to was bigger, front_pad and back_pad -+ * indicate the range we took a reference on. -+ */ -+ -+ if (missing_end <= live_start) { -+ new->v.front_pad = cpu_to_le32(live_start - missing_end); -+ } else if (missing_start >= live_end) { -+ new->v.back_pad = cpu_to_le32(missing_start - live_end); -+ } else { -+ struct bpos new_start = bkey_start_pos(&new->k); -+ struct bpos new_end = new->k.p; -+ -+ if (missing_start > live_start) -+ new_start.offset += missing_start - live_start; -+ if (missing_end < live_end) -+ new_end.offset -= live_end - missing_end; -+ -+ bch2_cut_front(new_start, &new->k_i); -+ bch2_cut_back(new_end, &new->k_i); -+ -+ SET_REFLINK_P_ERROR(&new->v, true); -+ } -+ -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); -+ if (ret) -+ goto err; -+ -+ if (should_commit) -+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* -+ * This is used from the read path, which doesn't expect to have to do a -+ * transaction commit, and from triggers, which should not be doing a commit: -+ */ -+struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ s64 *offset_into_extent, -+ struct bkey_s_c_reflink_p p, -+ bool should_commit, -+ unsigned iter_flags) -+{ -+ BUG_ON(*offset_into_extent < -((s64) le32_to_cpu(p.v->front_pad))); -+ BUG_ON(*offset_into_extent >= p.k->size + le32_to_cpu(p.v->back_pad)); -+ -+ u64 reflink_offset = REFLINK_P_IDX(p.v) + *offset_into_extent; -+ -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_reflink, -+ POS(0, reflink_offset), iter_flags); -+ if (bkey_err(k)) -+ return k; -+ -+ if (unlikely(!bkey_extent_is_reflink_data(k.k))) { -+ bch2_trans_iter_exit(trans, iter); -+ -+ unsigned size = min((u64) k.k->size, -+ REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - -+ reflink_offset); -+ bch2_key_resize(&iter->k, size); -+ -+ int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, -+ k.k->p.offset, should_commit); -+ if (ret) -+ return bkey_s_c_err(ret); -+ } else if (unlikely(REFLINK_P_ERROR(p.v))) { -+ bch2_trans_iter_exit(trans, iter); -+ -+ int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); -+ if (ret) -+ return bkey_s_c_err(ret); -+ } -+ -+ *offset_into_extent = reflink_offset - bkey_start_offset(k.k); -+ return k; -+} -+ -+/* reflink pointer trigger */ -+ - static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, u64 *idx, - enum btree_iter_update_trigger_flags flags) - { - struct bch_fs *c = trans->c; -- struct btree_iter iter; -- struct bkey_i *k; -- __le64 *refcount; -- int add = !(flags & BTREE_TRIGGER_overwrite) ? 
1 : -1; - struct printbuf buf = PRINTBUF; -- int ret; - -- k = bch2_bkey_get_mut_noupdate(trans, &iter, -- BTREE_ID_reflink, POS(0, *idx), -- BTREE_ITER_with_updates); -- ret = PTR_ERR_OR_ZERO(k); -+ s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter, &offset_into_extent, p, false, -+ BTREE_ITER_intent| -+ BTREE_ITER_with_updates); -+ int ret = bkey_err(k); - if (ret) -- goto err; -+ return ret; - -- refcount = bkey_refcount(bkey_i_to_s(k)); -- if (!refcount) { -- bch2_bkey_val_to_text(&buf, c, p.s_c); -- bch2_trans_inconsistent(trans, -- "nonexistent indirect extent at %llu while marking\n %s", -- *idx, buf.buf); -- ret = -EIO; -- goto err; -+ if (bkey_deleted(k.k)) { -+ if (!(flags & BTREE_TRIGGER_overwrite)) -+ ret = -BCH_ERR_missing_indirect_extent; -+ goto next; - } - -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto err; -+ -+ __le64 *refcount = bkey_refcount(bkey_i_to_s(new)); - if (!*refcount && (flags & BTREE_TRIGGER_overwrite)) { - bch2_bkey_val_to_text(&buf, c, p.s_c); -- bch2_trans_inconsistent(trans, -- "indirect extent refcount underflow at %llu while marking\n %s", -- *idx, buf.buf); -- ret = -EIO; -- goto err; -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ log_fsck_err(trans, reflink_refcount_underflow, -+ "indirect extent refcount underflow while marking\n %s", -+ buf.buf); -+ goto next; - } - - if (flags & BTREE_TRIGGER_insert) { -@@ -115,25 +327,26 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, - u64 pad; - - pad = max_t(s64, le32_to_cpu(v->front_pad), -- le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); -+ REFLINK_P_IDX(v) - bkey_start_offset(&new->k)); - BUG_ON(pad > U32_MAX); - v->front_pad = cpu_to_le32(pad); - - pad = max_t(s64, le32_to_cpu(v->back_pad), -- k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); -+ new->k.p.offset - p.k->size - REFLINK_P_IDX(v)); - BUG_ON(pad > U32_MAX); - v->back_pad = cpu_to_le32(pad); - } - -- le64_add_cpu(refcount, add); -+ le64_add_cpu(refcount, !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1); - - bch2_btree_iter_set_pos_to_extent_start(&iter); -- ret = bch2_trans_update(trans, &iter, k, 0); -+ ret = bch2_trans_update(trans, &iter, new, 0); - if (ret) - goto err; -- -- *idx = k->k.p.offset; -+next: -+ *idx = k.k->p.offset; - err: -+fsck_err: - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); - return ret; -@@ -147,9 +360,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - struct bch_fs *c = trans->c; - struct reflink_gc *r; - int add = !(flags & BTREE_TRIGGER_overwrite) ? 
1 : -1; -- u64 start = le64_to_cpu(p.v->idx); -- u64 end = le64_to_cpu(p.v->idx) + p.k->size; -- u64 next_idx = end + le32_to_cpu(p.v->back_pad); -+ u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); - s64 ret = 0; - struct printbuf buf = PRINTBUF; - -@@ -168,36 +379,14 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, - *idx = r->offset; - return 0; - not_found: -- BUG_ON(!(flags & BTREE_TRIGGER_check_repair)); -- -- if (fsck_err(trans, reflink_p_to_missing_reflink_v, -- "pointer to missing indirect extent\n" -- " %s\n" -- " missing range %llu-%llu", -- (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), -- *idx, next_idx)) { -- struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c); -- ret = PTR_ERR_OR_ZERO(update); -+ if (flags & BTREE_TRIGGER_check_repair) { -+ ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); - if (ret) - goto err; -- -- if (next_idx <= start) { -- bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx); -- } else if (*idx >= end) { -- bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end); -- } else { -- bkey_error_init(update); -- update->k.p = p.k->p; -- update->k.size = p.k->size; -- set_bkey_val_u64s(&update->k, 0); -- } -- -- ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_norun); - } - - *idx = next_idx; - err: --fsck_err: - printbuf_exit(&buf); - return ret; - } -@@ -210,8 +399,8 @@ static int __trigger_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - int ret = 0; - -- u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); -- u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); -+ u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad); -+ u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); - - if (flags & BTREE_TRIGGER_transactional) { - while (idx < end && !ret) -@@ -253,35 +442,7 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, - return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags); - } - --/* indirect extents */ -- --int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) --{ -- return bch2_bkey_ptrs_validate(c, k, flags); --} -- --void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -- struct bkey_s_c k) --{ -- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -- -- prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -- -- bch2_bkey_ptrs_to_text(out, c, k); --} -- --#if 0 --Currently disabled, needs to be debugged: -- --bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) --{ -- struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); -- struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); -- -- return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); --} --#endif -+/* indirect extent trigger */ - - static inline void - check_indirect_extent_deleting(struct bkey_s new, -@@ -307,25 +468,6 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, - return bch2_trigger_extent(trans, btree_id, level, old, new, flags); - } - --/* indirect inline data */ -- --int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) --{ -- return 0; --} -- --void bch2_indirect_inline_data_to_text(struct printbuf *out, -- struct bch_fs *c, struct bkey_s_c k) --{ -- struct bkey_s_c_indirect_inline_data d = 
bkey_s_c_to_indirect_inline_data(k); -- unsigned datalen = bkey_inline_data_bytes(k.k); -- -- prt_printf(out, "refcount %llu datalen %u: %*phN", -- le64_to_cpu(d.v->refcount), datalen, -- min(datalen, 32U), d.v->data); --} -- - int bch2_trigger_indirect_inline_data(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s new, -@@ -336,9 +478,12 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *trans, - return 0; - } - -+/* create */ -+ - static int bch2_make_extent_indirect(struct btree_trans *trans, - struct btree_iter *extent_iter, -- struct bkey_i *orig) -+ struct bkey_i *orig, -+ bool reflink_p_may_update_opts_field) - { - struct bch_fs *c = trans->c; - struct btree_iter reflink_iter = { NULL }; -@@ -358,6 +503,14 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - if (ret) - goto err; - -+ /* -+ * XXX: we're assuming that 56 bits will be enough for the life of the -+ * filesystem: we need to implement wraparound, with a cursor in the -+ * logged ops btree: -+ */ -+ if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) -+ return -ENOSPC; -+ - r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); - ret = PTR_ERR_OR_ZERO(r_v); - if (ret) -@@ -394,7 +547,10 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, - memset(&r_p->v, 0, sizeof(r_p->v)); - #endif - -- r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); -+ -+ if (reflink_p_may_update_opts_field) -+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true); - - ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, - BTREE_UPDATE_internal_snapshot_node); -@@ -409,7 +565,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) - struct bkey_s_c k; - int ret; - -- for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) { -+ for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) { - if (bkey_extent_is_unwritten(k)) - continue; - -@@ -426,7 +582,8 @@ s64 bch2_remap_range(struct bch_fs *c, - subvol_inum dst_inum, u64 dst_offset, - subvol_inum src_inum, u64 src_offset, - u64 remap_sectors, -- u64 new_i_size, s64 *i_sectors_delta) -+ u64 new_i_size, s64 *i_sectors_delta, -+ bool may_change_src_io_path_opts) - { - struct btree_trans *trans; - struct btree_iter dst_iter, src_iter; -@@ -439,6 +596,8 @@ s64 bch2_remap_range(struct bch_fs *c, - struct bpos src_want; - u64 dst_done = 0; - u32 dst_snapshot, src_snapshot; -+ bool reflink_p_may_update_opts_field = -+ bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts); - int ret = 0, ret2 = 0; - - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink)) -@@ -520,7 +679,8 @@ s64 bch2_remap_range(struct bch_fs *c, - src_k = bkey_i_to_s_c(new_src.k); - - ret = bch2_make_extent_indirect(trans, &src_iter, -- new_src.k); -+ new_src.k, -+ reflink_p_may_update_opts_field); - if (ret) - continue; - -@@ -533,11 +693,15 @@ s64 bch2_remap_range(struct bch_fs *c, - struct bkey_i_reflink_p *dst_p = - bkey_reflink_p_init(new_dst.k); - -- u64 offset = le64_to_cpu(src_p.v->idx) + -+ u64 offset = REFLINK_P_IDX(src_p.v) + - (src_want.offset - - bkey_start_offset(src_k.k)); - -- dst_p->v.idx = cpu_to_le64(offset); -+ SET_REFLINK_P_IDX(&dst_p->v, offset); -+ -+ if (reflink_p_may_update_opts_field && -+ may_change_src_io_path_opts) -+ SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true); - } else { - BUG(); - } -@@ -547,7 +711,7 @@ s64 bch2_remap_range(struct bch_fs *c, 
- min(src_k.k->p.offset - src_want.offset, - dst_end.offset - dst_iter.pos.offset)); - -- ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?: -+ ret = bch2_bkey_set_needs_rebalance(c, &opts, new_dst.k) ?: - bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, -@@ -591,3 +755,97 @@ s64 bch2_remap_range(struct bch_fs *c, - - return dst_done ?: ret ?: ret2; - } -+ -+/* fsck */ -+ -+static int bch2_gc_write_reflink_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ size_t *idx) -+{ -+ struct bch_fs *c = trans->c; -+ const __le64 *refcount = bkey_refcount_c(k); -+ struct printbuf buf = PRINTBUF; -+ struct reflink_gc *r; -+ int ret = 0; -+ -+ if (!refcount) -+ return 0; -+ -+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && -+ r->offset < k.k->p.offset) -+ ++*idx; -+ -+ if (!r || -+ r->offset != k.k->p.offset || -+ r->size != k.k->size) { -+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); -+ return -EINVAL; -+ } -+ -+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), -+ trans, reflink_v_refcount_wrong, -+ "reflink key has wrong refcount:\n" -+ " %s\n" -+ " should be %u", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf), -+ r->refcount)) { -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto out; -+ -+ if (!r->refcount) -+ new->k.type = KEY_TYPE_deleted; -+ else -+ *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); -+ ret = bch2_trans_update(trans, iter, new, 0); -+ } -+out: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_gc_reflink_done(struct bch_fs *c) -+{ -+ size_t idx = 0; -+ -+ int ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_prefetch, k, -+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -+ bch2_gc_write_reflink_key(trans, &iter, k, &idx))); -+ c->reflink_gc_nr = 0; -+ return ret; -+} -+ -+int bch2_gc_reflink_start(struct bch_fs *c) -+{ -+ c->reflink_gc_nr = 0; -+ -+ int ret = bch2_trans_run(c, -+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_prefetch, k, ({ -+ const __le64 *refcount = bkey_refcount_c(k); -+ -+ if (!refcount) -+ continue; -+ -+ struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table, -+ c->reflink_gc_nr++, GFP_KERNEL); -+ if (!r) { -+ ret = -BCH_ERR_ENOMEM_gc_reflink_start; -+ break; -+ } -+ -+ r->offset = k.k->p.offset; -+ r->size = k.k->size; -+ r->refcount = 0; -+ 0; -+ }))); -+ -+ bch_err_fn(c, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -index 51afe11d8ed6..1632780bdf18 100644 ---- a/fs/bcachefs/reflink.h -+++ b/fs/bcachefs/reflink.h -@@ -2,9 +2,8 @@ - #ifndef _BCACHEFS_REFLINK_H - #define _BCACHEFS_REFLINK_H - --enum bch_validate_flags; -- --int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); - int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, -@@ -19,7 +18,8 @@ int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, - .min_val_size = 16, \ - }) - --int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_reflink_v_validate(struct bch_fs *, struct 
bkey_s_c, -+ struct bkey_validate_context); - void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, -@@ -34,7 +34,7 @@ int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, - }) - - int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - void bch2_indirect_inline_data_to_text(struct printbuf *, - struct bch_fs *, struct bkey_s_c); - int bch2_trigger_indirect_inline_data(struct btree_trans *, -@@ -73,7 +73,15 @@ static inline __le64 *bkey_refcount(struct bkey_s k) - } - } - -+struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_iter *, -+ s64 *, struct bkey_s_c_reflink_p, -+ bool, unsigned); -+ - s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, -- subvol_inum, u64, u64, u64, s64 *); -+ subvol_inum, u64, u64, u64, s64 *, -+ bool); -+ -+int bch2_gc_reflink_done(struct bch_fs *); -+int bch2_gc_reflink_start(struct bch_fs *); - - #endif /* _BCACHEFS_REFLINK_H */ -diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h -index 6772eebb1fc6..92995e4f898e 100644 ---- a/fs/bcachefs/reflink_format.h -+++ b/fs/bcachefs/reflink_format.h -@@ -4,7 +4,7 @@ - - struct bch_reflink_p { - struct bch_val v; -- __le64 idx; -+ __le64 idx_flags; - /* - * A reflink pointer might point to an indirect extent which is then - * later split (by copygc or rebalance). If we only pointed to part of -@@ -17,6 +17,11 @@ struct bch_reflink_p { - __le32 back_pad; - } __packed __aligned(8); - -+LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); -+LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57); -+LE64_BITMASK(REFLINK_P_MAY_UPDATE_OPTIONS, -+ struct bch_reflink_p, idx_flags, 57, 58); -+ - struct bch_reflink_v { - struct bch_val v; - __le64 refcount; -diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c -index 005275281804..59c8770e4a0e 100644 ---- a/fs/bcachefs/sb-clean.c -+++ b/fs/bcachefs/sb-clean.c -@@ -23,6 +23,10 @@ - int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean, - int write) - { -+ struct bkey_validate_context from = { -+ .flags = write, -+ .from = BKEY_VALIDATE_superblock, -+ }; - struct jset_entry *entry; - int ret; - -@@ -40,7 +44,7 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle - ret = bch2_journal_entry_validate(c, NULL, entry, - le16_to_cpu(c->disk_sb.sb->version), - BCH_SB_BIG_ENDIAN(c->disk_sb.sb), -- write); -+ from); - if (ret) - return ret; - } -diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h -index 62ea478215d0..fdcf598f08b1 100644 ---- a/fs/bcachefs/sb-counters_format.h -+++ b/fs/bcachefs/sb-counters_format.h -@@ -2,86 +2,91 @@ - #ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H - #define _BCACHEFS_SB_COUNTERS_FORMAT_H - --#define BCH_PERSISTENT_COUNTERS() \ -- x(io_read, 0) \ -- x(io_write, 1) \ -- x(io_move, 2) \ -- x(bucket_invalidate, 3) \ -- x(bucket_discard, 4) \ -- x(bucket_alloc, 5) \ -- x(bucket_alloc_fail, 6) \ -- x(btree_cache_scan, 7) \ -- x(btree_cache_reap, 8) \ -- x(btree_cache_cannibalize, 9) \ -- x(btree_cache_cannibalize_lock, 10) \ -- x(btree_cache_cannibalize_lock_fail, 11) \ -- x(btree_cache_cannibalize_unlock, 12) \ -- x(btree_node_write, 13) \ -- x(btree_node_read, 14) \ -- x(btree_node_compact, 15) \ -- x(btree_node_merge, 16) \ -- x(btree_node_split, 17) \ 
-- x(btree_node_rewrite, 18) \ -- x(btree_node_alloc, 19) \ -- x(btree_node_free, 20) \ -- x(btree_node_set_root, 21) \ -- x(btree_path_relock_fail, 22) \ -- x(btree_path_upgrade_fail, 23) \ -- x(btree_reserve_get_fail, 24) \ -- x(journal_entry_full, 25) \ -- x(journal_full, 26) \ -- x(journal_reclaim_finish, 27) \ -- x(journal_reclaim_start, 28) \ -- x(journal_write, 29) \ -- x(read_promote, 30) \ -- x(read_bounce, 31) \ -- x(read_split, 33) \ -- x(read_retry, 32) \ -- x(read_reuse_race, 34) \ -- x(move_extent_read, 35) \ -- x(move_extent_write, 36) \ -- x(move_extent_finish, 37) \ -- x(move_extent_fail, 38) \ -- x(move_extent_start_fail, 39) \ -- x(copygc, 40) \ -- x(copygc_wait, 41) \ -- x(gc_gens_end, 42) \ -- x(gc_gens_start, 43) \ -- x(trans_blocked_journal_reclaim, 44) \ -- x(trans_restart_btree_node_reused, 45) \ -- x(trans_restart_btree_node_split, 46) \ -- x(trans_restart_fault_inject, 47) \ -- x(trans_restart_iter_upgrade, 48) \ -- x(trans_restart_journal_preres_get, 49) \ -- x(trans_restart_journal_reclaim, 50) \ -- x(trans_restart_journal_res_get, 51) \ -- x(trans_restart_key_cache_key_realloced, 52) \ -- x(trans_restart_key_cache_raced, 53) \ -- x(trans_restart_mark_replicas, 54) \ -- x(trans_restart_mem_realloced, 55) \ -- x(trans_restart_memory_allocation_failure, 56) \ -- x(trans_restart_relock, 57) \ -- x(trans_restart_relock_after_fill, 58) \ -- x(trans_restart_relock_key_cache_fill, 59) \ -- x(trans_restart_relock_next_node, 60) \ -- x(trans_restart_relock_parent_for_fill, 61) \ -- x(trans_restart_relock_path, 62) \ -- x(trans_restart_relock_path_intent, 63) \ -- x(trans_restart_too_many_iters, 64) \ -- x(trans_restart_traverse, 65) \ -- x(trans_restart_upgrade, 66) \ -- x(trans_restart_would_deadlock, 67) \ -- x(trans_restart_would_deadlock_write, 68) \ -- x(trans_restart_injected, 69) \ -- x(trans_restart_key_cache_upgrade, 70) \ -- x(trans_traverse_all, 71) \ -- x(transaction_commit, 72) \ -- x(write_super, 73) \ -- x(trans_restart_would_deadlock_recursion_limit, 74) \ -- x(trans_restart_write_buffer_flush, 75) \ -- x(trans_restart_split_race, 76) \ -- x(write_buffer_flush_slowpath, 77) \ -- x(write_buffer_flush_sync, 78) -+enum counters_flags { -+ TYPE_COUNTER = BIT(0), /* event counters */ -+ TYPE_SECTORS = BIT(1), /* amount counters, the unit is sectors */ -+}; -+ -+#define BCH_PERSISTENT_COUNTERS() \ -+ x(io_read, 0, TYPE_SECTORS) \ -+ x(io_write, 1, TYPE_SECTORS) \ -+ x(io_move, 2, TYPE_SECTORS) \ -+ x(bucket_invalidate, 3, TYPE_COUNTER) \ -+ x(bucket_discard, 4, TYPE_COUNTER) \ -+ x(bucket_alloc, 5, TYPE_COUNTER) \ -+ x(bucket_alloc_fail, 6, TYPE_COUNTER) \ -+ x(btree_cache_scan, 7, TYPE_COUNTER) \ -+ x(btree_cache_reap, 8, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize, 9, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_lock, 10, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_lock_fail, 11, TYPE_COUNTER) \ -+ x(btree_cache_cannibalize_unlock, 12, TYPE_COUNTER) \ -+ x(btree_node_write, 13, TYPE_COUNTER) \ -+ x(btree_node_read, 14, TYPE_COUNTER) \ -+ x(btree_node_compact, 15, TYPE_COUNTER) \ -+ x(btree_node_merge, 16, TYPE_COUNTER) \ -+ x(btree_node_split, 17, TYPE_COUNTER) \ -+ x(btree_node_rewrite, 18, TYPE_COUNTER) \ -+ x(btree_node_alloc, 19, TYPE_COUNTER) \ -+ x(btree_node_free, 20, TYPE_COUNTER) \ -+ x(btree_node_set_root, 21, TYPE_COUNTER) \ -+ x(btree_path_relock_fail, 22, TYPE_COUNTER) \ -+ x(btree_path_upgrade_fail, 23, TYPE_COUNTER) \ -+ x(btree_reserve_get_fail, 24, TYPE_COUNTER) \ -+ x(journal_entry_full, 25, TYPE_COUNTER) \ -+ x(journal_full, 26, 
TYPE_COUNTER) \ -+ x(journal_reclaim_finish, 27, TYPE_COUNTER) \ -+ x(journal_reclaim_start, 28, TYPE_COUNTER) \ -+ x(journal_write, 29, TYPE_COUNTER) \ -+ x(read_promote, 30, TYPE_COUNTER) \ -+ x(read_bounce, 31, TYPE_COUNTER) \ -+ x(read_split, 33, TYPE_COUNTER) \ -+ x(read_retry, 32, TYPE_COUNTER) \ -+ x(read_reuse_race, 34, TYPE_COUNTER) \ -+ x(move_extent_read, 35, TYPE_SECTORS) \ -+ x(move_extent_write, 36, TYPE_SECTORS) \ -+ x(move_extent_finish, 37, TYPE_SECTORS) \ -+ x(move_extent_fail, 38, TYPE_COUNTER) \ -+ x(move_extent_start_fail, 39, TYPE_COUNTER) \ -+ x(copygc, 40, TYPE_COUNTER) \ -+ x(copygc_wait, 41, TYPE_COUNTER) \ -+ x(gc_gens_end, 42, TYPE_COUNTER) \ -+ x(gc_gens_start, 43, TYPE_COUNTER) \ -+ x(trans_blocked_journal_reclaim, 44, TYPE_COUNTER) \ -+ x(trans_restart_btree_node_reused, 45, TYPE_COUNTER) \ -+ x(trans_restart_btree_node_split, 46, TYPE_COUNTER) \ -+ x(trans_restart_fault_inject, 47, TYPE_COUNTER) \ -+ x(trans_restart_iter_upgrade, 48, TYPE_COUNTER) \ -+ x(trans_restart_journal_preres_get, 49, TYPE_COUNTER) \ -+ x(trans_restart_journal_reclaim, 50, TYPE_COUNTER) \ -+ x(trans_restart_journal_res_get, 51, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_key_realloced, 52, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_raced, 53, TYPE_COUNTER) \ -+ x(trans_restart_mark_replicas, 54, TYPE_COUNTER) \ -+ x(trans_restart_mem_realloced, 55, TYPE_COUNTER) \ -+ x(trans_restart_memory_allocation_failure, 56, TYPE_COUNTER) \ -+ x(trans_restart_relock, 57, TYPE_COUNTER) \ -+ x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \ -+ x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \ -+ x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \ -+ x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \ -+ x(trans_restart_relock_path, 62, TYPE_COUNTER) \ -+ x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \ -+ x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \ -+ x(trans_restart_traverse, 65, TYPE_COUNTER) \ -+ x(trans_restart_upgrade, 66, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock, 67, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock_write, 68, TYPE_COUNTER) \ -+ x(trans_restart_injected, 69, TYPE_COUNTER) \ -+ x(trans_restart_key_cache_upgrade, 70, TYPE_COUNTER) \ -+ x(trans_traverse_all, 71, TYPE_COUNTER) \ -+ x(transaction_commit, 72, TYPE_COUNTER) \ -+ x(write_super, 73, TYPE_COUNTER) \ -+ x(trans_restart_would_deadlock_recursion_limit, 74, TYPE_COUNTER) \ -+ x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \ -+ x(trans_restart_split_race, 76, TYPE_COUNTER) \ -+ x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \ -+ x(write_buffer_flush_sync, 78, TYPE_COUNTER) - - enum bch_persistent_counters { - #define x(t, n, ...) 
BCH_COUNTER_##t, -diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c -index 8767c33c2b51..14f6b6a5fb38 100644 ---- a/fs/bcachefs/sb-downgrade.c -+++ b/fs/bcachefs/sb-downgrade.c -@@ -81,7 +81,19 @@ - BCH_FSCK_ERR_accounting_mismatch) \ - x(inode_has_child_snapshots, \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ -- BCH_FSCK_ERR_inode_has_child_snapshots_wrong) -+ BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \ -+ x(backpointer_bucket_gen, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ -+ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ -+ x(disk_accounting_big_endian, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) \ -+ x(directory_size, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ -+ BCH_FSCK_ERR_directory_size_mismatch) \ - - #define DOWNGRADE_TABLE() \ - x(bucket_stripe_sectors, \ -@@ -117,7 +129,19 @@ - BCH_FSCK_ERR_bkey_version_in_future) \ - x(rebalance_work_acct_fix, \ - BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -- BCH_FSCK_ERR_accounting_mismatch) -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) \ -+ x(backpointer_bucket_gen, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ -+ BCH_FSCK_ERR_backpointer_bucket_offset_wrong, \ -+ BCH_FSCK_ERR_backpointer_to_missing_ptr, \ -+ BCH_FSCK_ERR_ptr_to_missing_backpointer) \ -+ x(disk_accounting_big_endian, \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ -+ BCH_FSCK_ERR_accounting_mismatch, \ -+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ -+ BCH_FSCK_ERR_accounting_key_junk_at_end) - - struct upgrade_downgrade_entry { - u64 recovery_passes; -diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h -index 9feb6739f77a..ea0a18364751 100644 ---- a/fs/bcachefs/sb-errors_format.h -+++ b/fs/bcachefs/sb-errors_format.h -@@ -5,9 +5,8 @@ - enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, -- FSCK_NEED_FSCK = 1 << 2, -- FSCK_NO_RATELIMIT = 1 << 3, -- FSCK_AUTOFIX = 1 << 4, -+ FSCK_NO_RATELIMIT = 1 << 2, -+ FSCK_AUTOFIX = 1 << 3, - }; - - #define BCH_SB_ERRS() \ -@@ -58,8 +57,8 @@ enum bch_fsck_flags { - x(bset_wrong_sector_offset, 44, 0) \ - x(bset_empty, 45, 0) \ - x(bset_bad_seq, 46, 0) \ -- x(bset_blacklisted_journal_seq, 47, 0) \ -- x(first_bset_blacklisted_journal_seq, 48, 0) \ -+ x(bset_blacklisted_journal_seq, 47, FSCK_AUTOFIX) \ -+ x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \ - x(btree_node_bad_btree, 49, 0) \ - x(btree_node_bad_level, 50, 0) \ - x(btree_node_bad_min_key, 51, 0) \ -@@ -68,17 +67,17 @@ enum bch_fsck_flags { - x(btree_node_bkey_past_bset_end, 54, 0) \ - x(btree_node_bkey_bad_format, 55, 0) \ - x(btree_node_bad_bkey, 56, 0) \ -- x(btree_node_bkey_out_of_order, 57, 0) \ -- x(btree_root_bkey_invalid, 58, 0) \ -- x(btree_root_read_error, 59, 0) \ -+ x(btree_node_bkey_out_of_order, 57, FSCK_AUTOFIX) \ -+ x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \ -+ x(btree_root_read_error, 59, FSCK_AUTOFIX) \ - x(btree_root_bad_min_key, 60, 0) \ - x(btree_root_bad_max_key, 61, 0) \ -- x(btree_node_read_error, 62, 0) \ -- x(btree_node_topology_bad_min_key, 63, 0) \ -- x(btree_node_topology_bad_max_key, 64, 0) \ -- x(btree_node_topology_overwritten_by_prev_node, 65, 0) \ -- x(btree_node_topology_overwritten_by_next_node, 66, 0) \ -- 
x(btree_node_topology_interior_node_empty, 67, 0) \ -+ x(btree_node_read_error, 62, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \ -+ x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \ -+ x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \ -+ x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \ -+ x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \ - x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \ - x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \ - x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \ -@@ -123,11 +122,12 @@ enum bch_fsck_flags { - x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \ - x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \ - x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ -+ x(alloc_key_journal_seq_in_future, 298, FSCK_AUTOFIX) \ - x(bucket_sector_count_overflow, 112, 0) \ - x(bucket_metadata_type_mismatch, 113, 0) \ -- x(need_discard_key_wrong, 114, 0) \ -- x(freespace_key_wrong, 115, 0) \ -- x(freespace_hole_missing, 116, 0) \ -+ x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \ -+ x(freespace_key_wrong, 115, FSCK_AUTOFIX) \ -+ x(freespace_hole_missing, 116, FSCK_AUTOFIX) \ - x(bucket_gens_val_size_bad, 117, 0) \ - x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ - x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ -@@ -139,9 +139,10 @@ enum bch_fsck_flags { - x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ - x(backpointer_bucket_offset_wrong, 125, 0) \ - x(backpointer_level_bad, 294, 0) \ -- x(backpointer_to_missing_device, 126, 0) \ -- x(backpointer_to_missing_alloc, 127, 0) \ -- x(backpointer_to_missing_ptr, 128, 0) \ -+ x(backpointer_dev_bad, 297, 0) \ -+ x(backpointer_to_missing_device, 126, FSCK_AUTOFIX) \ -+ x(backpointer_to_missing_alloc, 127, FSCK_AUTOFIX) \ -+ x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \ - x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ - x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ - x(lru_entry_bad, 131, FSCK_AUTOFIX) \ -@@ -167,14 +168,15 @@ enum bch_fsck_flags { - x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, 0) \ - x(ptr_too_stale, 153, 0) \ -- x(stale_dirty_ptr, 154, 0) \ -+ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ - x(ptr_bucket_data_type_mismatch, 155, 0) \ - x(ptr_cached_and_erasure_coded, 156, 0) \ - x(ptr_crc_uncompressed_size_too_small, 157, 0) \ -+ x(ptr_crc_uncompressed_size_too_big, 161, 0) \ -+ x(ptr_crc_uncompressed_size_mismatch, 300, 0) \ - x(ptr_crc_csum_type_unknown, 158, 0) \ - x(ptr_crc_compression_type_unknown, 159, 0) \ - x(ptr_crc_redundant, 160, 0) \ -- x(ptr_crc_uncompressed_size_too_big, 161, 0) \ - x(ptr_crc_nonce_mismatch, 162, 0) \ - x(ptr_stripe_redundant, 163, 0) \ - x(reservation_key_nr_replicas_invalid, 164, 0) \ -@@ -209,6 +211,7 @@ enum bch_fsck_flags { - x(bkey_in_missing_snapshot, 190, 0) \ - x(inode_pos_inode_nonzero, 191, 0) \ - x(inode_pos_blockdev_range, 192, 0) \ -+ x(inode_alloc_cursor_inode_bad, 301, 0) \ - x(inode_unpack_error, 193, 0) \ - x(inode_str_hash_invalid, 194, 0) \ - x(inode_v3_fields_start_bad, 195, 0) \ -@@ -232,6 +235,7 @@ enum bch_fsck_flags { - x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ - x(inode_has_child_snapshots_wrong, 287, 0) \ - x(inode_unreachable, 210, FSCK_AUTOFIX) \ -+ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ - x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ - x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ - x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ -@@ -252,6 +256,7 @@ enum bch_fsck_flags { - x(dirent_in_missing_dir_inode, 227, 0) \ - 
x(dirent_in_non_dir_inode, 228, 0) \ - x(dirent_to_missing_inode, 229, 0) \ -+ x(dirent_to_overwritten_inode, 302, 0) \ - x(dirent_to_missing_subvol, 230, 0) \ - x(dirent_to_itself, 231, 0) \ - x(quota_type_invalid, 232, 0) \ -@@ -288,7 +293,7 @@ enum bch_fsck_flags { - x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ - x(snapshot_node_missing, 264, 0) \ - x(dup_backpointer_to_bad_csum_extent, 265, 0) \ -- x(btree_bitmap_not_marked, 266, 0) \ -+ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ - x(sb_clean_entry_overrun, 267, 0) \ - x(btree_ptr_v2_written_0, 268, 0) \ - x(subvol_snapshot_bad, 269, 0) \ -@@ -306,7 +311,10 @@ enum bch_fsck_flags { - x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ - x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ - x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ -- x(MAX, 295, 0) -+ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ -+ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ -+ x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ -+ x(MAX, 304, 0) - - enum bch_sb_error_id { - #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c -index 617d07e53b20..7e7c66a1e1a6 100644 ---- a/fs/bcachefs/six.c -+++ b/fs/bcachefs/six.c -@@ -491,8 +491,12 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, - list_del(&wait->list); - raw_spin_unlock(&lock->wait_lock); - -- if (unlikely(acquired)) -+ if (unlikely(acquired)) { - do_six_unlock_type(lock, type); -+ } else if (type == SIX_LOCK_write) { -+ six_clear_bitmask(lock, SIX_LOCK_HELD_write); -+ six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); -+ } - break; - } - -@@ -501,10 +505,6 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, - - __set_current_state(TASK_RUNNING); - out: -- if (ret && type == SIX_LOCK_write) { -- six_clear_bitmask(lock, SIX_LOCK_HELD_write); -- six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); -- } - trace_contention_end(lock, 0); - - return ret; -@@ -616,8 +616,6 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long - - if (type != SIX_LOCK_write) - six_release(&lock->dep_map, ip); -- else -- lock->seq++; - - if (type == SIX_LOCK_intent && - lock->intent_lock_recurse) { -@@ -625,6 +623,15 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long - return; - } - -+ if (type == SIX_LOCK_write && -+ lock->write_lock_recurse) { -+ --lock->write_lock_recurse; -+ return; -+ } -+ -+ if (type == SIX_LOCK_write) -+ lock->seq++; -+ - do_six_unlock_type(lock, type); - } - EXPORT_SYMBOL_GPL(six_unlock_ip); -@@ -735,13 +742,13 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type) - atomic_add(l[type].lock_val, &lock->state); - } - break; -+ case SIX_LOCK_write: -+ lock->write_lock_recurse++; -+ fallthrough; - case SIX_LOCK_intent: - EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent)); - lock->intent_lock_recurse++; - break; -- case SIX_LOCK_write: -- BUG(); -- break; - } - } - EXPORT_SYMBOL_GPL(six_lock_increment); -diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h -index 68d46fd7f391..c142e06b7a3a 100644 ---- a/fs/bcachefs/six.h -+++ b/fs/bcachefs/six.h -@@ -137,6 +137,7 @@ struct six_lock { - atomic_t state; - u32 seq; - unsigned intent_lock_recurse; -+ unsigned write_lock_recurse; - struct task_struct *owner; - unsigned __percpu *readers; - raw_spinlock_t wait_lock; -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -index ae57638506c3..c54091a28909 
100644 ---- a/fs/bcachefs/snapshot.c -+++ b/fs/bcachefs/snapshot.c -@@ -2,6 +2,7 @@ - - #include "bcachefs.h" - #include "bkey_buf.h" -+#include "btree_cache.h" - #include "btree_key_cache.h" - #include "btree_update.h" - #include "buckets.h" -@@ -32,7 +33,7 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - int ret = 0; - -@@ -225,7 +226,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, - } - - int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_snapshot s; - u32 i, id; -@@ -279,23 +280,6 @@ int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, - return ret; - } - --static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) --{ -- struct snapshot_t *t = snapshot_t_mut(c, id); -- u32 parent = id; -- -- while ((parent = bch2_snapshot_parent_early(c, parent)) && -- parent - id - 1 < IS_ANCESTOR_BITMAP) -- __set_bit(parent - id - 1, t->is_ancestor); --} -- --static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) --{ -- mutex_lock(&c->snapshot_table_lock); -- __set_is_ancestor_bitmap(c, id); -- mutex_unlock(&c->snapshot_table_lock); --} -- - static int __bch2_mark_snapshot(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, -@@ -317,6 +301,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, - if (new.k->type == KEY_TYPE_snapshot) { - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - -+ t->live = true; - t->parent = le32_to_cpu(s.v->parent); - t->children[0] = le32_to_cpu(s.v->children[0]); - t->children[1] = le32_to_cpu(s.v->children[1]); -@@ -335,7 +320,11 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, - t->skip[2] = 0; - } - -- __set_is_ancestor_bitmap(c, id); -+ u32 parent = id; -+ -+ while ((parent = bch2_snapshot_parent_early(c, parent)) && -+ parent - id - 1 < IS_ANCESTOR_BITMAP) -+ __set_bit(parent - id - 1, t->is_ancestor); - - if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); -@@ -365,70 +354,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, - BTREE_ITER_with_updates, snapshot, s); - } - --static int bch2_snapshot_live(struct btree_trans *trans, u32 id) --{ -- struct bch_snapshot v; -- int ret; -- -- if (!id) -- return 0; -- -- ret = bch2_snapshot_lookup(trans, id, &v); -- if (bch2_err_matches(ret, ENOENT)) -- bch_err(trans->c, "snapshot node %u not found", id); -- if (ret) -- return ret; -- -- return !BCH_SNAPSHOT_DELETED(&v); --} -- --/* -- * If @k is a snapshot with just one live child, it's part of a linear chain, -- * which we consider to be an equivalence class: and then after snapshot -- * deletion cleanup, there should only be a single key at a given position in -- * this equivalence class. -- * -- * This sets the equivalence class of @k to be the child's equivalence class, if -- * it's part of such a linear chain: this correctly sets equivalence classes on -- * startup if we run leaf to root (i.e. in natural key order). 
-- */ --static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) --{ -- struct bch_fs *c = trans->c; -- unsigned i, nr_live = 0, live_idx = 0; -- struct bkey_s_c_snapshot snap; -- u32 id = k.k->p.offset, child[2]; -- -- if (k.k->type != KEY_TYPE_snapshot) -- return 0; -- -- snap = bkey_s_c_to_snapshot(k); -- -- child[0] = le32_to_cpu(snap.v->children[0]); -- child[1] = le32_to_cpu(snap.v->children[1]); -- -- for (i = 0; i < 2; i++) { -- int ret = bch2_snapshot_live(trans, child[i]); -- -- if (ret < 0) -- return ret; -- -- if (ret) -- live_idx = i; -- nr_live += ret; -- } -- -- mutex_lock(&c->snapshot_table_lock); -- -- snapshot_t_mut(c, id)->equiv = nr_live == 1 -- ? snapshot_t_mut(c, child[live_idx])->equiv -- : id; -- -- mutex_unlock(&c->snapshot_table_lock); -- -- return 0; --} -- - /* fsck: */ - - static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) -@@ -506,7 +431,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, - break; - } - } -- - bch2_trans_iter_exit(trans, &iter); - - if (!ret && !found) { -@@ -536,6 +460,7 @@ static int check_snapshot_tree(struct btree_trans *trans, - struct bch_snapshot s; - struct bch_subvolume subvol; - struct printbuf buf = PRINTBUF; -+ struct btree_iter snapshot_iter = {}; - u32 root_id; - int ret; - -@@ -545,22 +470,35 @@ static int check_snapshot_tree(struct btree_trans *trans, - st = bkey_s_c_to_snapshot_tree(k); - root_id = le32_to_cpu(st.v->root_snapshot); - -- ret = bch2_snapshot_lookup(trans, root_id, &s); -+ struct bkey_s_c_snapshot snapshot_k = -+ bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, -+ POS(0, root_id), 0, snapshot); -+ ret = bkey_err(snapshot_k); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -+ if (!ret) -+ bkey_val_copy(&s, snapshot_k); -+ - if (fsck_err_on(ret || - root_id != bch2_snapshot_root(c, root_id) || - st.k->p.offset != le32_to_cpu(s.tree), - trans, snapshot_tree_to_missing_snapshot, - "snapshot tree points to missing/incorrect snapshot:\n %s", -- (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { -+ (bch2_bkey_val_to_text(&buf, c, st.s_c), -+ prt_newline(&buf), -+ ret -+ ? 
prt_printf(&buf, "(%s)", bch2_err_str(ret)) -+ : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), -+ buf.buf))) { - ret = bch2_btree_delete_at(trans, iter, 0); - goto err; - } - -- ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), -- false, 0, &subvol); -+ if (!st.v->master_subvol) -+ goto out; -+ -+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - -@@ -603,8 +541,10 @@ static int check_snapshot_tree(struct btree_trans *trans, - u->v.master_subvol = cpu_to_le32(subvol_id); - st = snapshot_tree_i_to_s_c(u); - } -+out: - err: - fsck_err: -+ bch2_trans_iter_exit(trans, &snapshot_iter); - printbuf_exit(&buf); - return ret; - } -@@ -799,7 +739,7 @@ static int check_snapshot(struct btree_trans *trans, - - if (should_have_subvol) { - id = le32_to_cpu(s.subvol); -- ret = bch2_subvolume_get(trans, id, 0, false, &subvol); -+ ret = bch2_subvolume_get(trans, id, false, &subvol); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -@@ -902,7 +842,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) - { - struct bch_fs *c = trans->c; - -- if (bch2_snapshot_equiv(c, id)) -+ if (bch2_snapshot_exists(c, id)) - return 0; - - /* Do we need to reconstruct the snapshot_tree entry as well? */ -@@ -951,8 +891,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) - - return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, -- bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: -- bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i)); -+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0); - } - - /* Figure out which snapshot nodes belong in the same tree: */ -@@ -1050,7 +989,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) - snapshot_id_list_to_text(&buf, t); - - darray_for_each(*t, id) { -- if (fsck_err_on(!bch2_snapshot_equiv(c, *id), -+ if (fsck_err_on(!bch2_snapshot_exists(c, *id), - trans, snapshot_node_missing, - "snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) { - if (t->nr > 1) { -@@ -1083,10 +1022,12 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, - struct printbuf buf = PRINTBUF; - int ret = 0; - -- if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), -+ if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot), - trans, bkey_in_missing_snapshot, - "key in missing snapshot %s, delete?", -- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ (bch2_btree_id_to_text(&buf, iter->btree_id), -+ prt_char(&buf, ' '), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - ret = bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_internal_snapshot_node) ?: 1; - fsck_err: -@@ -1100,13 +1041,11 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans, - int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) - { - struct btree_iter iter; -- struct bkey_i_snapshot *s; -- int ret = 0; -- -- s = bch2_bkey_get_mut_typed(trans, &iter, -+ struct bkey_i_snapshot *s = -+ bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_snapshots, POS(0, id), - 0, snapshot); -- ret = PTR_ERR_OR_ZERO(s); -+ int ret = PTR_ERR_OR_ZERO(s); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), - trans->c, "missing snapshot %u", id); -@@ -1294,10 +1233,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - goto err; - - 
new_snapids[i] = iter.pos.offset; -- -- mutex_lock(&c->snapshot_table_lock); -- snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; -- mutex_unlock(&c->snapshot_table_lock); - } - err: - bch2_trans_iter_exit(trans, &iter); -@@ -1403,129 +1338,153 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, - * that key to snapshot leaf nodes, where we can mutate it - */ - --static int delete_dead_snapshots_process_key(struct btree_trans *trans, -- struct btree_iter *iter, -- struct bkey_s_c k, -- snapshot_id_list *deleted, -- snapshot_id_list *equiv_seen, -- struct bpos *last_pos) -+struct snapshot_interior_delete { -+ u32 id; -+ u32 live_child; -+}; -+typedef DARRAY(struct snapshot_interior_delete) interior_delete_list; -+ -+static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id) - { -- int ret = bch2_check_key_has_snapshot(trans, iter, k); -- if (ret) -- return ret < 0 ? ret : 0; -+ darray_for_each(*l, i) -+ if (i->id == id) -+ return i->live_child; -+ return 0; -+} - -- struct bch_fs *c = trans->c; -- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); -- if (!equiv) /* key for invalid snapshot node, but we chose not to delete */ -+static unsigned __live_child(struct snapshot_table *t, u32 id, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ struct snapshot_t *s = __snapshot_t(t, id); -+ if (!s) - return 0; - -- if (!bkey_eq(k.k->p, *last_pos)) -- equiv_seen->nr = 0; -+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) -+ if (s->children[i] && -+ !snapshot_list_has_id(delete_leaves, s->children[i]) && -+ !interior_delete_has_id(delete_interior, s->children[i])) -+ return s->children[i]; - -- if (snapshot_list_has_id(deleted, k.k->p.snapshot)) -- return bch2_btree_delete_at(trans, iter, -- BTREE_UPDATE_internal_snapshot_node); -+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) { -+ u32 live_child = s->children[i] -+ ? __live_child(t, s->children[i], delete_leaves, delete_interior) -+ : 0; -+ if (live_child) -+ return live_child; -+ } - -- if (!bpos_eq(*last_pos, k.k->p) && -- snapshot_list_has_id(equiv_seen, equiv)) -- return bch2_btree_delete_at(trans, iter, -- BTREE_UPDATE_internal_snapshot_node); -+ return 0; -+} - -- *last_pos = k.k->p; -+static unsigned live_child(struct bch_fs *c, u32 id, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ rcu_read_lock(); -+ u32 ret = __live_child(rcu_dereference(c->snapshots), id, -+ delete_leaves, delete_interior); -+ rcu_read_unlock(); -+ return ret; -+} - -- ret = snapshot_list_add_nodup(c, equiv_seen, equiv); -- if (ret) -- return ret; -+static int delete_dead_snapshots_process_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) -+{ -+ if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot)) -+ return bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_internal_snapshot_node); - -- /* -- * When we have a linear chain of snapshot nodes, we consider -- * those to form an equivalence class: we're going to collapse -- * them all down to a single node, and keep the leaf-most node - -- * which has the same id as the equivalence class id. 
-- * -- * If there are multiple keys in different snapshots at the same -- * position, we're only going to keep the one in the newest -- * snapshot (we delete the others above) - the rest have been -- * overwritten and are redundant, and for the key we're going to keep we -- * need to move it to the equivalance class ID if it's not there -- * already. -- */ -- if (equiv != k.k->p.snapshot) { -+ u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot); -+ if (live_child) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); - int ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; - -- new->k.p.snapshot = equiv; -- -- struct btree_iter new_iter; -- bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, -- BTREE_ITER_all_snapshots| -- BTREE_ITER_cached| -- BTREE_ITER_intent); -+ new->k.p.snapshot = live_child; - -- ret = bch2_btree_iter_traverse(&new_iter) ?: -- bch2_trans_update(trans, &new_iter, new, -- BTREE_UPDATE_internal_snapshot_node) ?: -- bch2_btree_delete_at(trans, iter, -- BTREE_UPDATE_internal_snapshot_node); -- bch2_trans_iter_exit(trans, &new_iter); -+ struct btree_iter dst_iter; -+ struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter, -+ iter->btree_id, new->k.p, -+ BTREE_ITER_all_snapshots| -+ BTREE_ITER_intent); -+ ret = bkey_err(dst_k); - if (ret) - return ret; -+ -+ ret = (bkey_deleted(dst_k.k) -+ ? bch2_trans_update(trans, &dst_iter, new, -+ BTREE_UPDATE_internal_snapshot_node) -+ : 0) ?: -+ bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_internal_snapshot_node); -+ bch2_trans_iter_exit(trans, &dst_iter); -+ return ret; - } - - return 0; - } - --static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) -+/* -+ * For a given snapshot, if it doesn't have a subvolume that points to it, and -+ * it doesn't have child snapshot nodes - it's now redundant and we can mark it -+ * as deleted. -+ */ -+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k, -+ snapshot_id_list *delete_leaves, -+ interior_delete_list *delete_interior) - { -- struct bkey_s_c_snapshot snap; -- u32 children[2]; -- int ret; -- - if (k.k->type != KEY_TYPE_snapshot) - return 0; - -- snap = bkey_s_c_to_snapshot(k); -- if (BCH_SNAPSHOT_DELETED(snap.v) || -- BCH_SNAPSHOT_SUBVOL(snap.v)) -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); -+ unsigned live_children = 0; -+ -+ if (BCH_SNAPSHOT_SUBVOL(s.v)) - return 0; - -- children[0] = le32_to_cpu(snap.v->children[0]); -- children[1] = le32_to_cpu(snap.v->children[1]); -+ for (unsigned i = 0; i < 2; i++) { -+ u32 child = le32_to_cpu(s.v->children[i]); - -- ret = bch2_snapshot_live(trans, children[0]) ?: -- bch2_snapshot_live(trans, children[1]); -- if (ret < 0) -- return ret; -- return !ret; --} -+ live_children += child && -+ !snapshot_list_has_id(delete_leaves, child); -+ } - --/* -- * For a given snapshot, if it doesn't have a subvolume that points to it, and -- * it doesn't have child snapshot nodes - it's now redundant and we can mark it -- * as deleted. 
-- */ --static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) --{ -- int ret = bch2_snapshot_needs_delete(trans, k); -+ if (live_children == 0) { -+ return snapshot_list_add(c, delete_leaves, s.k->p.offset); -+ } else if (live_children == 1) { -+ struct snapshot_interior_delete d = { -+ .id = s.k->p.offset, -+ .live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior), -+ }; -+ -+ if (!d.live_child) { -+ bch_err(c, "error finding live child of snapshot %u", d.id); -+ return -EINVAL; -+ } - -- return ret <= 0 -- ? ret -- : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); -+ return darray_push(delete_interior, d); -+ } else { -+ return 0; -+ } - } - - static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, -- snapshot_id_list *skip) -+ interior_delete_list *skip) - { - rcu_read_lock(); -- while (snapshot_list_has_id(skip, id)) -+ while (interior_delete_has_id(skip, id)) - id = __bch2_snapshot_parent(c, id); - - while (n--) { - do { - id = __bch2_snapshot_parent(c, id); -- } while (snapshot_list_has_id(skip, id)); -+ } while (interior_delete_has_id(skip, id)); - } - rcu_read_unlock(); - -@@ -1534,7 +1493,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, - - static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - struct btree_iter *iter, struct bkey_s_c k, -- snapshot_id_list *deleted) -+ interior_delete_list *deleted) - { - struct bch_fs *c = trans->c; - u32 nr_deleted_ancestors = 0; -@@ -1544,7 +1503,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - if (k.k->type != KEY_TYPE_snapshot) - return 0; - -- if (snapshot_list_has_id(deleted, k.k->p.offset)) -+ if (interior_delete_has_id(deleted, k.k->p.offset)) - return 0; - - s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); -@@ -1553,7 +1512,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - return ret; - - darray_for_each(*deleted, i) -- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); -+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id); - - if (!nr_deleted_ancestors) - return 0; -@@ -1571,7 +1530,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { - u32 id = le32_to_cpu(s->v.skip[j]); - -- if (snapshot_list_has_id(deleted, id)) { -+ if (interior_delete_has_id(deleted, id)) { - id = bch2_snapshot_nth_parent_skip(c, - parent, - depth > 1 -@@ -1590,51 +1549,45 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, - - int bch2_delete_dead_snapshots(struct bch_fs *c) - { -- struct btree_trans *trans; -- snapshot_id_list deleted = { 0 }; -- snapshot_id_list deleted_interior = { 0 }; -- int ret = 0; -- - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) - return 0; - -- trans = bch2_trans_get(c); -+ struct btree_trans *trans = bch2_trans_get(c); -+ snapshot_id_list delete_leaves = {}; -+ interior_delete_list delete_interior = {}; -+ int ret = 0; - - /* - * For every snapshot node: If we have no live children and it's not - * pointed to by a subvolume, delete it: - */ -- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- NULL, NULL, 0, -- bch2_delete_redundant_snapshot(trans, k)); -- bch_err_msg(c, ret, "deleting redundant snapshots"); -+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, -+ 
check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior)); -+ if (!bch2_err_matches(ret, EROFS)) -+ bch_err_msg(c, ret, "walking snapshots"); - if (ret) - goto err; - -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- bch2_snapshot_set_equiv(trans, k)); -- bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); -- if (ret) -+ if (!delete_leaves.nr && !delete_interior.nr) - goto err; - -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, ({ -- if (k.k->type != KEY_TYPE_snapshot) -- continue; -+ { -+ struct printbuf buf = PRINTBUF; -+ prt_printf(&buf, "deleting leaves"); -+ darray_for_each(delete_leaves, i) -+ prt_printf(&buf, " %u", *i); - -- BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v) -- ? snapshot_list_add(c, &deleted, k.k->p.offset) -- : 0; -- })); -- bch_err_msg(c, ret, "walking snapshots"); -- if (ret) -- goto err; -+ prt_printf(&buf, " interior"); -+ darray_for_each(delete_interior, i) -+ prt_printf(&buf, " %u->%u", i->id, i->live_child); -+ -+ ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); -+ printbuf_exit(&buf); -+ if (ret) -+ goto err; -+ } - - for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) { -- struct bpos last_pos = POS_MIN; -- snapshot_id_list equiv_seen = { 0 }; - struct disk_reservation res = { 0 }; - - if (!btree_type_has_snapshots(btree)) -@@ -1644,33 +1597,26 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - btree, POS_MIN, - BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - &res, NULL, BCH_TRANS_COMMIT_no_enospc, -- delete_dead_snapshots_process_key(trans, &iter, k, &deleted, -- &equiv_seen, &last_pos)); -+ delete_dead_snapshots_process_key(trans, &iter, k, -+ &delete_leaves, -+ &delete_interior)); - - bch2_disk_reservation_put(c, &res); -- darray_exit(&equiv_seen); - -- bch_err_msg(c, ret, "deleting keys from dying snapshots"); -+ if (!bch2_err_matches(ret, EROFS)) -+ bch_err_msg(c, ret, "deleting keys from dying snapshots"); - if (ret) - goto err; - } - -- bch2_trans_unlock(trans); -- down_write(&c->snapshot_create_lock); -- -- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, ({ -- u32 snapshot = k.k->p.offset; -- u32 equiv = bch2_snapshot_equiv(c, snapshot); -- -- equiv != snapshot -- ? 
snapshot_list_add(c, &deleted_interior, snapshot) -- : 0; -- })); -- -- bch_err_msg(c, ret, "walking snapshots"); -- if (ret) -- goto err_create_lock; -+ darray_for_each(delete_leaves, i) { -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_snapshot_node_delete(trans, *i)); -+ if (!bch2_err_matches(ret, EROFS)) -+ bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ if (ret) -+ goto err; -+ } - - /* - * Fixing children of deleted snapshots can't be done completely -@@ -1680,32 +1626,24 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, - BTREE_ITER_intent, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); -+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior)); - if (ret) -- goto err_create_lock; -- -- darray_for_each(deleted, i) { -- ret = commit_do(trans, NULL, NULL, 0, -- bch2_snapshot_node_delete(trans, *i)); -- bch_err_msg(c, ret, "deleting snapshot %u", *i); -- if (ret) -- goto err_create_lock; -- } -+ goto err; - -- darray_for_each(deleted_interior, i) { -+ darray_for_each(delete_interior, i) { - ret = commit_do(trans, NULL, NULL, 0, -- bch2_snapshot_node_delete(trans, *i)); -- bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ bch2_snapshot_node_delete(trans, i->id)); -+ if (!bch2_err_matches(ret, EROFS)) -+ bch_err_msg(c, ret, "deleting snapshot %u", i->id); - if (ret) -- goto err_create_lock; -+ goto err; - } --err_create_lock: -- up_write(&c->snapshot_create_lock); - err: -- darray_exit(&deleted_interior); -- darray_exit(&deleted); -+ darray_exit(&delete_interior); -+ darray_exit(&delete_leaves); - bch2_trans_put(trans); -- bch_err_fn(c, ret); -+ if (!bch2_err_matches(ret, EROFS)) -+ bch_err_fn(c, ret); - return ret; - } - -@@ -1721,8 +1659,12 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) - - void bch2_delete_dead_snapshots_async(struct bch_fs *c) - { -- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && -- !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots)) -+ return; -+ -+ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); -+ -+ if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); - } - -@@ -1735,18 +1677,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- bch2_trans_iter_init(trans, &iter, id, pos, -- BTREE_ITER_not_extents| -- BTREE_ITER_all_snapshots); -- while (1) { -- k = bch2_btree_iter_prev(&iter); -- ret = bkey_err(k); -- if (ret) -- break; -- -- if (!k.k) -- break; -- -+ for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos), -+ BTREE_ITER_not_extents| -+ BTREE_ITER_all_snapshots, -+ k, ret) { - if (!bkey_eq(pos, k.k->p)) - break; - -@@ -1760,37 +1694,36 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, - return ret; - } - --static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) -+static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap) - { -- struct bch_fs *c = trans->c; -- struct bkey_s_c_snapshot snap; -- int ret = 0; -+ /* If there's one child, it's redundant and keys will be moved to the child */ -+ return !!snap.v->children[0] + !!snap.v->children[1] == 1; -+} - -+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) -+{ - if (k.k->type != 
KEY_TYPE_snapshot) - return 0; - -- snap = bkey_s_c_to_snapshot(k); -+ struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v) || -- bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || -- (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { -- set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); -- return 0; -- } -+ interior_snapshot_needs_delete(snap)) -+ set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags); - -- return ret; -+ return 0; - } - - int bch2_snapshots_read(struct bch_fs *c) - { -+ /* -+ * Initializing the is_ancestor bitmaps requires ancestors to already be -+ * initialized - so mark in reverse: -+ */ - int ret = bch2_trans_run(c, -- for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -+ for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, -+ POS_MAX, 0, k, - __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: -- bch2_snapshot_set_equiv(trans, k) ?: -- bch2_check_snapshot_needs_deletion(trans, k)) ?: -- for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- POS_MIN, 0, k, -- (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); -+ bch2_check_snapshot_needs_deletion(trans, k))); - bch_err_fn(c, ret); - - /* -diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -index 29c94716293e..00373cf32e7b 100644 ---- a/fs/bcachefs/snapshot.h -+++ b/fs/bcachefs/snapshot.h -@@ -2,11 +2,9 @@ - #ifndef _BCACHEFS_SNAPSHOT_H - #define _BCACHEFS_SNAPSHOT_H - --enum bch_validate_flags; -- - void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, -- enum bch_validate_flags); -+ struct bkey_validate_context); - - #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_validate = bch2_snapshot_tree_validate, \ -@@ -19,7 +17,8 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); - int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); - - void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); --int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, - enum btree_iter_update_trigger_flags); -@@ -120,19 +119,19 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) - return id; - } - --static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) -+static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id) - { - const struct snapshot_t *s = snapshot_t(c, id); -- return s ? s->equiv : 0; -+ return s ? 
s->live : 0; - } - --static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) -+static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id) - { - rcu_read_lock(); -- id = __bch2_snapshot_equiv(c, id); -+ bool ret = __bch2_snapshot_exists(c, id); - rcu_read_unlock(); - -- return id; -+ return ret; - } - - static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) -diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c -new file mode 100644 -index 000000000000..d78451c2a0c6 ---- /dev/null -+++ b/fs/bcachefs/str_hash.c -@@ -0,0 +1,295 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fsck.h" -+#include "str_hash.h" -+#include "subvolume.h" -+ -+static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) -+{ -+ if (d.v->d_type == DT_SUBVOL) { -+ struct bch_subvolume subvol; -+ int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol), -+ false, &subvol); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ return ret; -+ return !ret; -+ } else { -+ struct btree_iter iter; -+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); -+ int ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ ret = bkey_is_inode(k.k); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+ } -+} -+ -+static noinline int fsck_rename_dirent(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_s_c_dirent old) -+{ -+ struct qstr old_name = bch2_dirent_get_name(old); -+ struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); -+ int ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ bkey_dirent_init(&new->k_i); -+ dirent_copy_target(new, old); -+ new->k.p = old.k->p; -+ -+ for (unsigned i = 0; i < 1000; i++) { -+ unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", -+ old_name.len, old_name.name, i); -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(len); -+ -+ if (u64s > U8_MAX) -+ return -EINVAL; -+ -+ new->k.u64s = u64s; -+ -+ ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, -+ (subvol_inum) { 0, old.k->p.inode }, -+ old.k->p.snapshot, &new->k_i, -+ BTREE_UPDATE_internal_snapshot_node); -+ if (!bch2_err_matches(ret, EEXIST)) -+ break; -+ } -+ -+ if (ret) -+ return ret; -+ -+ return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); -+} -+ -+static noinline int hash_pick_winner(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct bkey_s_c k1, -+ struct bkey_s_c k2) -+{ -+ if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && -+ !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) -+ return 0; -+ -+ switch (desc.btree_id) { -+ case BTREE_ID_dirents: { -+ int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1)); -+ if (ret < 0) -+ return ret; -+ if (!ret) -+ return 0; -+ -+ ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2)); -+ if (ret < 0) -+ return ret; -+ if (!ret) -+ return 1; -+ return 2; -+ } -+ default: -+ return 0; -+ } -+} -+ -+static int repair_inode_hash_info(struct btree_trans *trans, -+ struct bch_inode_unpacked *snapshot_root) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, -+ SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1), -+ 
BTREE_ITER_all_snapshots, k, ret) { -+ if (k.k->p.offset != snapshot_root->bi_inum) -+ break; -+ if (!bkey_is_inode(k.k)) -+ continue; -+ -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ break; -+ -+ if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed || -+ INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root), -+ trans, inode_snapshot_mismatch, -+ "inode hash info in different snapshots don't match")) { -+ inode.bi_hash_seed = snapshot_root->bi_hash_seed; -+ SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root)); -+ ret = __bch2_fsck_write_inode(trans, &inode) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ break; -+ } -+ } -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* -+ * All versions of the same inode in different snapshots must have the same hash -+ * seed/type: verify that the hash info we're using matches the root -+ */ -+static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, -+ struct bch_hash_info *hash_info) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX), -+ BTREE_ITER_all_snapshots, k, ret) { -+ if (k.k->p.offset != inum) -+ break; -+ if (bkey_is_inode(k.k)) -+ goto found; -+ } -+ bch_err(c, "%s(): inum %llu not found", __func__, inum); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ goto err; -+found:; -+ struct bch_inode_unpacked inode; -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ goto err; -+ -+ struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); -+ if (hash_info->type != hash2.type || -+ memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) { -+ ret = repair_inode_hash_info(trans, &inode); -+ if (!ret) { -+ bch_err(c, "inode hash info mismatch with root, but mismatch not found\n" -+ "%u %llx %llx\n" -+ "%u %llx %llx", -+ hash_info->type, -+ hash_info->siphash_key.k0, -+ hash_info->siphash_key.k1, -+ hash2.type, -+ hash2.siphash_key.k0, -+ hash2.siphash_key.k1); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ } -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int __bch2_str_hash_check_key(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc *desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct printbuf buf = PRINTBUF; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ u64 hash = desc->hash_bkey(hash_info, hash_k); -+ if (hash_k.k->p.offset < hash) -+ goto bad_hash; -+ -+ for_each_btree_key_norestart(trans, iter, desc->btree_id, -+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -+ BTREE_ITER_slots, k, ret) { -+ if (bkey_eq(k.k->p, hash_k.k->p)) -+ break; -+ -+ if (k.k->type == desc->key_type && -+ !desc->cmp_bkey(k, hash_k)) -+ goto duplicate_entries; -+ -+ if (bkey_deleted(k.k)) { -+ bch2_trans_iter_exit(trans, &iter); -+ goto bad_hash; -+ } -+ } -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+bad_hash: -+ /* -+ * Before doing any repair, check hash_info itself: -+ */ -+ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); -+ if (ret) -+ goto out; -+ -+ if (fsck_err(trans, hash_table_key_wrong_offset, -+ "hash table key at wrong offset: btree %s 
inode %llu offset %llu, hashed to %llu\n %s", -+ bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); -+ if (IS_ERR(new)) -+ return PTR_ERR(new); -+ -+ k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info, -+ (subvol_inum) { 0, hash_k.k->p.inode }, -+ hash_k.k->p.snapshot, new, -+ STR_HASH_must_create| -+ BTREE_ITER_with_updates| -+ BTREE_UPDATE_internal_snapshot_node); -+ ret = bkey_err(k); -+ if (ret) -+ goto out; -+ if (k.k) -+ goto duplicate_entries; -+ -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, -+ BTREE_UPDATE_internal_snapshot_node) ?: -+ bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: -+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto out; -+ } -+fsck_err: -+ goto out; -+duplicate_entries: -+ ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k); -+ if (ret < 0) -+ goto out; -+ -+ if (!fsck_err(trans, hash_table_key_duplicate, -+ "duplicate hash table keys%s:\n%s", -+ ret != 2 ? "" : ", both point to valid inodes", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), -+ prt_newline(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), -+ buf.buf))) -+ goto out; -+ -+ switch (ret) { -+ case 0: -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); -+ break; -+ case 1: -+ ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0); -+ break; -+ case 2: -+ ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: -+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); -+ goto out; -+ } -+ -+ ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: -+ -BCH_ERR_transaction_restart_nested; -+ goto out; -+} -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -index ec2b1feea520..55a4ac7bf220 100644 ---- a/fs/bcachefs/str_hash.h -+++ b/fs/bcachefs/str_hash.h -@@ -160,7 +160,7 @@ bch2_hash_lookup_in_snapshot(struct btree_trans *trans, - struct bkey_s_c k; - int ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(inum.inum, desc.hash_key(info, key), snapshot), - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|flags, k, ret) { -@@ -210,7 +210,7 @@ bch2_hash_hole(struct btree_trans *trans, - if (ret) - return ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(inum.inum, desc.hash_key(info, key), snapshot), - POS(inum.inum, U64_MAX), - BTREE_ITER_slots|BTREE_ITER_intent, k, ret) -@@ -265,7 +265,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, - bool found = false; - int ret; - -- for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ for_each_btree_key_max_norestart(trans, *iter, desc.btree_id, - SPOS(insert->k.p.inode, - desc.hash_bkey(info, bkey_i_to_s_c(insert)), - snapshot), -@@ -393,4 +393,26 @@ int bch2_hash_delete(struct btree_trans *trans, - return ret; - } - -+struct snapshots_seen; -+int __bch2_str_hash_check_key(struct btree_trans *, -+ struct snapshots_seen *, -+ const struct bch_hash_desc *, -+ struct bch_hash_info *, -+ struct btree_iter *, struct bkey_s_c); -+ -+static inline int bch2_str_hash_check_key(struct btree_trans *trans, -+ struct snapshots_seen *s, -+ const struct bch_hash_desc *desc, -+ struct 
bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ if (hash_k.k->type != desc->key_type) -+ return 0; -+ -+ if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset)) -+ return 0; -+ -+ return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k); -+} -+ - #endif /* _BCACHEFS_STR_HASH_H */ -diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -index 80e5efaff524..e3d0475232e5 100644 ---- a/fs/bcachefs/subvolume.c -+++ b/fs/bcachefs/subvolume.c -@@ -207,7 +207,7 @@ int bch2_check_subvol_children(struct bch_fs *c) - /* Subvolumes: */ - - int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, -- enum bch_validate_flags flags) -+ struct bkey_validate_context from) - { - struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); - int ret = 0; -@@ -286,11 +286,11 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol) - static __always_inline int - bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, - bool inconsistent_if_not_found, -- int iter_flags, - struct bch_subvolume *s) - { - int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), -- iter_flags, subvolume, s); -+ BTREE_ITER_cached| -+ BTREE_ITER_with_updates, subvolume, s); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && - inconsistent_if_not_found, - trans->c, "missing subvolume %u", subvol); -@@ -299,16 +299,15 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, - - int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, - bool inconsistent_if_not_found, -- int iter_flags, - struct bch_subvolume *s) - { -- return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); -+ return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, s); - } - - int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) - { - struct bch_subvolume s; -- int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); -+ int ret = bch2_subvolume_get_inlined(trans, subvol, true, &s); - if (ret) - return ret; - -@@ -328,7 +327,7 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, - struct bch_snapshot snap; - - return bch2_snapshot_lookup(trans, snapshot, &snap) ?: -- bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); -+ bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, subvol); - } - - int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, -@@ -396,8 +395,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d - struct bch_subvolume s; - - return lockrestart_do(trans, -- bch2_subvolume_get(trans, subvolid_to_delete, true, -- BTREE_ITER_cached, &s)) ?: -+ bch2_subvolume_get(trans, subvolid_to_delete, true, &s)) ?: - for_each_btree_key_commit(trans, iter, - BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, -@@ -411,26 +409,56 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d - */ - static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) - { -- struct btree_iter iter; -- struct bkey_s_c_subvolume subvol; -- u32 snapid; -- int ret = 0; -+ struct btree_iter subvol_iter = {}, snapshot_iter = {}, snapshot_tree_iter = {}; - -- subvol = bch2_bkey_get_iter_typed(trans, &iter, -+ struct bkey_s_c_subvolume subvol = -+ bch2_bkey_get_iter_typed(trans, &subvol_iter, - BTREE_ID_subvolumes, POS(0, subvolid), - BTREE_ITER_cached|BTREE_ITER_intent, 
- subvolume); -- ret = bkey_err(subvol); -+ int ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, - "missing subvolume %u", subvolid); - if (ret) -- return ret; -+ goto err; - -- snapid = le32_to_cpu(subvol.v->snapshot); -+ u32 snapid = le32_to_cpu(subvol.v->snapshot); -+ -+ struct bkey_s_c_snapshot snapshot = -+ bch2_bkey_get_iter_typed(trans, &snapshot_iter, -+ BTREE_ID_snapshots, POS(0, snapid), -+ 0, snapshot); -+ ret = bkey_err(subvol); -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, -+ "missing snapshot %u", snapid); -+ if (ret) -+ goto err; -+ -+ u32 treeid = le32_to_cpu(snapshot.v->tree); - -- ret = bch2_btree_delete_at(trans, &iter, 0) ?: -+ struct bkey_s_c_snapshot_tree snapshot_tree = -+ bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, -+ BTREE_ID_snapshot_trees, POS(0, treeid), -+ 0, snapshot_tree); -+ -+ if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { -+ struct bkey_i_snapshot_tree *snapshot_tree_mut = -+ bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter, -+ &snapshot_tree.s_c, -+ 0, snapshot_tree); -+ ret = PTR_ERR_OR_ZERO(snapshot_tree_mut); -+ if (ret) -+ goto err; -+ -+ snapshot_tree_mut->v.master_subvol = 0; -+ } -+ -+ ret = bch2_btree_delete_at(trans, &subvol_iter, 0) ?: - bch2_snapshot_node_set_deleted(trans, snapid); -- bch2_trans_iter_exit(trans, &iter); -+err: -+ bch2_trans_iter_exit(trans, &snapshot_tree_iter); -+ bch2_trans_iter_exit(trans, &snapshot_iter); -+ bch2_trans_iter_exit(trans, &subvol_iter); - return ret; - } - -@@ -675,7 +703,7 @@ static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) - /* set bi_subvol on root inode */ - int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) - { -- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, -+ int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; -diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -index f897d106e142..910f6196700e 100644 ---- a/fs/bcachefs/subvolume.h -+++ b/fs/bcachefs/subvolume.h -@@ -5,12 +5,11 @@ - #include "darray.h" - #include "subvolume_types.h" - --enum bch_validate_flags; -- - int bch2_check_subvols(struct bch_fs *); - int bch2_check_subvol_children(struct bch_fs *); - --int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); -+int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, -+ struct bkey_validate_context); - void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); - int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s, -@@ -25,7 +24,7 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, - - int bch2_subvol_has_children(struct btree_trans *, u32); - int bch2_subvolume_get(struct btree_trans *, unsigned, -- bool, int, struct bch_subvolume *); -+ bool, struct bch_subvolume *); - int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, - u32 *, bool); - int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -@@ -34,7 +33,7 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32); - int bch2_subvol_is_ro(struct bch_fs *, u32); - - static inline struct bkey_s_c --bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos end, -+bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end, - u32 subvolid, unsigned flags) - { - u32 snapshot; -@@ -43,10 +42,10 
@@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - return bkey_s_c_err(ret); - - bch2_btree_iter_set_snapshot(iter, snapshot); -- return bch2_btree_iter_peek_upto_type(iter, end, flags); -+ return bch2_btree_iter_peek_max_type(iter, end, flags); - } - --#define for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ -+#define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ - _end, _subvolid, _flags, _k, _do) \ - ({ \ - struct bkey_s_c _k; \ -@@ -54,7 +53,7 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - \ - do { \ - _ret3 = lockrestart_do(_trans, ({ \ -- (_k) = bch2_btree_iter_peek_in_subvolume_upto_type(&(_iter), \ -+ (_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter), \ - _end, _subvolid, (_flags)); \ - if (!(_k).k) \ - break; \ -@@ -67,14 +66,14 @@ bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos - _ret3; \ - }) - --#define for_each_btree_key_in_subvolume_upto(_trans, _iter, _btree_id, \ -+#define for_each_btree_key_in_subvolume_max(_trans, _iter, _btree_id, \ - _start, _end, _subvolid, _flags, _k, _do) \ - ({ \ - struct btree_iter _iter; \ - bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ - (_start), (_flags)); \ - \ -- for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \ -+ for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \ - _end, _subvolid, _flags, _k, _do); \ - }) - -diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h -index f2ec4277c2a5..1549d6daf7af 100644 ---- a/fs/bcachefs/subvolume_types.h -+++ b/fs/bcachefs/subvolume_types.h -@@ -9,13 +9,13 @@ typedef DARRAY(u32) snapshot_id_list; - #define IS_ANCESTOR_BITMAP 128 - - struct snapshot_t { -+ bool live; - u32 parent; - u32 skip[3]; - u32 depth; - u32 children[2]; - u32 subvol; /* Nonzero only if a subvolume points to this node: */ - u32 tree; -- u32 equiv; - unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; - }; - -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -index 7c71594f6a8b..8037ccbacf6a 100644 ---- a/fs/bcachefs/super-io.c -+++ b/fs/bcachefs/super-io.c -@@ -23,6 +23,7 @@ - - #include - #include -+#include - - static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { - }; -@@ -41,7 +42,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = { - #undef x - }; - --void bch2_version_to_text(struct printbuf *out, unsigned v) -+void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v) - { - const char *str = "(unknown version)"; - -@@ -54,7 +55,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v) - prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); - } - --unsigned bch2_latest_compatible_version(unsigned v) -+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v) - { - if (!BCH_VERSION_MAJOR(v)) - return v; -@@ -68,6 +69,16 @@ unsigned bch2_latest_compatible_version(unsigned v) - return v; - } - -+void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) -+{ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, -+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); -+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+} -+ - const char * const bch2_sb_fields[] = { - #define x(name, nr) #name, - BCH_SB_FIELDS() -@@ -368,6 +379,12 @@ static int 
bch2_sb_validate(struct bch_sb_handle *disk_sb, - return -BCH_ERR_invalid_sb_features; - } - -+ if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || -+ BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { -+ prt_printf(out, "Filesystem has incompatible version"); -+ return -BCH_ERR_invalid_sb_features; -+ } -+ - block_size = le16_to_cpu(sb->block_size); - - if (block_size > PAGE_SECTORS) { -@@ -406,6 +423,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - return -BCH_ERR_invalid_sb_time_precision; - } - -+ /* old versions didn't know to downgrade this field */ -+ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version)) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version)); -+ -+ if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) { -+ prt_printf(out, "Invalid version_incompat "); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); -+ prt_str(out, " > incompat_allowed "); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); -+ if (flags & BCH_VALIDATE_write) -+ return -BCH_ERR_invalid_sb_version; -+ else -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); -+ } -+ - if (!flags) { - /* - * Been seeing a bug where these are getting inexplicably -@@ -428,6 +460,11 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true); - } - -+#ifdef __KERNEL__ -+ if (!BCH_SB_SHARD_INUMS_NBITS(sb)) -+ SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus()))); -+#endif -+ - for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { - const struct bch_option *opt = bch2_opt_table + opt_id; - -@@ -519,6 +556,9 @@ static void bch2_sb_update(struct bch_fs *c) - c->sb.uuid = src->uuid; - c->sb.user_uuid = src->user_uuid; - c->sb.version = le16_to_cpu(src->version); -+ c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src); -+ c->sb.version_incompat_allowed -+ = BCH_SB_VERSION_INCOMPAT_ALLOWED(src); - c->sb.version_min = le16_to_cpu(src->version_min); - c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); - c->sb.nr_devices = src->nr_devices; -@@ -676,7 +716,8 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf - } - - enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); -- if (csum_type >= BCH_CSUM_NR) { -+ if (csum_type >= BCH_CSUM_NR || -+ bch2_csum_type_is_encryption(csum_type)) { - prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); - return -BCH_ERR_invalid_sb_csum_type; - } -@@ -878,7 +919,7 @@ static void write_super_endio(struct bio *bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "superblock %s error: %s", -- bio_data_dir(bio) ? 
"write" : "read", -+ str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) - ca->sb_write_error = 1; - -@@ -891,14 +932,15 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) - struct bch_sb *sb = ca->disk_sb.sb; - struct bio *bio = ca->disk_sb.bio; - -+ memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE); -+ - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); - bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); - bio->bi_end_io = write_super_endio; - bio->bi_private = ca; -- bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE); - -- this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], -- bio_sectors(bio)); -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); - - percpu_ref_get(&ca->io_ref); - closure_bio_submit(bio, &c->sb_write); -@@ -1042,9 +1084,16 @@ int bch2_write_super(struct bch_fs *c) - ": Superblock write was silently dropped! (seq %llu expected %llu)", - le64_to_cpu(ca->sb_read_scratch->seq), - ca->disk_sb.seq); -- bch2_fs_fatal_error(c, "%s", buf.buf); -+ -+ if (c->opts.errors != BCH_ON_ERROR_continue && -+ c->opts.errors != BCH_ON_ERROR_fix_safe) { -+ ret = -BCH_ERR_erofs_sb_err; -+ bch2_fs_fatal_error(c, "%s", buf.buf); -+ } else { -+ bch_err(c, "%s", buf.buf); -+ } -+ - printbuf_exit(&buf); -- ret = -BCH_ERR_erofs_sb_err; - } - - if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { -@@ -1149,6 +1198,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c) - */ - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -+ if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current); - if (c->sb.version > bcachefs_metadata_version_current) - c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); - if (c->sb.version_min > bcachefs_metadata_version_current) -@@ -1157,7 +1208,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c) - return ret; - } - --void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) -+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) - { - lockdep_assert_held(&c->sb_lock); - -@@ -1167,6 +1218,10 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) - - c->disk_sb.sb->version = cpu_to_le16(new_version); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); -+ -+ if (incompat) -+ SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, -+ max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); - } - - static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, -@@ -1331,6 +1386,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, - bch2_version_to_text(out, le16_to_cpu(sb->version)); - prt_newline(out); - -+ prt_printf(out, "Incompatible features allowed:\t"); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); -+ prt_newline(out); -+ -+ prt_printf(out, "Incompatible features in use:\t"); -+ bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); -+ prt_newline(out); -+ - prt_printf(out, "Version upgrade complete:\t"); - bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); - prt_newline(out); -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -index fadd364e2802..f1ab4f943720 100644 ---- 
a/fs/bcachefs/super-io.h -+++ b/fs/bcachefs/super-io.h -@@ -10,14 +10,29 @@ - - #include - -+#define BCH_SB_READ_SCRATCH_BUF_SIZE 4096 -+ - static inline bool bch2_version_compatible(u16 version) - { - return BCH_VERSION_MAJOR(version) <= BCH_VERSION_MAJOR(bcachefs_metadata_version_current) && - version >= bcachefs_metadata_version_min; - } - --void bch2_version_to_text(struct printbuf *, unsigned); --unsigned bch2_latest_compatible_version(unsigned); -+void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); -+enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); -+ -+void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); -+ -+static inline bool bch2_request_incompat_feature(struct bch_fs *c, -+ enum bcachefs_metadata_version version) -+{ -+ if (unlikely(version > c->sb.version_incompat)) { -+ if (version > c->sb.version_incompat_allowed) -+ return false; -+ bch2_set_version_incompat(c, version); -+ } -+ return true; -+} - - static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) - { -@@ -92,7 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) - } - - bool bch2_check_version_downgrade(struct bch_fs *); --void bch2_sb_upgrade(struct bch_fs *, unsigned); -+void bch2_sb_upgrade(struct bch_fs *, unsigned, bool); - - void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, - struct bch_sb_field *); -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -index a6ed9a0bf1c7..d97ea7bd1171 100644 ---- a/fs/bcachefs/super.c -+++ b/fs/bcachefs/super.c -@@ -290,7 +290,7 @@ static void __bch2_fs_read_only(struct bch_fs *c) - - bch2_fs_journal_stop(&c->journal); - -- bch_info(c, "%sshutdown complete, journal seq %llu", -+ bch_info(c, "%sclean shutdown complete, journal seq %llu", - test_bit(BCH_FS_clean_shutdown, &c->flags) ? 
"" : "un", - c->journal.seq_ondisk); - -@@ -441,6 +441,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) - { - int ret; - -+ BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); -+ - if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { - bch_err(c, "cannot go rw, unfixed btree errors"); - return -BCH_ERR_erofs_unfixed_errors; -@@ -561,6 +563,7 @@ static void __bch2_fs_free(struct bch_fs *c) - bch2_io_clock_exit(&c->io_clock[WRITE]); - bch2_io_clock_exit(&c->io_clock[READ]); - bch2_fs_compress_exit(c); -+ bch2_fs_btree_gc_exit(c); - bch2_journal_keys_put_initial(c); - bch2_find_btree_nodes_exit(&c->found_btree_nodes); - BUG_ON(atomic_read(&c->journal_keys.ref)); -@@ -584,7 +587,6 @@ static void __bch2_fs_free(struct bch_fs *c) - #endif - kfree(rcu_dereference_protected(c->disk_groups, 1)); - kfree(c->journal_seq_blacklist_table); -- kfree(c->unused_inode_hints); - - if (c->write_ref_wq) - destroy_workqueue(c->write_ref_wq); -@@ -766,21 +768,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - - refcount_set(&c->ro_ref, 1); - init_waitqueue_head(&c->ro_ref_wait); -+ spin_lock_init(&c->recovery_pass_lock); - sema_init(&c->online_fsck_mutex, 1); - -- init_rwsem(&c->gc_lock); -- mutex_init(&c->gc_gens_lock); -- atomic_set(&c->journal_keys.ref, 1); -- c->journal_keys.initial_ref_held = true; -- - for (i = 0; i < BCH_TIME_STAT_NR; i++) - bch2_time_stats_init(&c->times[i]); - -- bch2_fs_gc_init(c); - bch2_fs_copygc_init(c); - bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); - bch2_fs_btree_iter_init_early(c); - bch2_fs_btree_interior_update_init_early(c); -+ bch2_fs_journal_keys_init(c); - bch2_fs_allocator_background_init(c); - bch2_fs_allocator_foreground_init(c); - bch2_fs_rebalance_init(c); -@@ -809,9 +807,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - INIT_LIST_HEAD(&c->vfs_inodes_list); - mutex_init(&c->vfs_inodes_lock); - -- c->copy_gc_enabled = 1; -- c->rebalance.enabled = 1; -- - c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write]; - c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; - c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; -@@ -873,8 +868,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - (btree_blocks(c) + 1) * 2 * - sizeof(struct sort_iter_set); - -- c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus())); -- - if (!(c->btree_update_wq = alloc_workqueue("bcachefs", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || - !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io", -@@ -901,9 +894,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - !(c->online_reserved = alloc_percpu(u64)) || - mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1, - c->opts.btree_node_size) || -- mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || -- !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, -- sizeof(u64), GFP_KERNEL))) { -+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) { - ret = -BCH_ERR_ENOMEM_fs_other_alloc; - goto err; - } -@@ -917,6 +908,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) - bch2_fs_btree_cache_init(c) ?: - bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: - bch2_fs_btree_interior_update_init(c) ?: -+ bch2_fs_btree_gc_init(c) ?: - bch2_fs_buckets_waiting_for_journal_init(c) ?: - bch2_fs_btree_write_buffer_init(c) ?: - bch2_fs_subvolumes_init(c) ?: -@@ -1033,9 +1025,12 
@@ int bch2_fs_start(struct bch_fs *c) - bch2_dev_allocator_add(c, ca); - bch2_recalc_capacity(c); - -+ c->recovery_task = current; - ret = BCH_SB_INITIALIZED(c->disk_sb.sb) - ? bch2_fs_recovery(c) - : bch2_fs_initialize(c); -+ c->recovery_task = NULL; -+ - if (ret) - goto err; - -@@ -1120,12 +1115,12 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, - - prt_bdevname(&buf, fs->bdev); - prt_char(&buf, ' '); -- bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; -+ bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); - prt_newline(&buf); - - prt_bdevname(&buf, sb->bdev); - prt_char(&buf, ' '); -- bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; -+ bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); - prt_newline(&buf); - - if (!opts->no_splitbrain_check) -@@ -1198,7 +1193,7 @@ static void bch2_dev_free(struct bch_dev *ca) - - free_percpu(ca->io_done); - bch2_dev_buckets_free(ca); -- free_page((unsigned long) ca->sb_read_scratch); -+ kfree(ca->sb_read_scratch); - - bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]); - bch2_time_stats_quantiles_exit(&ca->io_latency[READ]); -@@ -1309,8 +1304,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, - init_completion(&ca->ref_completion); - init_completion(&ca->io_ref_completion); - -- init_rwsem(&ca->bucket_lock); -- - INIT_WORK(&ca->io_error_work, bch2_io_error_work); - - bch2_time_stats_quantiles_init(&ca->io_latency[READ]); -@@ -1337,7 +1330,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, - - if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, - PERCPU_REF_INIT_DEAD, GFP_KERNEL) || -- !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || -+ !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) || - bch2_dev_buckets_alloc(c, ca) || - !(ca->io_done = alloc_percpu(*ca->io_done))) - goto err; -@@ -1366,7 +1359,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) - { - struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); - struct bch_dev *ca = NULL; -- int ret = 0; - - if (bch2_fs_init_fault("dev_alloc")) - goto err; -@@ -1378,10 +1370,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) - ca->fs = c; - - bch2_dev_attach(c, ca, dev_idx); -- return ret; -+ return 0; - err: -- if (ca) -- bch2_dev_free(ca); - return -BCH_ERR_ENOMEM_dev_alloc; - } - -@@ -1751,11 +1741,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) - if (ret) - goto err; - -- ret = bch2_dev_journal_alloc(ca, true); -- bch_err_msg(c, ret, "allocating journal"); -- if (ret) -- goto err; -- - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); - -@@ -1806,11 +1791,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path) - if (ret) - goto err_late; - -- ca->new_fs_bucket_idx = 0; -- - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); - -+ ret = bch2_dev_journal_alloc(ca, false); -+ bch_err_msg(c, ret, "allocating journal"); -+ if (ret) -+ goto err_late; -+ - up_write(&c->state_lock); - return 0; - -diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h -index dada09331d2e..fa6d52216510 100644 ---- a/fs/bcachefs/super.h -+++ b/fs/bcachefs/super.h -@@ -34,16 +34,6 @@ void bch2_fs_read_only(struct bch_fs *); - int bch2_fs_read_write(struct bch_fs *); - int bch2_fs_read_write_early(struct bch_fs *); - --/* -- * Only for use in the recovery/fsck path: -- */ --static inline void bch2_fs_lazy_rw(struct bch_fs *c) --{ -- if (!test_bit(BCH_FS_rw, &c->flags) && -- !test_bit(BCH_FS_was_rw, &c->flags)) -- 
bch2_fs_read_write_early(c); --} -- - void __bch2_fs_stop(struct bch_fs *); - void bch2_fs_free(struct bch_fs *); - void bch2_fs_stop(struct bch_fs *); -diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c -index 03e59f86f360..a7eb1f511484 100644 ---- a/fs/bcachefs/sysfs.c -+++ b/fs/bcachefs/sysfs.c -@@ -146,7 +146,7 @@ write_attribute(trigger_journal_writes); - write_attribute(trigger_btree_cache_shrink); - write_attribute(trigger_btree_key_cache_shrink); - write_attribute(trigger_freelist_wakeup); --rw_attribute(gc_gens_pos); -+read_attribute(gc_gens_pos); - - read_attribute(uuid); - read_attribute(minor); -@@ -203,7 +203,6 @@ read_attribute(disk_groups); - - read_attribute(has_data); - read_attribute(alloc_debug); --read_attribute(accounting); - read_attribute(usage_base); - - #define x(t, n, ...) read_attribute(t); -@@ -211,12 +210,11 @@ BCH_PERSISTENT_COUNTERS() - #undef x - - rw_attribute(discard); -+read_attribute(state); - rw_attribute(label); - --rw_attribute(copy_gc_enabled); - read_attribute(copy_gc_wait); - --rw_attribute(rebalance_enabled); - sysfs_pd_controller_attribute(rebalance); - read_attribute(rebalance_status); - -@@ -237,11 +235,6 @@ write_attribute(perf_test); - BCH_TIME_STATS() - #undef x - --static struct attribute sysfs_state_rw = { -- .name = "state", -- .mode = 0444, --}; -- - static size_t bch2_btree_cache_size(struct bch_fs *c) - { - struct btree_cache *bc = &c->btree_cache; -@@ -302,7 +295,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c - - static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) - { -- prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); -+ bch2_btree_id_to_text(out, c->gc_gens_btree); -+ prt_printf(out, ": "); - bch2_bpos_to_text(out, c->gc_gens_pos); - prt_printf(out, "\n"); - } -@@ -339,9 +333,6 @@ SHOW(bch2_fs) - if (attr == &sysfs_gc_gens_pos) - bch2_gc_gens_pos_to_text(out, c); - -- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); -- -- sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled); - sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */ - - if (attr == &sysfs_copy_gc_wait) -@@ -405,9 +396,6 @@ SHOW(bch2_fs) - if (attr == &sysfs_alloc_debug) - bch2_fs_alloc_debug_to_text(out, c); - -- if (attr == &sysfs_accounting) -- bch2_fs_accounting_to_text(out, c); -- - if (attr == &sysfs_usage_base) - bch2_fs_usage_base_to_text(out, c); - -@@ -418,23 +406,6 @@ STORE(bch2_fs) - { - struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - -- if (attr == &sysfs_copy_gc_enabled) { -- ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled) -- ?: (ssize_t) size; -- -- if (c->copygc_thread) -- wake_up_process(c->copygc_thread); -- return ret; -- } -- -- if (attr == &sysfs_rebalance_enabled) { -- ssize_t ret = strtoul_safe(buf, c->rebalance.enabled) -- ?: (ssize_t) size; -- -- rebalance_wakeup(c); -- return ret; -- } -- - sysfs_pd_controller_store(rebalance, &c->rebalance.pd); - - /* Debugging: */ -@@ -534,15 +505,22 @@ SHOW(bch2_fs_counters) - - printbuf_tabstop_push(out, 32); - -- #define x(t, ...) \ -+ #define x(t, n, f, ...) \ - if (attr == &sysfs_##t) { \ - counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ - counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\ -+ if (f & TYPE_SECTORS) { \ -+ counter <<= 9; \ -+ counter_since_mount <<= 9; \ -+ } \ -+ \ - prt_printf(out, "since mount:\t"); \ -+ (f & TYPE_COUNTER) ? 
prt_u64(out, counter_since_mount) :\ - prt_human_readable_u64(out, counter_since_mount); \ - prt_newline(out); \ - \ - prt_printf(out, "since filesystem creation:\t"); \ -+ (f & TYPE_COUNTER) ? prt_u64(out, counter) : \ - prt_human_readable_u64(out, counter); \ - prt_newline(out); \ - } -@@ -610,10 +588,8 @@ struct attribute *bch2_fs_internal_files[] = { - - &sysfs_gc_gens_pos, - -- &sysfs_copy_gc_enabled, - &sysfs_copy_gc_wait, - -- &sysfs_rebalance_enabled, - sysfs_pd_controller_files(rebalance), - - &sysfs_moving_ctxts, -@@ -622,7 +598,6 @@ struct attribute *bch2_fs_internal_files[] = { - - &sysfs_disk_groups, - &sysfs_alloc_debug, -- &sysfs_accounting, - &sysfs_usage_base, - NULL - }; -@@ -682,6 +657,13 @@ STORE(bch2_fs_opts_dir) - (id == Opt_compression && !c->opts.background_compression))) - bch2_set_rebalance_needs_scan(c, 0); - -+ if (v && id == Opt_rebalance_enabled) -+ rebalance_wakeup(c); -+ -+ if (v && id == Opt_copygc_enabled && -+ c->copygc_thread) -+ wake_up_process(c->copygc_thread); -+ - ret = size; - err: - bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); -@@ -790,7 +772,7 @@ SHOW(bch2_dev) - prt_char(out, '\n'); - } - -- if (attr == &sysfs_state_rw) { -+ if (attr == &sysfs_state) { - prt_string_option(out, bch2_member_states, ca->mi.state); - prt_char(out, '\n'); - } -@@ -870,7 +852,7 @@ struct attribute *bch2_dev_files[] = { - - /* settings: */ - &sysfs_discard, -- &sysfs_state_rw, -+ &sysfs_state, - &sysfs_label, - - &sysfs_has_data, -diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c -index fb5c1543e52f..6c6469814637 100644 ---- a/fs/bcachefs/tests.c -+++ b/fs/bcachefs/tests.c -@@ -131,7 +131,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i++); -@@ -186,7 +186,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i); -@@ -242,7 +242,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(k.k->p.offset != i); -@@ -259,7 +259,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs, -+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ - if (i >= nr * 2) -@@ -302,7 +302,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ - BUG_ON(bkey_start_offset(k.k) != i + 8); -@@ -320,7 +320,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) - i = 0; - - ret = bch2_trans_run(c, -- for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ - if (i == nr) -@@ -349,10 +349,10 @@ static int 
test_peek_end(struct bch_fs *c, u64 nr)
- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), 0);
-
-- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
- BUG_ON(k.k);
-
-- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
- BUG_ON(k.k);
-
- bch2_trans_iter_exit(trans, &iter);
-@@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
- bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), 0);
-
-- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
- BUG_ON(k.k);
-
-- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
- BUG_ON(k.k);
-
- bch2_trans_iter_exit(trans, &iter);
-@@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
- trans = bch2_trans_get(c);
- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
- SPOS(0, 0, snapid_lo), 0);
-- lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
-
- BUG_ON(k.k->p.snapshot != U32_MAX);
-
-@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
-
- bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
- BTREE_ITER_intent);
-- k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
-+ k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX));
- ret = bkey_err(k);
- if (ret)
- goto err;
-@@ -726,7 +726,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
- static int seq_lookup(struct bch_fs *c, u64 nr)
- {
- return bch2_trans_run(c,
-- for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs,
-+ for_each_btree_key_max(trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
- 0, k,
- 0));
-diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
-index 5597b9d6297f..56a5a7fbc0fd 100644
---- a/fs/bcachefs/trace.h
-+++ b/fs/bcachefs/trace.h
-@@ -199,6 +199,30 @@ DECLARE_EVENT_CLASS(bio,
- (unsigned long long)__entry->sector, __entry->nr_sector)
- );
-
-+/* disk_accounting.c */
-+
-+TRACE_EVENT(accounting_mem_insert,
-+ TP_PROTO(struct bch_fs *c, const char *acc),
-+ TP_ARGS(c, acc),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(unsigned, new_nr )
-+ __string(acc, acc )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->new_nr = c->accounting.k.nr;
-+ __assign_str(acc);
-+ ),
-+
-+ TP_printk("%d,%d entries %u added %s",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->new_nr,
-+ __get_str(acc))
-+);
-+
- /* fs.c: */
- TRACE_EVENT(bch2_sync_fs,
- TP_PROTO(struct super_block *sb, int wait),
-@@ -848,8 +872,8 @@ TRACE_EVENT(move_data,
- TRACE_EVENT(evacuate_bucket,
- TP_PROTO(struct bch_fs *c, struct bpos *bucket,
- unsigned sectors, unsigned bucket_size,
-- u64 fragmentation, int ret),
-- TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret),
-+ int ret),
-+ TP_ARGS(c, bucket, sectors, bucket_size, ret),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
-@@ -857,7 +881,6 @@ TRACE_EVENT(evacuate_bucket,
- __field(u64, bucket )
- __field(u32, sectors )
- __field(u32, bucket_size )
-- __field(u64, fragmentation )
- __field(int, ret )
- ),
-
-@@ -867,45 +890,42 @@ TRACE_EVENT(evacuate_bucket,
- __entry->bucket = bucket->offset;
- __entry->sectors = sectors;
- __entry->bucket_size = bucket_size;
-- __entry->fragmentation = fragmentation;
- __entry->ret = ret;
- ),
-
-- TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i",
-+ TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->member, __entry->bucket,
- __entry->sectors, __entry->bucket_size,
-- __entry->fragmentation, __entry->ret)
-+ __entry->ret)
- );
-
- TRACE_EVENT(copygc,
- TP_PROTO(struct bch_fs *c,
-- u64 sectors_moved, u64 sectors_not_moved,
-- u64 buckets_moved, u64 buckets_not_moved),
-- TP_ARGS(c,
-- sectors_moved, sectors_not_moved,
-- buckets_moved, buckets_not_moved),
-+ u64 buckets,
-+ u64 sectors_seen,
-+ u64 sectors_moved),
-+ TP_ARGS(c, buckets, sectors_seen, sectors_moved),
-
- TP_STRUCT__entry(
- __field(dev_t, dev )
-+ __field(u64, buckets )
-+ __field(u64, sectors_seen )
- __field(u64, sectors_moved )
-- __field(u64, sectors_not_moved )
-- __field(u64, buckets_moved )
-- __field(u64, buckets_not_moved )
- ),
-
- TP_fast_assign(
- __entry->dev = c->dev;
-+ __entry->buckets = buckets;
-+ __entry->sectors_seen = sectors_seen;
- __entry->sectors_moved = sectors_moved;
-- __entry->sectors_not_moved = sectors_not_moved;
-- __entry->buckets_moved = buckets_moved;
-- __entry->buckets_not_moved = buckets_moved;
- ),
-
-- TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
-+ TP_printk("%d,%d buckets %llu sectors seen %llu moved %llu",
- MAJOR(__entry->dev), MINOR(__entry->dev),
-- __entry->sectors_moved, __entry->sectors_not_moved,
-- __entry->buckets_moved, __entry->buckets_not_moved)
-+ __entry->buckets,
-+ __entry->sectors_seen,
-+ __entry->sectors_moved)
- );
-
- TRACE_EVENT(copygc_wait,
-@@ -1316,6 +1336,12 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
- __entry->new_u64s)
- );
-
-+DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
- TRACE_EVENT(path_downgrade,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
-@@ -1352,10 +1378,21 @@ TRACE_EVENT(path_downgrade,
- __entry->pos_snapshot)
- );
-
--DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
-- TP_PROTO(struct btree_trans *trans,
-- unsigned long caller_ip),
-- TP_ARGS(trans, caller_ip)
-+TRACE_EVENT(key_cache_fill,
-+ TP_PROTO(struct btree_trans *trans, const char *key),
-+ TP_ARGS(trans, key),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __string(key, key )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __assign_str(key);
-+ ),
-+
-+ TP_printk("%s %s", __entry->trans_fn, __get_str(key))
- );
-
- TRACE_EVENT(write_buffer_flush,
-@@ -1414,6 +1451,24 @@ TRACE_EVENT(write_buffer_flush_slowpath,
- TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
- );
-
-+TRACE_EVENT(write_buffer_maybe_flush,
-+ TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *key),
-+ TP_ARGS(trans, caller_ip, key),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __string(key, key )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __assign_str(key);
-+ ),
-+
-+ TP_printk("%s %pS %s", __entry->trans_fn, (void *) __entry->caller_ip, __get_str(key))
-+);
-+
- DEFINE_EVENT(fs_str, rebalance_extent,
- TP_PROTO(struct bch_fs *c, const char *str),
- TP_ARGS(c, str)
-diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
-index fb02c1c36004..1a1720116071 100644
---- a/fs/bcachefs/util.h
-+++ b/fs/bcachefs/util.h
-@@ -55,6 +55,16 @@ static inline size_t buf_pages(void *p, size_t len)
- PAGE_SIZE);
- }
-
-+static inline void *bch2_kvmalloc(size_t n, gfp_t flags)
-+{
-+ void *p = unlikely(n >= INT_MAX)
-+ ? vmalloc(n)
-+ : kvmalloc(n, flags & ~__GFP_ZERO);
-+ if (p && (flags & __GFP_ZERO))
-+ memset(p, 0, n);
-+ return p;
-+}
-+
- #define init_heap(heap, _size, gfp) \
- ({ \
- (heap)->nr = 0; \
-@@ -317,6 +327,19 @@ do { \
- _ptr ? container_of(_ptr, type, member) : NULL; \
- })
-
-+static inline struct list_head *list_pop(struct list_head *head)
-+{
-+ if (list_empty(head))
-+ return NULL;
-+
-+ struct list_head *ret = head->next;
-+ list_del_init(ret);
-+ return ret;
-+}
-+
-+#define list_pop_entry(head, type, member) \
-+ container_of_or_null(list_pop(head), type, member)
-+
- /* Does linear interpolation between powers of two */
- static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
- {
-@@ -696,4 +719,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr)
- return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
- }
-
-+static inline void memcpy_swab(void *_dst, void *_src, size_t len)
-+{
-+ u8 *dst = _dst + len;
-+ u8 *src = _src;
-+
-+ while (len--)
-+ *--dst = *src++;
-+}
-+
- #endif /* _BCACHEFS_UTIL_H */
-diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c
-index 6a78553d9b0c..6620ecae26af 100644
---- a/fs/bcachefs/varint.c
-+++ b/fs/bcachefs/varint.c
-@@ -9,6 +9,7 @@
- #include
- #endif
-
-+#include "errcode.h"
- #include "varint.h"
-
- /**
-@@ -53,7 +54,7 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
- u64 v;
-
- if (unlikely(in + bytes > end))
-- return -1;
-+ return -BCH_ERR_varint_decode_error;
-
- if (likely(bytes < 9)) {
- __le64 v_le = 0;
-@@ -115,7 +116,7 @@ int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
- unsigned bytes = ffz(*in) + 1;
-
- if (unlikely(in + bytes > end))
-- return -1;
-+ return -BCH_ERR_varint_decode_error;
-
- if (likely(bytes < 9)) {
- v >>= bytes;
-diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
-index 952aca400faf..aed7c6984173 100644
---- a/fs/bcachefs/xattr.c
-+++ b/fs/bcachefs/xattr.c
-@@ -71,7 +71,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
- };
-
- int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k,
-- enum bch_validate_flags flags)
-+ struct bkey_validate_context from)
- {
- struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
- unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len,
-@@ -309,7 +309,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
- u64 offset = 0, inum = inode->ei_inode.bi_inum;
-
- int ret = bch2_trans_run(c,
-- for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_xattrs,
-+ for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs,
- POS(inum, offset),
- POS(inum, U64_MAX),
- inode->ei_inum.subvol, 0, k, ({
-@@ -565,13 +565,6 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
- ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
- err:
- mutex_unlock(&inode->ei_update_lock);
--
-- if (value &&
-- (opt_id == Opt_background_target ||
-- opt_id == Opt_background_compression ||
-- (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression))))
-- bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);
--
- err_class_exit:
- return bch2_err_class(ret);
- }
-@@ -609,7 +602,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
-
- #endif /* NO_BCACHEFS_FS */
-
--const struct xattr_handler *bch2_xattr_handlers[] = {
-+const struct xattr_handler * const bch2_xattr_handlers[] = {
- &bch_xattr_user_handler,
- &bch_xattr_trusted_handler,
- &bch_xattr_security_handler,
-diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h
-index c188a5ad64ce..132fbbd15a66 100644
---- a/fs/bcachefs/xattr.h
-+++ b/fs/bcachefs/xattr.h
-@@ -6,7 +6,8 @@
-
- extern const struct bch_hash_desc bch2_xattr_hash_desc;
-
--int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-+int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c,
-+ struct bkey_validate_context);
- void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-
- #define bch2_bkey_ops_xattr ((struct bkey_ops) { \
-@@ -44,6 +45,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum,
-
- ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
-
--extern const struct xattr_handler *bch2_xattr_handlers[];
-+extern const struct xattr_handler * const bch2_xattr_handlers[];
-
- #endif /* _BCACHEFS_XATTR_H */
-diff --git a/fs/fs_parser.c b/fs/fs_parser.c
-index 24727ec34e5a..6521e9a9d6ef 100644
---- a/fs/fs_parser.c
-+++ b/fs/fs_parser.c
-@@ -13,7 +13,7 @@
- #include
- #include "internal.h"
-
--static const struct constant_table bool_names[] = {
-+const struct constant_table bool_names[] = {
- { "0", false },
- { "1", true },
- { "false", false },
-@@ -22,6 +22,7 @@ static const struct constant_table bool_names[] = {
- { "yes", true },
- { },
- };
-+EXPORT_SYMBOL(bool_names);
-
- static const struct constant_table *
- __lookup_constant(const struct constant_table *tbl, const char *name)
-diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
-index 6cf713a7e6c6..0974cd33bcba 100644
---- a/include/linux/fs_parser.h
-+++ b/include/linux/fs_parser.h
-@@ -83,6 +83,8 @@ extern int fs_lookup_param(struct fs_context *fc,
-
- extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
-
-+extern const struct constant_table bool_names[];
-+
- #ifdef CONFIG_VALIDATE_FS_PARSER
- extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
- int low, int high, int special);
-diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
-index 43a7b9dcf15e..fe17b4828171 100644
---- a/include/linux/min_heap.h
-+++ b/include/linux/min_heap.h
-@@ -15,8 +15,8 @@
- */
- #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \
- struct _name { \
-- int nr; \
-- int size; \
-+ size_t nr; \
-+ size_t size; \
- _type *data; \
- _type preallocated[_nr]; \
- }
---
-2.45.2
-
diff --git a/sys-kernel/hardened-kernel/hardened-kernel-6.11.8.ebuild b/sys-kernel/hardened-kernel/hardened-kernel-6.11.8.ebuild
deleted file mode 100644
index 9a031cf..0000000
--- a/sys-kernel/hardened-kernel/hardened-kernel-6.11.8.ebuild
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2020-2024 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-KERNEL_IUSE_GENERIC_UKI=1
-KERNEL_IUSE_MODULES_SIGN=1
-
-inherit kernel-build toolchain-funcs
-
-MY_P=linux-${PV%.*}
-GENPATCHES_P=genpatches-${PV%.*}-$(( ${PV##*.} + 2 ))
-CONFIG_VER=6.11.5-gentoo
-GENTOO_CONFIG_VER=g14
-HARDENED_PATCH_VER="${PV}-hardened1"
-GENPATCHES_EXCLUDE="1500_XATTR_USER_PREFIX.patch - 1510_fs-enable-link-security-restrictions-by-default.patch - 2900_dev-root-proc-mount-fix.patch - 4200_fbcondecor.patch - 4400_alpha-sysctl-uac.patch" - -DESCRIPTION="Linux kernel built with Gentoo patches" -HOMEPAGE=" - https://wiki.gentoo.org/wiki/Project:Distribution_Kernel - https://www.kernel.org/ -" -SRC_URI+=" - https://cdn.kernel.org/pub/linux/kernel/v$(ver_cut 1).x/${MY_P}.tar.xz - https://dev.gentoo.org/~mpagano/dist/genpatches/${GENPATCHES_P}.base.tar.xz - https://dev.gentoo.org/~mpagano/dist/genpatches/${GENPATCHES_P}.extras.tar.xz - experimental? ( - https://dev.gentoo.org/~mpagano/dist/genpatches/${GENPATCHES_P}.experimental.tar.xz - ) - https://github.com/anthraxx/linux-hardened/releases/download/v${HARDENED_PATCH_VER}/linux-hardened-v${HARDENED_PATCH_VER}.patch - https://github.com/projg2/gentoo-kernel-config/archive/${GENTOO_CONFIG_VER}.tar.gz - -> gentoo-kernel-config-${GENTOO_CONFIG_VER}.tar.gz - amd64? ( - https://raw.githubusercontent.com/projg2/fedora-kernel-config-for-gentoo/${CONFIG_VER}/kernel-x86_64-fedora.config - -> kernel-x86_64-fedora.config.${CONFIG_VER} - ) - arm64? ( - https://raw.githubusercontent.com/projg2/fedora-kernel-config-for-gentoo/${CONFIG_VER}/kernel-aarch64-fedora.config - -> kernel-aarch64-fedora.config.${CONFIG_VER} - ) - ppc64? ( - https://raw.githubusercontent.com/projg2/fedora-kernel-config-for-gentoo/${CONFIG_VER}/kernel-ppc64le-fedora.config - -> kernel-ppc64le-fedora.config.${CONFIG_VER} - ) - x86? ( - https://raw.githubusercontent.com/projg2/fedora-kernel-config-for-gentoo/${CONFIG_VER}/kernel-i686-fedora.config - -> kernel-i686-fedora.config.${CONFIG_VER} - ) -" -S=${WORKDIR}/${MY_P} - -KEYWORDS="amd64 ~arm arm64 ~hppa ~loong ~ppc ppc64 ~riscv ~sparc x86" -IUSE="debug +experimental" -REQUIRED_USE=" - arm? ( savedconfig ) - hppa? ( savedconfig ) - riscv? ( savedconfig ) - sparc? ( savedconfig ) -" - -RDEPEND=" - !sys-kernel/gentoo-kernel-bin:${SLOT} -" -BDEPEND=" - debug? 
( dev-util/pahole ) -" -PDEPEND=" - >=virtual/dist-kernel-${PV} -" - -QA_FLAGS_IGNORED=" - usr/src/linux-.*/scripts/gcc-plugins/.*.so - usr/src/linux-.*/vmlinux - usr/src/linux-.*/arch/powerpc/kernel/vdso.*/vdso.*.so.dbg -" - -src_prepare() { - # remove some genpatches causes conflicts with linux-hardened patch - for patch in ${GENPATCHES_EXCLUDE}; do - rm -f ${WORKDIR}/${patch} - done - # Remove already exists changes in linux-hardened patch - sed -i '322,337d' "${WORKDIR}/4567_distro-Gentoo-Kconfig.patch" - # include linux-hardened patch with priority - cp ${DISTDIR}/linux-hardened-v${HARDENED_PATCH_VER}.patch ${WORKDIR}/1199_linux-hardened-${HARDENED_PATCH_VER}.patch - - local PATCHES=( - # meh, genpatches have no directory - "${WORKDIR}"/*.patch - ) - default - - sed -i "s@\-hardened1@@g" Makefile || die - - local biendian=false - - # prepare the default config - case ${ARCH} in - amd64) - cp "${FILESDIR}/${MY_P}.amd64.config" .config || die - ;; - *) - die "Unsupported arch ${ARCH}" - ;; - esac - - local myversion="-hardened" - echo "CONFIG_LOCALVERSION=\"${myversion}\"" > "${T}"/version.config || die - local dist_conf_path="${WORKDIR}/gentoo-kernel-config-${GENTOO_CONFIG_VER}" - - local merge_configs=( - "${T}"/version.config - ) - use debug || merge_configs+=( - "${dist_conf_path}"/no-debug.config - ) - - merge_configs+=( "${dist_conf_path}"/hardened-base.config ) - - tc-is-gcc && merge_configs+=( "${dist_conf_path}"/hardened-gcc-plugins.config ) - - if [[ -f "${dist_conf_path}/hardened-${ARCH}.config" ]]; then - merge_configs+=( "${dist_conf_path}/hardened-${ARCH}.config" ) - fi - - # this covers ppc64 and aarch64_be only for now - if [[ ${biendian} == true && $(tc-endian) == big ]]; then - merge_configs+=( "${dist_conf_path}/big-endian.config" ) - fi - - use secureboot && merge_configs+=( "${dist_conf_path}/secureboot.config" ) - - kernel-build_merge_configs "${merge_configs[@]}" -}
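For reference, the merge_configs fragments assembled in the deleted src_prepare() above are applied by kernel-build_merge_configs from the kernel-build eclass. A minimal standalone sketch of the same fragment-merge step, assuming a checked-out kernel source tree and placeholder fragment file names (this is not the eclass implementation), looks like:

#!/bin/bash
# Merge Kconfig fragments over an existing .config inside a kernel tree,
# then let Kconfig resolve anything the fragments left unset.
# Fragment names below are illustrative placeholders only.
./scripts/kconfig/merge_config.sh -m .config \
	version.config no-debug.config hardened-base.config
make olddefconfig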