mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-04-08 00:00:05 +03:00
Compare commits
106 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
b5c4d0bcc4 | ||
|
592f04256f | ||
|
bf44ffca50 | ||
|
4812b3e3d9 | ||
|
4a5c50774e | ||
|
e47a5ce2bc | ||
|
3aa2a4c47c | ||
|
33483738c6 | ||
|
ea5bdadabd | ||
|
66c943be6c | ||
|
3d968816a4 | ||
|
76b3ec9d66 | ||
|
3e2d5b2b9a | ||
|
7c47145f6c | ||
|
396545c2ea | ||
|
3bfe433d22 | ||
|
19d3a1778c | ||
|
e54f288992 | ||
|
7c66e9fac5 | ||
|
2160e9a7bd | ||
|
941dfd7a29 | ||
|
18b4914587 | ||
|
8af5c93e48 | ||
|
6657ce2de3 | ||
|
5504533986 | ||
|
a55c655158 | ||
|
b8435c5693 | ||
|
80d0a21ed3 | ||
|
a1080f2f6a | ||
|
3bf9a19571 | ||
|
00dd889d33 | ||
|
1a8e684ae0 | ||
|
3d972489a8 | ||
|
197437be12 | ||
|
0a23a5ab1a | ||
|
017c3cc66e | ||
|
8b7810403b | ||
|
ce0c2241a4 | ||
|
846f3398a2 | ||
|
e6aecdd3b8 | ||
|
62ea232b09 | ||
|
64ce740ac6 | ||
|
c0836924b1 | ||
|
f42ee45c6e | ||
|
6cbadc946d | ||
|
86cbeaf1c2 | ||
|
dd1a882d17 | ||
|
3e15e96cb9 | ||
|
d7f02b4ed5 | ||
|
1606364799 | ||
|
553e605886 | ||
|
55538d928b | ||
|
7d30f895c6 | ||
|
8cc5bdede9 | ||
|
67c9b378c7 | ||
|
55afc8ad37 | ||
|
3fcc955d00 | ||
|
c4e25c6c3c | ||
|
8361ae1a42 | ||
|
895fc9a101 | ||
|
56b2449ded | ||
|
117e5fd7e9 | ||
|
35a13784a2 | ||
|
16e23eaec1 | ||
|
9cc5c9a79b | ||
|
6f1429b4eb | ||
|
7210d5c911 | ||
|
1055935ffe | ||
|
634c812a1e | ||
|
601deb6e5d | ||
|
ef25411bdc | ||
|
8b31dfb350 | ||
|
2aeeac7785 | ||
|
83cc57700f | ||
|
626b1945eb | ||
|
077677e61c | ||
|
a17e2ca95c | ||
|
ac124c8885 | ||
|
c560ff06f4 | ||
|
51ebefdd6e | ||
|
77d72319a4 | ||
|
c2af793c8c | ||
|
de51418b60 | ||
|
6829fb2010 | ||
|
d4932bf0e3 | ||
|
9beb9fabb2 | ||
|
511a714ed3 | ||
|
b7831af74d | ||
|
64767d6443 | ||
|
8d5f44d3b1 | ||
|
5350fd2b7b | ||
|
cb8dc1b2ba | ||
|
9ed0f3fd72 | ||
|
2541755981 | ||
|
553d6f107a | ||
|
4f9293b045 | ||
|
282331defa | ||
|
b1e6d1ced2 | ||
|
3666da87f2 | ||
|
6153c37e38 | ||
|
4e25d26a81 | ||
|
1dba682527 | ||
|
571ca8b8ce | ||
|
01413354a5 | ||
|
45468a9a6f | ||
|
ad41c2fb76 |
.bcachefs_revision
.github/workflows
Cargo.lockCargo.tomlINSTALL.mdMakefileMakefile.compilerarch/etc
bcachefs.8bch_bindgen
c_src
bcachefs.ccmd_data.ccmd_device.ccmd_dump.ccmd_format.ccmd_fs.ccmd_fsck.ccmd_fusemount.ccmd_key.ccmd_kill_btree_node.ccmd_list_journal.ccmd_migrate.ccmd_option.ccmd_top.ccmds.hcrypto.ccrypto.hlibbcachefs.clibbcachefs.hposix_to_bcachefs.cposix_to_bcachefs.htools-util.ctools-util.h
flake.lockflake.nixinclude
crypto
linux
libbcachefs
acl.calloc_background.calloc_background.halloc_background_format.halloc_foreground.calloc_foreground.halloc_types.hbackpointers.cbackpointers.hbbpos.hbcachefs.hbcachefs_format.hbcachefs_ioctl.hbkey.cbkey.hbkey_methods.cbkey_methods.hbkey_types.hbset.cbtree_cache.cbtree_cache.hbtree_gc.cbtree_gc.hbtree_io.cbtree_io.hbtree_iter.cbtree_iter.hbtree_journal_iter.cbtree_journal_iter.hbtree_journal_iter_types.hbtree_key_cache.cbtree_locking.c
@ -1 +1 @@
|
||||
3b80552e70573764bbf38b89c58749aef9dd8753
|
||||
8e5380376586b54782ffc7a4d9cf04eaf5976f85
|
||||
|
39
.github/workflows/nix-flake.yml
vendored
39
.github/workflows/nix-flake.yml
vendored
@ -1,22 +1,31 @@
|
||||
name: "Nix-Tests"
|
||||
name: Nix Flake actions
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
|
||||
jobs:
|
||||
nix-flake-check:
|
||||
nix-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: cachix/install-nix-action@v27
|
||||
with:
|
||||
extra_nix_config: |
|
||||
experimental-features = nix-command flakes
|
||||
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: cachix/cachix-action@v15
|
||||
with:
|
||||
name: bcachefs-tools
|
||||
# If you chose API tokens for write access OR if you have a private cache
|
||||
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
|
||||
- run: nix flake show
|
||||
- run: nix flake check --print-build-logs
|
||||
- run: nix build --print-build-logs
|
||||
- uses: cachix/install-nix-action@v30
|
||||
- id: set-matrix
|
||||
name: Generate Nix Matrix
|
||||
run: |
|
||||
set -Eeu
|
||||
matrix="$(nix eval --json '.#githubActions.matrix')"
|
||||
echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
|
||||
|
||||
nix-build:
|
||||
name: ${{ matrix.name }} (${{ matrix.system }})
|
||||
needs: nix-matrix
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix: ${{fromJSON(needs.nix-matrix.outputs.matrix)}}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: cachix/install-nix-action@v30
|
||||
- run: nix build -L '.#${{ matrix.attr }}'
|
||||
|
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
|
||||
|
||||
[[package]]
|
||||
name = "bcachefs-tools"
|
||||
version = "1.12.0"
|
||||
version = "1.25.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bch_bindgen",
|
||||
|
@ -1,9 +1,13 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
default-members = [".", "bch_bindgen"]
|
||||
|
||||
[package]
|
||||
name = "bcachefs-tools"
|
||||
version = "1.12.0"
|
||||
version = "1.25.1"
|
||||
authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ]
|
||||
edition = "2021"
|
||||
rust-version = "1.77"
|
||||
rust-version = "1.77.0"
|
||||
|
||||
[[bin]]
|
||||
name = "bcachefs"
|
||||
|
18
INSTALL.md
18
INSTALL.md
@ -39,12 +39,19 @@ Starting from Debian Trixie and Ubuntu 23.10, you will additionally need:
|
||||
apt install -y systemd-dev
|
||||
```
|
||||
|
||||
Fedora: install the "Development tools" group along with:
|
||||
Fedora: install build dependencies either with `dnf builddep bcachefs-tools` or with:
|
||||
```shell
|
||||
dnf install -y libaio-devel libsodium-devel \
|
||||
dnf install -y @c-development libaio-devel libsodium-devel \
|
||||
libblkid-devel libzstd-devel zlib-devel userspace-rcu-devel \
|
||||
lz4-devel libuuid-devel valgrind-devel keyutils-libs-devel \
|
||||
findutils udev systemd-devel llvm-devel
|
||||
findutils systemd-devel clang-devel llvm-devel rust cargo
|
||||
```
|
||||
|
||||
openSUSE: install build dependencies with:
|
||||
```shell
|
||||
zypper in -y libaio-devel libsodium-devel libblkid-devel liburcu-devel \
|
||||
libzstd-devel zlib-devel liblz4-devel libuuid-devel valgrind-devel \
|
||||
keyutils-devel findutils udev systemd-devel llvm-devel
|
||||
```
|
||||
|
||||
Arch: install bcachefs-tools-git from the AUR.
|
||||
@ -80,6 +87,11 @@ Arch:
|
||||
pacman -S fuse3
|
||||
```
|
||||
|
||||
openSUSE:
|
||||
```shell
|
||||
zypper in -y fuse3-devel
|
||||
```
|
||||
|
||||
Then, make using the `BCACHEFS_FUSE` environment variable (make clean first if
|
||||
previously built without fuse support):
|
||||
|
||||
|
14
Makefile
14
Makefile
@ -1,4 +1,4 @@
|
||||
VERSION=1.12.0
|
||||
VERSION=1.25.1
|
||||
|
||||
PREFIX?=/usr/local
|
||||
LIBEXECDIR?=$(PREFIX)/libexec
|
||||
@ -22,6 +22,13 @@ else
|
||||
CARGO_CLEAN_ARGS = --quiet
|
||||
endif
|
||||
|
||||
# when cross compiling, cargo places the built binary in a different location
|
||||
ifdef CARGO_BUILD_TARGET
|
||||
BUILT_BIN = target/$(CARGO_BUILD_TARGET)/release/bcachefs
|
||||
else
|
||||
BUILT_BIN = target/release/bcachefs
|
||||
endif
|
||||
|
||||
# Prevent recursive expansions of $(CFLAGS) to avoid repeatedly performing
|
||||
# compile tests
|
||||
CFLAGS:=$(CFLAGS)
|
||||
@ -73,12 +80,13 @@ CFLAGS+=$(call cc-disable-warning, zero-length-array)
|
||||
CFLAGS+=$(call cc-disable-warning, shift-overflow)
|
||||
CFLAGS+=$(call cc-disable-warning, enum-conversion)
|
||||
CFLAGS+=$(call cc-disable-warning, gnu-variable-sized-type-not-at-end)
|
||||
export RUSTFLAGS=-C default-linker-libraries
|
||||
|
||||
PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib liblz4 libzstd libudev libkeyutils"
|
||||
ifdef BCACHEFS_FUSE
|
||||
PKGCONFIG_LIBS+="fuse3 >= 3.7"
|
||||
CFLAGS+=-DBCACHEFS_FUSE
|
||||
export RUSTFLAGS=--cfg fuse
|
||||
RUSTFLAGS+=--cfg feature="fuse"
|
||||
endif
|
||||
|
||||
PKGCONFIG_CFLAGS:=$(shell $(PKG_CONFIG) --cflags $(PKGCONFIG_LIBS))
|
||||
@ -194,7 +202,7 @@ cmd_version.o : .version
|
||||
install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
|
||||
install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
|
||||
install: bcachefs $(optional_install)
|
||||
$(INSTALL) -m0755 -D target/release/bcachefs -t $(DESTDIR)$(ROOT_SBINDIR)
|
||||
$(INSTALL) -m0755 -D $(BUILT_BIN) -t $(DESTDIR)$(ROOT_SBINDIR)
|
||||
$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
|
||||
$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
|
||||
$(INSTALL) -m0755 -D initramfs/hook $(DESTDIR)$(INITRAMFS_HOOK)
|
||||
|
@ -13,7 +13,7 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \
|
||||
$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
|
||||
|
||||
# output directory for tests below
|
||||
TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
|
||||
TMPOUT = .tmp_$$$$
|
||||
|
||||
# try-run
|
||||
# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
|
||||
@ -53,13 +53,11 @@ cc-option = $(call __cc-option, $(CC),\
|
||||
|
||||
# cc-option-yn
|
||||
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
|
||||
cc-option-yn = $(call try-run,\
|
||||
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
|
||||
cc-option-yn = $(if $(call cc-option,$1),y,n)
|
||||
|
||||
# cc-disable-warning
|
||||
# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
|
||||
cc-disable-warning = $(call try-run,\
|
||||
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
|
||||
cc-disable-warning = $(if $(call cc-option,-W$(strip $1)),-Wno-$(strip $1))
|
||||
|
||||
# gcc-min-version
|
||||
# Usage: cflags-$(call gcc-min-version, 70100) += -foo
|
||||
@ -75,8 +73,11 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
|
||||
|
||||
# __rustc-option
|
||||
# Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage)
|
||||
# TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4
|
||||
__rustc-option = $(call try-run,\
|
||||
$(1) $(2) $(3) --crate-type=rlib /dev/null --out-dir=$$TMPOUT -o "$$TMP",$(3),$(4))
|
||||
echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\
|
||||
$(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\
|
||||
--crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4))
|
||||
|
||||
# rustc-option
|
||||
# Usage: rustflags-y += $(call rustc-option,-Cinstrument-coverage,-Zinstrument-coverage)
|
||||
@ -85,5 +86,4 @@ rustc-option = $(call __rustc-option, $(RUSTC),\
|
||||
|
||||
# rustc-option-yn
|
||||
# Usage: flag := $(call rustc-option-yn,-Cinstrument-coverage)
|
||||
rustc-option-yn = $(call try-run,\
|
||||
$(RUSTC) $(KBUILD_RUSTFLAGS) $(1) --crate-type=rlib /dev/null --out-dir=$$TMPOUT -o "$$TMP",y,n)
|
||||
rustc-option-yn = $(if $(call rustc-option,$1),y,n)
|
||||
|
15
arch/etc/initcpio/hooks/bcachefs
Normal file
15
arch/etc/initcpio/hooks/bcachefs
Normal file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/ash
|
||||
|
||||
run_hook() {
|
||||
local rootdev
|
||||
if rootdev="$(resolve_device "$root")" && bcachefs unlock -c "$rootdev" >/dev/null 2>&1
|
||||
then
|
||||
echo "Unlocking $rootdev:"
|
||||
while true
|
||||
do
|
||||
bcachefs unlock "$rootdev" && break
|
||||
done
|
||||
fi
|
||||
}
|
||||
|
||||
# vim: set ft=sh ts=4 sw=4 et:
|
16
arch/etc/initcpio/install/bcachefs
Normal file
16
arch/etc/initcpio/install/bcachefs
Normal file
@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
build() {
|
||||
add_module bcachefs
|
||||
add_binary bcachefs
|
||||
|
||||
add_runscript
|
||||
}
|
||||
|
||||
help() {
|
||||
cat <<HELPEOF
|
||||
This hook is for getting the bcachefs unlock prompt at boot
|
||||
HELPEOF
|
||||
}
|
||||
|
||||
# vim set ft=sh ts=4 sw=4 et:
|
66
arch/etc/mkinitcpio.conf
Normal file
66
arch/etc/mkinitcpio.conf
Normal file
@ -0,0 +1,66 @@
|
||||
# vim:set ft=sh
|
||||
# MODULES
|
||||
# The following modules are loaded before any boot hooks are
|
||||
# run. Advanced users may wish to specify all system modules
|
||||
# in this array. For instance:
|
||||
# MODULES=(piix ide_disk reiserfs)
|
||||
MODULES=(bcachefs)
|
||||
|
||||
# BINARIES
|
||||
# This setting includes any additional binaries a given user may
|
||||
# wish into the CPIO image. This is run last, so it may be used to
|
||||
# override the actual binaries included by a given hook
|
||||
# BINARIES are dependency parsed, so you may safely ignore libraries
|
||||
BINARIES=(bcachefs)
|
||||
|
||||
# FILES
|
||||
# This setting is similar to BINARIES above, however, files are added
|
||||
# as-is and are not parsed in any way. This is useful for config files.
|
||||
FILES=()
|
||||
|
||||
# HOOKS
|
||||
# This is the most important setting in this file. The HOOKS control the
|
||||
# modules and scripts added to the image, and what happens at boot time.
|
||||
# Order is important, and it is recommended that you do not change the
|
||||
# order in which HOOKS are added. Run 'mkinitcpio -H <hook name>' for
|
||||
# help on a given hook.
|
||||
# 'base' is _required_ unless you know precisely what you are doing.
|
||||
# 'udev' is _required_ in order to automatically load modules
|
||||
# 'filesystems' is _required_ unless you specify your fs modules in MODULES
|
||||
# Examples:
|
||||
## This setup specifies all modules in the MODULES setting above.
|
||||
## No raid, lvm2, or encrypted root is needed.
|
||||
# HOOKS="base"
|
||||
#
|
||||
## This setup will autodetect all modules for your system and should
|
||||
## work as a sane default
|
||||
# HOOKS="base udev autodetect block filesystems"
|
||||
#
|
||||
## This setup will generate a 'full' image which supports most systems.
|
||||
## No autodetection is done.
|
||||
# HOOKS="base udev block filesystems"
|
||||
#
|
||||
## This setup assembles a pata mdadm array with an encrypted root FS.
|
||||
## Note: See 'mkinitcpio -H mdadm' for more information on raid devices.
|
||||
# HOOKS="base udev block mdadm encrypt filesystems"
|
||||
#
|
||||
## This setup loads an lvm2 volume group on a usb device.
|
||||
# HOOKS="base udev block lvm2 filesystems"
|
||||
#
|
||||
## NOTE: If you have /usr on a separate partition, you MUST include the
|
||||
# usr, fsck and shutdown hooks.
|
||||
HOOKS=(base udev autodetect modconf block filesystems bcachefs keyboard fsck)
|
||||
|
||||
# COMPRESSION
|
||||
# Use this to compress the initramfs image. By default, gzip compression
|
||||
# is used. Use 'cat' to create an uncompressed image.
|
||||
#COMPRESSION="gzip"
|
||||
#COMPRESSION="bzip2"
|
||||
#COMPRESSION="lzma"
|
||||
#COMPRESSION="xz"
|
||||
#COMPRESSION="lzop"
|
||||
#COMPRESSION="lz4"
|
||||
|
||||
# COMPRESSION_OPTIONS
|
||||
# Additional options for the compressor
|
||||
#COMPRESSION_OPTIONS=""
|
@ -37,6 +37,8 @@ Check an existing filesystem for errors.
|
||||
.Bl -tag -width 18n -compact
|
||||
.It Ic fs usage
|
||||
Show disk usage
|
||||
.It Ic fs top
|
||||
Show runtime performance information
|
||||
.El
|
||||
.Ss Commands for managing devices within a running filesystem
|
||||
.Bl -tag -width 22n -compact
|
||||
@ -91,7 +93,7 @@ Add default superblock, after bcachefs migrate
|
||||
.El
|
||||
.Ss Commands for operating on files in a bcachefs filesystem
|
||||
.Bl -tag -width 18n -compact
|
||||
.It Ic setattr
|
||||
.It Ic set-file-option
|
||||
Set various per file attributes
|
||||
.El
|
||||
.Ss Commands for debugging
|
||||
@ -574,7 +576,7 @@ Offset of existing superblock
|
||||
.El
|
||||
.Sh Commands for operating on files in a bcachefs filesystem
|
||||
.Bl -tag -width Ds
|
||||
.It Nm Ic setattr Oo Ar options Oc Ar devices\ ...
|
||||
.It Nm Ic set-file-option Oo Ar options Oc Ar devices\ ...
|
||||
.Bl -tag -width Ds
|
||||
.It Fl -data_replicas Ns = Ns Ar number
|
||||
Number of data replicas
|
||||
|
@ -63,7 +63,6 @@ fn main() {
|
||||
.allowlist_function("printbuf.*")
|
||||
.blocklist_type("rhash_lock_head")
|
||||
.blocklist_type("srcu_struct")
|
||||
.blocklist_type("bch_ioctl_data.*")
|
||||
.allowlist_var("BCH_.*")
|
||||
.allowlist_var("KEY_SPEC_.*")
|
||||
.allowlist_var("Fix753_.*")
|
||||
|
@ -1,3 +1,8 @@
|
||||
#![allow(clippy::missing_safety_doc)]
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
#![allow(clippy::transmute_int_to_bool)]
|
||||
#![allow(clippy::unnecessary_cast)]
|
||||
#![allow(clippy::useless_transmute)]
|
||||
#![allow(non_upper_case_globals)]
|
||||
#![allow(non_camel_case_types)]
|
||||
#![allow(non_snake_case)]
|
||||
|
@ -50,6 +50,7 @@ pub enum BkeyValC<'a> {
|
||||
logged_op_truncate(&'a c::bch_logged_op_truncate),
|
||||
logged_op_finsert(&'a c::bch_logged_op_finsert),
|
||||
accounting(&'a c::bch_accounting),
|
||||
inode_alloc_cursor(&'a c::bch_inode_alloc_cursor),
|
||||
}
|
||||
|
||||
impl<'a, 'b> BkeySC<'a> {
|
||||
@ -64,7 +65,8 @@ impl<'a, 'b> BkeySC<'a> {
|
||||
BkeySCToText { k: self, fs }
|
||||
}
|
||||
|
||||
pub fn v(&'a self) -> BkeyValC {
|
||||
#[allow(clippy::missing_transmute_annotations)]
|
||||
pub fn v(&'a self) -> BkeyValC<'a> {
|
||||
unsafe {
|
||||
let ty: c::bch_bkey_type = transmute(self.k.type_ as u32);
|
||||
|
||||
@ -106,6 +108,7 @@ impl<'a, 'b> BkeySC<'a> {
|
||||
KEY_TYPE_logged_op_truncate => logged_op_truncate(transmute(self.v)),
|
||||
KEY_TYPE_logged_op_finsert => logged_op_finsert(transmute(self.v)),
|
||||
KEY_TYPE_accounting => accounting(transmute(self.v)),
|
||||
KEY_TYPE_inode_alloc_cursor => inode_alloc_cursor(transmute(self.v)),
|
||||
KEY_TYPE_MAX => unreachable!(),
|
||||
}
|
||||
}
|
||||
@ -127,7 +130,7 @@ pub struct BkeySCToText<'a, 'b> {
|
||||
fs: &'b Fs,
|
||||
}
|
||||
|
||||
impl<'a, 'b> fmt::Display for BkeySCToText<'a, 'b> {
|
||||
impl fmt::Display for BkeySCToText<'_, '_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
unsafe {
|
||||
printbuf_to_formatter(f, |buf| {
|
||||
|
@ -15,7 +15,7 @@ pub struct BtreeTrans<'f> {
|
||||
}
|
||||
|
||||
impl<'f> BtreeTrans<'f> {
|
||||
pub fn new(fs: &'f Fs) -> BtreeTrans {
|
||||
pub fn new(fs: &'f Fs) -> BtreeTrans<'f> {
|
||||
unsafe {
|
||||
BtreeTrans {
|
||||
raw: &mut *c::__bch2_trans_get(fs.raw, 0),
|
||||
@ -25,7 +25,7 @@ impl<'f> BtreeTrans<'f> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Drop for BtreeTrans<'f> {
|
||||
impl Drop for BtreeTrans<'_> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { c::bch2_trans_put(&mut *self.raw) }
|
||||
}
|
||||
@ -53,7 +53,7 @@ bitflags! {
|
||||
|
||||
pub struct BtreeIter<'t> {
|
||||
raw: c::btree_iter,
|
||||
trans: PhantomData<&'t BtreeTrans<'t>>,
|
||||
trans: &'t BtreeTrans<'t>,
|
||||
}
|
||||
|
||||
impl<'t> BtreeIter<'t> {
|
||||
@ -76,14 +76,14 @@ impl<'t> BtreeIter<'t> {
|
||||
|
||||
BtreeIter {
|
||||
raw: iter.assume_init(),
|
||||
trans: PhantomData,
|
||||
trans: trans,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek_upto<'i>(&'i mut self, end: c::bpos) -> Result<Option<BkeySC>, bch_errcode> {
|
||||
pub fn peek_max(&mut self, end: c::bpos) -> Result<Option<BkeySC<'_>>, bch_errcode> {
|
||||
unsafe {
|
||||
let k = c::bch2_btree_iter_peek_upto(&mut self.raw, end);
|
||||
let k = c::bch2_btree_iter_peek_max(self.trans.raw, &mut self.raw, end);
|
||||
errptr_to_result_c(k.k).map(|_| {
|
||||
if !k.k.is_null() {
|
||||
Some(BkeySC {
|
||||
@ -99,12 +99,12 @@ impl<'t> BtreeIter<'t> {
|
||||
}
|
||||
|
||||
pub fn peek(&mut self) -> Result<Option<BkeySC>, bch_errcode> {
|
||||
self.peek_upto(SPOS_MAX)
|
||||
self.peek_max(SPOS_MAX)
|
||||
}
|
||||
|
||||
pub fn peek_and_restart(&mut self) -> Result<Option<BkeySC>, bch_errcode> {
|
||||
unsafe {
|
||||
let k = c::bch2_btree_iter_peek_and_restart_outlined(&mut self.raw);
|
||||
let k = c::bch2_btree_iter_peek_and_restart_outlined(self.trans.raw, &mut self.raw);
|
||||
|
||||
errptr_to_result_c(k.k).map(|_| {
|
||||
if !k.k.is_null() {
|
||||
@ -122,20 +122,20 @@ impl<'t> BtreeIter<'t> {
|
||||
|
||||
pub fn advance(&mut self) {
|
||||
unsafe {
|
||||
c::bch2_btree_iter_advance(&mut self.raw);
|
||||
c::bch2_btree_iter_advance(self.trans.raw, &mut self.raw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Drop for BtreeIter<'t> {
|
||||
impl Drop for BtreeIter<'_> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
|
||||
unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BtreeNodeIter<'t> {
|
||||
raw: c::btree_iter,
|
||||
trans: PhantomData<&'t BtreeTrans<'t>>,
|
||||
trans: &'t BtreeTrans<'t>,
|
||||
}
|
||||
|
||||
impl<'t> BtreeNodeIter<'t> {
|
||||
@ -146,7 +146,7 @@ impl<'t> BtreeNodeIter<'t> {
|
||||
locks_want: u32,
|
||||
depth: u32,
|
||||
flags: BtreeIterFlags,
|
||||
) -> BtreeNodeIter {
|
||||
) -> BtreeNodeIter<'t> {
|
||||
unsafe {
|
||||
let mut iter: MaybeUninit<c::btree_iter> = MaybeUninit::uninit();
|
||||
c::bch2_trans_node_iter_init(
|
||||
@ -161,52 +161,53 @@ impl<'t> BtreeNodeIter<'t> {
|
||||
|
||||
BtreeNodeIter {
|
||||
raw: iter.assume_init(),
|
||||
trans: PhantomData,
|
||||
trans: trans,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
|
||||
pub fn peek(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
|
||||
unsafe {
|
||||
let b = c::bch2_btree_iter_peek_node(&mut self.raw);
|
||||
let b = c::bch2_btree_iter_peek_node(self.trans.raw, &mut self.raw);
|
||||
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek_and_restart<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
|
||||
pub fn peek_and_restart(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
|
||||
unsafe {
|
||||
let b = c::bch2_btree_iter_peek_node_and_restart(&mut self.raw);
|
||||
let b = c::bch2_btree_iter_peek_node_and_restart(self.trans.raw, &mut self.raw);
|
||||
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn advance<'i>(&'i mut self) {
|
||||
pub fn advance(&mut self) {
|
||||
unsafe {
|
||||
c::bch2_btree_iter_next_node(&mut self.raw);
|
||||
c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
|
||||
unsafe {
|
||||
let b = c::bch2_btree_iter_next_node(&mut self.raw);
|
||||
let b = c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
|
||||
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Drop for BtreeNodeIter<'t> {
|
||||
impl Drop for BtreeNodeIter<'_> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
|
||||
unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'b, 'f> c::btree {
|
||||
pub fn to_text(&'b self, fs: &'f Fs) -> BtreeNodeToText<'b, 'f> {
|
||||
BtreeNodeToText { b: &self, fs }
|
||||
BtreeNodeToText { b: self, fs }
|
||||
}
|
||||
|
||||
pub fn ondisk_to_text(&'b self, fs: &'f Fs) -> BtreeNodeOndiskToText<'b, 'f> {
|
||||
BtreeNodeOndiskToText { b: &self, fs }
|
||||
BtreeNodeOndiskToText { b: self, fs }
|
||||
}
|
||||
}
|
||||
|
||||
@ -215,7 +216,7 @@ pub struct BtreeNodeToText<'b, 'f> {
|
||||
fs: &'f Fs,
|
||||
}
|
||||
|
||||
impl<'b, 'f> fmt::Display for BtreeNodeToText<'b, 'f> {
|
||||
impl fmt::Display for BtreeNodeToText<'_, '_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
printbuf_to_formatter(f, |buf| unsafe {
|
||||
c::bch2_btree_node_to_text(buf, self.fs.raw, self.b)
|
||||
@ -228,7 +229,7 @@ pub struct BtreeNodeOndiskToText<'b, 'f> {
|
||||
fs: &'f Fs,
|
||||
}
|
||||
|
||||
impl<'b, 'f> fmt::Display for BtreeNodeOndiskToText<'b, 'f> {
|
||||
impl fmt::Display for BtreeNodeOndiskToText<'_, '_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
printbuf_to_formatter(f, |buf| unsafe {
|
||||
c::bch2_btree_node_ondisk_to_text(buf, self.fs.raw, self.b)
|
||||
|
@ -9,7 +9,7 @@ pub struct Fs {
|
||||
}
|
||||
|
||||
impl Fs {
|
||||
pub fn open(devs: &Vec<PathBuf>, opts: c::bch_opts) -> Result<Fs, bch_errcode> {
|
||||
pub fn open(devs: &[PathBuf], opts: c::bch_opts) -> Result<Fs, bch_errcode> {
|
||||
let devs: Vec<_> = devs
|
||||
.iter()
|
||||
.map(|i| CString::new(i.as_os_str().as_bytes()).unwrap().into_raw())
|
||||
|
@ -109,15 +109,10 @@ impl FromStr for c::btree_id {
|
||||
let s = CString::new(s).unwrap();
|
||||
let p = s.as_ptr();
|
||||
|
||||
let v = unsafe {
|
||||
c::match_string(
|
||||
c::__bch2_btree_ids[..].as_ptr(),
|
||||
(-(1 as isize)) as usize,
|
||||
p,
|
||||
)
|
||||
};
|
||||
let v =
|
||||
unsafe { c::match_string(c::__bch2_btree_ids[..].as_ptr(), (-1_isize) as usize, p) };
|
||||
if v >= 0 {
|
||||
Ok(unsafe { std::mem::transmute(v) })
|
||||
Ok(unsafe { std::mem::transmute::<i32, bcachefs::btree_id>(v) })
|
||||
} else {
|
||||
Err(BchToolsErr::InvalidBtreeId)
|
||||
}
|
||||
@ -131,11 +126,9 @@ impl FromStr for c::bch_bkey_type {
|
||||
let s = CString::new(s).unwrap();
|
||||
let p = s.as_ptr();
|
||||
|
||||
let v = unsafe {
|
||||
c::match_string(c::bch2_bkey_types[..].as_ptr(), (-(1 as isize)) as usize, p)
|
||||
};
|
||||
let v = unsafe { c::match_string(c::bch2_bkey_types[..].as_ptr(), (-1_isize) as usize, p) };
|
||||
if v >= 0 {
|
||||
Ok(unsafe { std::mem::transmute(v) })
|
||||
Ok(unsafe { std::mem::transmute::<i32, bcachefs::bch_bkey_type>(v) })
|
||||
} else {
|
||||
Err(BchToolsErr::InvalidBkeyType)
|
||||
}
|
||||
@ -192,7 +185,7 @@ impl FromStr for c::bpos {
|
||||
|
||||
let ino: u64 = ino_str.parse().map_err(|_| BchToolsErr::InvalidBpos)?;
|
||||
let off: u64 = off_str.parse().map_err(|_| BchToolsErr::InvalidBpos)?;
|
||||
let snp: u32 = snp_str.map(|s| s.parse().ok()).flatten().unwrap_or(0);
|
||||
let snp: u32 = snp_str.and_then(|s| s.parse().ok()).unwrap_or(0);
|
||||
|
||||
Ok(c::bpos {
|
||||
inode: ino,
|
||||
|
@ -1,3 +1,7 @@
|
||||
use crate::c;
|
||||
use crate::fs::Fs;
|
||||
use std::ffi::{CString, c_char};
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! opt_set {
|
||||
($opts:ident, $n:ident, $v:expr) => {
|
||||
@ -33,3 +37,29 @@ macro_rules! opt_get {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn parse_mount_opts(fs: Option<&mut Fs>, optstr: Option<&str>, ignore_unknown: bool)
|
||||
-> Result<c::bch_opts, c::bch_errcode> {
|
||||
let mut opts: c::bch_opts = Default::default();
|
||||
|
||||
if let Some(optstr) = optstr {
|
||||
let optstr = CString::new(optstr).unwrap();
|
||||
let optstr_ptr = optstr.as_ptr();
|
||||
|
||||
let ret = unsafe {
|
||||
c::bch2_parse_mount_opts(fs.map_or(std::ptr::null_mut(), |f| f.raw),
|
||||
&mut opts as *mut c::bch_opts,
|
||||
std::ptr::null_mut(),
|
||||
optstr_ptr as *mut c_char,
|
||||
ignore_unknown)
|
||||
};
|
||||
|
||||
drop(optstr);
|
||||
|
||||
if ret != 0 {
|
||||
let err: c::bch_errcode = unsafe { std::mem::transmute(-ret) };
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
Ok(opts)
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ void bcachefs_usage(void)
|
||||
"Superblock commands:\n"
|
||||
" format Format a new filesystem\n"
|
||||
" show-super Dump superblock information to stdout\n"
|
||||
" recover-super Attempt to recover overwritten superblock from backups\n"
|
||||
" set-fs-option Set a filesystem option\n"
|
||||
" reset-counters Reset all counters on an unmounted device\n"
|
||||
"\n"
|
||||
@ -52,6 +53,7 @@ void bcachefs_usage(void)
|
||||
#endif
|
||||
"Commands for managing a running filesystem:\n"
|
||||
" fs usage Show disk usage\n"
|
||||
" fs top Show runtime performance information\n"
|
||||
"\n"
|
||||
"Commands for managing devices within a running filesystem:\n"
|
||||
" device add Add a new device to an existing filesystem\n"
|
||||
@ -70,6 +72,7 @@ void bcachefs_usage(void)
|
||||
"\n"
|
||||
"Commands for managing filesystem data:\n"
|
||||
" data rereplicate Rereplicate degraded data\n"
|
||||
" data scrub Verify checksums and correct errors, if possible\n"
|
||||
" data job Kick off low level data jobs\n"
|
||||
"\n"
|
||||
"Encryption:\n"
|
||||
@ -90,9 +93,11 @@ void bcachefs_usage(void)
|
||||
" list List filesystem metadata in textual form\n"
|
||||
" list_journal List contents of journal\n"
|
||||
"\n"
|
||||
#ifdef BCACHEFS_FUSE
|
||||
"FUSE:\n"
|
||||
" fusemount Mount a filesystem via FUSE\n"
|
||||
"\n"
|
||||
#endif
|
||||
"Miscellaneous:\n"
|
||||
" completions Generate shell completions\n"
|
||||
" version Display the version of the invoked bcachefs tool\n");
|
||||
@ -113,14 +118,15 @@ int fs_cmds(int argc, char *argv[])
|
||||
{
|
||||
char *cmd = pop_cmd(&argc, argv);
|
||||
|
||||
if (argc < 1) {
|
||||
bcachefs_usage();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (argc < 1)
|
||||
return fs_usage();
|
||||
if (!strcmp(cmd, "usage"))
|
||||
return cmd_fs_usage(argc, argv);
|
||||
if (!strcmp(cmd, "top"))
|
||||
return cmd_fs_top(argc, argv);
|
||||
|
||||
return 0;
|
||||
fs_usage();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int device_cmds(int argc, char *argv[])
|
||||
@ -146,7 +152,8 @@ int device_cmds(int argc, char *argv[])
|
||||
if (!strcmp(cmd, "resize-journal"))
|
||||
return cmd_device_resize_journal(argc, argv);
|
||||
|
||||
return 0;
|
||||
device_usage();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int data_cmds(int argc, char *argv[])
|
||||
@ -157,8 +164,11 @@ int data_cmds(int argc, char *argv[])
|
||||
return data_usage();
|
||||
if (!strcmp(cmd, "rereplicate"))
|
||||
return cmd_data_rereplicate(argc, argv);
|
||||
if (!strcmp(cmd, "scrub"))
|
||||
return cmd_data_scrub(argc, argv);
|
||||
if (!strcmp(cmd, "job"))
|
||||
return cmd_data_job(argc, argv);
|
||||
|
||||
return 0;
|
||||
data_usage();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
203
c_src/cmd_data.c
203
c_src/cmd_data.c
@ -1,5 +1,5 @@
|
||||
|
||||
|
||||
#include <getopt.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
@ -64,6 +64,207 @@ int cmd_data_rereplicate(int argc, char *argv[])
|
||||
});
|
||||
}
|
||||
|
||||
static void data_scrub_usage(void)
|
||||
{
|
||||
puts("bcachefs data scrub\n"
|
||||
"Usage: bcachefs data scrub [filesystem|device]\n"
|
||||
"\n"
|
||||
"Check data for errors, fix from another replica if possible\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -m, --metadata check metadata only\n"
|
||||
" -h, --help display this help and exit\n"
|
||||
"Report bugs to <linux-bcachefs@vger.kernel.org>");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
int cmd_data_scrub(int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "metadata", no_argument, NULL, 'm' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ NULL }
|
||||
};
|
||||
struct bch_ioctl_data cmd = {
|
||||
.op = BCH_DATA_OP_scrub,
|
||||
.scrub.data_types = ~0,
|
||||
};
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hm", longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'm':
|
||||
cmd.scrub.data_types = BIT(BCH_DATA_btree);
|
||||
break;
|
||||
case 'h':
|
||||
data_scrub_usage();
|
||||
break;
|
||||
}
|
||||
args_shift(optind);
|
||||
|
||||
char *path = arg_pop();
|
||||
if (!path)
|
||||
die("Please supply a filesystem");
|
||||
|
||||
if (argc)
|
||||
die("too many arguments");
|
||||
|
||||
printf("Starting scrub on");
|
||||
|
||||
struct bchfs_handle fs = bcache_fs_open(path);
|
||||
dev_names dev_names = bchu_fs_get_devices(fs);
|
||||
|
||||
struct scrub_device {
|
||||
const char *name;
|
||||
int progress_fd;
|
||||
u64 done, corrected, uncorrected, total;
|
||||
enum bch_ioctl_data_event_ret ret;
|
||||
};
|
||||
DARRAY(struct scrub_device) scrub_devs = {};
|
||||
|
||||
if (fs.dev_idx >= 0) {
|
||||
cmd.scrub.dev = fs.dev_idx;
|
||||
struct scrub_device d = {
|
||||
.name = dev_idx_to_name(&dev_names, fs.dev_idx)->dev,
|
||||
.progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
|
||||
};
|
||||
darray_push(&scrub_devs, d);
|
||||
} else {
|
||||
/* Scrubbing every device */
|
||||
darray_for_each(dev_names, dev) {
|
||||
cmd.scrub.dev = dev->idx;
|
||||
struct scrub_device d = {
|
||||
.name = dev->dev,
|
||||
.progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
|
||||
};
|
||||
darray_push(&scrub_devs, d);
|
||||
}
|
||||
}
|
||||
|
||||
printf(" %zu devices: ", scrub_devs.nr);
|
||||
darray_for_each(scrub_devs, dev)
|
||||
printf(" %s", dev->name);
|
||||
printf("\n");
|
||||
|
||||
struct timespec now, last;
|
||||
bool first = true;
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
printbuf_tabstop_push(&buf, 16);
|
||||
printbuf_tabstop_push(&buf, 12);
|
||||
printbuf_tabstop_push(&buf, 12);
|
||||
printbuf_tabstop_push(&buf, 12);
|
||||
printbuf_tabstop_push(&buf, 12);
|
||||
printbuf_tabstop_push(&buf, 6);
|
||||
|
||||
prt_printf(&buf, "device\t");
|
||||
prt_printf(&buf, "checked\r");
|
||||
prt_printf(&buf, "corrected\r");
|
||||
prt_printf(&buf, "uncorrected\r");
|
||||
prt_printf(&buf, "total\r");
|
||||
puts(buf.buf);
|
||||
|
||||
while (1) {
|
||||
bool done = true;
|
||||
|
||||
printbuf_reset_keep_tabstops(&buf);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
u64 ns_since_last = 0;
|
||||
if (!first)
|
||||
ns_since_last = (now.tv_sec - last.tv_sec) * NSEC_PER_SEC +
|
||||
now.tv_nsec - last.tv_nsec;
|
||||
|
||||
darray_for_each(scrub_devs, dev) {
|
||||
struct bch_ioctl_data_event e;
|
||||
|
||||
if (dev->progress_fd >= 0 &&
|
||||
read(dev->progress_fd, &e, sizeof(e)) != sizeof(e)) {
|
||||
close(dev->progress_fd);
|
||||
dev->progress_fd = -1;
|
||||
}
|
||||
|
||||
u64 rate = 0;
|
||||
|
||||
if (dev->progress_fd >= 0) {
|
||||
if (ns_since_last)
|
||||
rate = ((e.p.sectors_done - dev->done) << 9)
|
||||
* NSEC_PER_SEC
|
||||
/ ns_since_last;
|
||||
|
||||
dev->done = e.p.sectors_done;
|
||||
dev->corrected = e.p.sectors_error_corrected;
|
||||
dev->uncorrected= e.p.sectors_error_uncorrected;
|
||||
dev->total = e.p.sectors_total;
|
||||
}
|
||||
|
||||
if (dev->progress_fd >= 0 && e.ret) {
|
||||
close(dev->progress_fd);
|
||||
dev->progress_fd = -1;
|
||||
dev->ret = e.ret;
|
||||
}
|
||||
|
||||
if (dev->progress_fd >= 0)
|
||||
done = false;
|
||||
|
||||
prt_printf(&buf, "%s\t", dev->name ?: "(offline)");
|
||||
|
||||
prt_human_readable_u64(&buf, dev->done << 9);
|
||||
prt_tab_rjust(&buf);
|
||||
|
||||
prt_human_readable_u64(&buf, dev->corrected << 9);
|
||||
prt_tab_rjust(&buf);
|
||||
|
||||
prt_human_readable_u64(&buf, dev->uncorrected << 9);
|
||||
prt_tab_rjust(&buf);
|
||||
|
||||
prt_human_readable_u64(&buf, dev->total << 9);
|
||||
prt_tab_rjust(&buf);
|
||||
|
||||
prt_printf(&buf, "%llu%%",
|
||||
dev->total
|
||||
? dev->done * 100 / dev->total
|
||||
: 0);
|
||||
prt_tab_rjust(&buf);
|
||||
|
||||
prt_str(&buf, " ");
|
||||
|
||||
if (dev->progress_fd >= 0) {
|
||||
prt_human_readable_u64(&buf, rate);
|
||||
prt_str(&buf, "/sec");
|
||||
} else if (dev->ret == BCH_IOCTL_DATA_EVENT_RET_device_offline) {
|
||||
prt_str(&buf, "offline");
|
||||
} else {
|
||||
prt_str(&buf, "complete");
|
||||
}
|
||||
|
||||
if (dev != &darray_last(scrub_devs))
|
||||
prt_newline(&buf);
|
||||
}
|
||||
|
||||
fputs(buf.buf, stdout);
|
||||
fflush(stdout);
|
||||
|
||||
if (done)
|
||||
break;
|
||||
|
||||
last = now;
|
||||
first = false;
|
||||
sleep(1);
|
||||
|
||||
for (unsigned i = 0; i < scrub_devs.nr; i++) {
|
||||
if (i)
|
||||
printf("\033[1A");
|
||||
printf("\33[2K\r");
|
||||
}
|
||||
}
|
||||
|
||||
fputs("\n", stdout);
|
||||
printbuf_exit(&buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void data_job_usage(void)
|
||||
{
|
||||
puts("bcachefs data job\n"
|
||||
|
@ -47,11 +47,11 @@ static void device_add_usage(void)
|
||||
puts("bcachefs device add - add a device to an existing filesystem\n"
|
||||
"Usage: bcachefs device add [OPTION]... filesystem device\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -S, --fs_size=size Size of filesystem on device\n"
|
||||
" -B, --bucket=size Bucket size\n"
|
||||
" -D, --discard Enable discards\n"
|
||||
" -l, --label=label Disk label\n"
|
||||
"Options:\n");
|
||||
|
||||
bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
|
||||
|
||||
puts(" -l, --label=label Disk label\n"
|
||||
" -f, --force Use device even if it appears to already be formatted\n"
|
||||
" -h, --help Display this help and exit\n"
|
||||
"\n"
|
||||
@ -61,9 +61,6 @@ static void device_add_usage(void)
|
||||
int cmd_device_add(int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "fs_size", required_argument, NULL, 'S' },
|
||||
{ "bucket", required_argument, NULL, 'B' },
|
||||
{ "discard", no_argument, NULL, 'D' },
|
||||
{ "label", required_argument, NULL, 'l' },
|
||||
{ "force", no_argument, NULL, 'f' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
@ -72,22 +69,31 @@ int cmd_device_add(int argc, char *argv[])
|
||||
struct format_opts format_opts = format_opts_default();
|
||||
struct dev_opts dev_opts = dev_opts_default();
|
||||
bool force = false;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "S:B:Dl:fh",
|
||||
longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'S':
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.size))
|
||||
die("invalid filesystem size");
|
||||
break;
|
||||
case 'B':
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
|
||||
die("bad bucket_size %s", optarg);
|
||||
break;
|
||||
case 'D':
|
||||
dev_opts.discard = true;
|
||||
while (true) {
|
||||
const struct bch_option *opt =
|
||||
bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_DEVICE);
|
||||
if (opt) {
|
||||
unsigned id = opt - bch2_opt_table;
|
||||
u64 v;
|
||||
struct printbuf err = PRINTBUF;
|
||||
int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
|
||||
if (ret)
|
||||
die("invalid %s: %s", opt->attr.name, err.buf);
|
||||
|
||||
if (opt->flags & OPT_DEVICE)
|
||||
bch2_opt_set_by_id(&dev_opts.opts, id, v);
|
||||
else
|
||||
die("got bch_opt of wrong type %s", opt->attr.name);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
int optid = getopt_long(argc, argv, "S:B:Dl:fh", longopts, NULL);
|
||||
if (optid == -1)
|
||||
break;
|
||||
|
||||
switch (optid) {
|
||||
case 'l':
|
||||
dev_opts.label = strdup(optarg);
|
||||
break;
|
||||
@ -97,6 +103,10 @@ int cmd_device_add(int argc, char *argv[])
|
||||
case 'h':
|
||||
device_add_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
case '?':
|
||||
exit(EXIT_FAILURE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
args_shift(optind);
|
||||
|
||||
@ -127,10 +137,11 @@ int cmd_device_add(int argc, char *argv[])
|
||||
opt_set(fs_opts, btree_node_size,
|
||||
read_file_u64(fs.sysfs_fd, "options/btree_node_size"));
|
||||
|
||||
struct bch_sb *sb = bch2_format(fs_opt_strs,
|
||||
fs_opts,
|
||||
format_opts,
|
||||
&dev_opts, 1);
|
||||
dev_opts_list devs = {};
|
||||
darray_push(&devs, dev_opts);
|
||||
|
||||
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
|
||||
darray_exit(&devs);
|
||||
free(sb);
|
||||
bchu_disk_add(fs, dev_opts.path);
|
||||
return 0;
|
||||
@ -533,7 +544,7 @@ int cmd_device_resize(int argc, char *argv[])
|
||||
if (resize)
|
||||
die("confused: more than one online device?");
|
||||
resize = ca;
|
||||
percpu_ref_get(&resize->io_ref);
|
||||
percpu_ref_get(&resize->io_ref[READ]);
|
||||
}
|
||||
|
||||
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
|
||||
@ -546,7 +557,7 @@ int cmd_device_resize(int argc, char *argv[])
|
||||
if (ret)
|
||||
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
|
||||
|
||||
percpu_ref_put(&resize->io_ref);
|
||||
percpu_ref_put(&resize->io_ref[READ]);
|
||||
bch2_fs_stop(c);
|
||||
}
|
||||
return 0;
|
||||
@ -630,7 +641,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
|
||||
if (resize)
|
||||
die("confused: more than one online device?");
|
||||
resize = ca;
|
||||
percpu_ref_get(&resize->io_ref);
|
||||
percpu_ref_get(&resize->io_ref[READ]);
|
||||
}
|
||||
|
||||
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
|
||||
@ -640,7 +651,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
|
||||
if (ret)
|
||||
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
|
||||
|
||||
percpu_ref_put(&resize->io_ref);
|
||||
percpu_ref_put(&resize->io_ref[READ]);
|
||||
bch2_fs_stop(c);
|
||||
}
|
||||
return 0;
|
||||
|
@ -116,8 +116,7 @@ int cmd_dump(int argc, char *argv[])
|
||||
opt_set(opts, read_only, true);
|
||||
opt_set(opts, nochanges, true);
|
||||
opt_set(opts, norecovery, true);
|
||||
opt_set(opts, degraded, true);
|
||||
opt_set(opts, very_degraded, true);
|
||||
opt_set(opts, degraded, BCH_DEGRADED_very);
|
||||
opt_set(opts, errors, BCH_ON_ERROR_continue);
|
||||
opt_set(opts, fix_errors, FSCK_FIX_no);
|
||||
|
||||
|
@ -39,11 +39,7 @@ x('L', fs_label, required_argument) \
|
||||
x('U', uuid, required_argument) \
|
||||
x(0, fs_size, required_argument) \
|
||||
x(0, superblock_size, required_argument) \
|
||||
x(0, bucket_size, required_argument) \
|
||||
x('l', label, required_argument) \
|
||||
x(0, discard, no_argument) \
|
||||
x(0, data_allowed, required_argument) \
|
||||
x(0, durability, required_argument) \
|
||||
x(0, version, required_argument) \
|
||||
x(0, no_initialize, no_argument) \
|
||||
x(0, source, required_argument) \
|
||||
@ -52,17 +48,16 @@ x('q', quiet, no_argument) \
|
||||
x('v', verbose, no_argument) \
|
||||
x('h', help, no_argument)
|
||||
|
||||
static void usage(void)
|
||||
static void format_usage(void)
|
||||
{
|
||||
puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
|
||||
"Usage: bcachefs format [OPTION]... <devices>\n"
|
||||
"\n"
|
||||
"Options:");
|
||||
|
||||
bch2_opts_usage(OPT_FORMAT);
|
||||
bch2_opts_usage(OPT_FORMAT|OPT_FS);
|
||||
|
||||
puts(
|
||||
" --replicas=# Sets both data and metadata replicas\n"
|
||||
puts(" --replicas=# Sets both data and metadata replicas\n"
|
||||
" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
|
||||
" --no_passphrase Don't encrypt master encryption key\n"
|
||||
" -L, --fs_label=label\n"
|
||||
@ -72,9 +67,10 @@ static void usage(void)
|
||||
"\n"
|
||||
"Device specific options:");
|
||||
|
||||
bch2_opts_usage(OPT_DEVICE);
|
||||
bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
|
||||
|
||||
puts(" -l, --label=label Disk label\n"
|
||||
puts(" --fs_size=size Size of filesystem on device\n"
|
||||
" -l, --label=label Disk label\n"
|
||||
"\n"
|
||||
" -f, --force\n"
|
||||
" -q, --quiet Only print errors\n"
|
||||
@ -125,32 +121,60 @@ void build_fs(struct bch_fs *c, const char *src_path)
|
||||
if (!S_ISDIR(stat.st_mode))
|
||||
die("%s is not a directory", src_path);
|
||||
|
||||
copy_fs(c, src_fd, src_path, &s);
|
||||
copy_fs(c, src_fd, src_path, &s, 0);
|
||||
}
|
||||
|
||||
int cmd_format(int argc, char *argv[])
|
||||
{
|
||||
DARRAY(struct dev_opts) devices = { 0 };
|
||||
DARRAY(char *) device_paths = { 0 };
|
||||
dev_opts_list devices = {};
|
||||
darray_str device_paths = {};
|
||||
struct format_opts opts = format_opts_default();
|
||||
struct dev_opts dev_opts = dev_opts_default();
|
||||
bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
|
||||
bool unconsumed_dev_option = false;
|
||||
unsigned v;
|
||||
int opt;
|
||||
|
||||
struct bch_opt_strs fs_opt_strs =
|
||||
bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
|
||||
struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
|
||||
struct bch_opt_strs fs_opt_strs = {};
|
||||
struct bch_opts fs_opts = bch2_opts_empty();
|
||||
|
||||
if (getenv("BCACHEFS_KERNEL_ONLY"))
|
||||
initialize = false;
|
||||
|
||||
while ((opt = getopt_long(argc, argv,
|
||||
"-L:U:g:fqhv",
|
||||
while (true) {
|
||||
const struct bch_option *opt =
|
||||
bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_FS|OPT_DEVICE);
|
||||
if (opt) {
|
||||
unsigned id = opt - bch2_opt_table;
|
||||
u64 v;
|
||||
struct printbuf err = PRINTBUF;
|
||||
int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
|
||||
if (ret == -BCH_ERR_option_needs_open_fs) {
|
||||
fs_opt_strs.by_id[id] = strdup(optarg);
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
die("invalid option: %s", err.buf);
|
||||
|
||||
if (opt->flags & OPT_DEVICE) {
|
||||
bch2_opt_set_by_id(&dev_opts.opts, id, v);
|
||||
unconsumed_dev_option = true;
|
||||
} else if (opt->flags & OPT_FS) {
|
||||
bch2_opt_set_by_id(&fs_opts, id, v);
|
||||
} else {
|
||||
die("got bch_opt of wrong type %s", opt->attr.name);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
int optid = getopt_long(argc, argv,
|
||||
"-L:l:U:g:fqhv",
|
||||
format_opts,
|
||||
NULL)) != -1)
|
||||
switch (opt) {
|
||||
NULL);
|
||||
if (optid == -1)
|
||||
break;
|
||||
|
||||
switch (optid) {
|
||||
case O_replicas:
|
||||
if (kstrtouint(optarg, 10, &v) ||
|
||||
!v ||
|
||||
@ -183,7 +207,7 @@ int cmd_format(int argc, char *argv[])
|
||||
force = true;
|
||||
break;
|
||||
case O_fs_size:
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.size))
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.fs_size))
|
||||
die("invalid filesystem size");
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
@ -193,35 +217,13 @@ int cmd_format(int argc, char *argv[])
|
||||
|
||||
opts.superblock_size >>= 9;
|
||||
break;
|
||||
case O_bucket_size:
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
|
||||
die("bad bucket_size %s", optarg);
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_label:
|
||||
case 'l':
|
||||
dev_opts.label = optarg;
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_discard:
|
||||
dev_opts.discard = true;
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_data_allowed:
|
||||
dev_opts.data_allowed =
|
||||
read_flag_list_or_die(optarg,
|
||||
__bch2_data_types, "data type");
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_durability:
|
||||
if (kstrtouint(optarg, 10, &dev_opts.durability) ||
|
||||
dev_opts.durability > BCH_REPLICAS_MAX)
|
||||
die("invalid durability");
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_version:
|
||||
if (kstrtouint(optarg, 10, &opts.version))
|
||||
die("invalid version");
|
||||
opts.version = version_parse(optarg);
|
||||
break;
|
||||
case O_no_initialize:
|
||||
initialize = false;
|
||||
@ -230,7 +232,7 @@ int cmd_format(int argc, char *argv[])
|
||||
darray_push(&device_paths, optarg);
|
||||
dev_opts.path = optarg;
|
||||
darray_push(&devices, dev_opts);
|
||||
dev_opts.size = 0;
|
||||
dev_opts.fs_size = 0;
|
||||
unconsumed_dev_option = false;
|
||||
break;
|
||||
case O_quiet:
|
||||
@ -239,14 +241,18 @@ int cmd_format(int argc, char *argv[])
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case O_help:
|
||||
case 'h':
|
||||
usage();
|
||||
format_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
break;
|
||||
case '?':
|
||||
exit(EXIT_FAILURE);
|
||||
break;
|
||||
default:
|
||||
die("getopt ret %i %c", optid, optid);
|
||||
}
|
||||
}
|
||||
|
||||
if (unconsumed_dev_option)
|
||||
@ -269,11 +275,7 @@ int cmd_format(int argc, char *argv[])
|
||||
die("Error opening %s: %s", dev_opts.path, strerror(-ret));
|
||||
}
|
||||
|
||||
struct bch_sb *sb =
|
||||
bch2_format(fs_opt_strs,
|
||||
fs_opts,
|
||||
opts,
|
||||
devices.data, devices.nr);
|
||||
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, opts, devices);
|
||||
bch2_opt_strs_free(&fs_opt_strs);
|
||||
|
||||
if (!quiet) {
|
||||
@ -433,3 +435,204 @@ int cmd_show_super(int argc, char *argv[])
|
||||
printbuf_exit(&buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "libbcachefs/super-io.h"
|
||||
#include "libbcachefs/sb-members.h"
|
||||
|
||||
typedef DARRAY(struct bch_sb *) probed_sb_list;
|
||||
|
||||
static void probe_one_super(int dev_fd, unsigned sb_size, u64 offset,
|
||||
probed_sb_list *sbs, bool verbose)
|
||||
{
|
||||
darray_char sb_buf = {};
|
||||
darray_resize(&sb_buf, sb_size);
|
||||
|
||||
xpread(dev_fd, sb_buf.data, sb_buf.size, offset);
|
||||
|
||||
struct printbuf err = PRINTBUF;
|
||||
int ret = bch2_sb_validate((void *) sb_buf.data, offset >> 9, 0, &err);
|
||||
printbuf_exit(&err);
|
||||
|
||||
if (!ret) {
|
||||
if (verbose) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_human_readable_u64(&buf, offset);
|
||||
printf("found superblock at %s\n", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
darray_push(sbs, (void *) sb_buf.data);
|
||||
sb_buf.data = NULL;
|
||||
}
|
||||
|
||||
darray_exit(&sb_buf);
|
||||
}
|
||||
|
||||
static void probe_sb_range(int dev_fd, u64 start_offset, u64 end_offset,
|
||||
probed_sb_list *sbs, bool verbose)
|
||||
{
|
||||
start_offset &= ~((u64) 511);
|
||||
end_offset &= ~((u64) 511);
|
||||
|
||||
size_t buflen = end_offset - start_offset;
|
||||
void *buf = malloc(buflen);
|
||||
xpread(dev_fd, buf, buflen, start_offset);
|
||||
|
||||
for (u64 offset = 0; offset < buflen; offset += 512) {
|
||||
struct bch_sb *sb = buf + offset;
|
||||
|
||||
if (!uuid_equal(&sb->magic, &BCACHE_MAGIC) &&
|
||||
!uuid_equal(&sb->magic, &BCHFS_MAGIC))
|
||||
continue;
|
||||
|
||||
size_t bytes = vstruct_bytes(sb);
|
||||
if (offset + bytes > buflen) {
|
||||
fprintf(stderr, "found sb %llu size %zu that overran buffer\n",
|
||||
start_offset + offset, bytes);
|
||||
continue;
|
||||
}
|
||||
struct printbuf err = PRINTBUF;
|
||||
int ret = bch2_sb_validate(sb, (start_offset + offset) >> 9, 0, &err);
|
||||
if (ret)
|
||||
fprintf(stderr, "found sb %llu that failed to validate: %s\n",
|
||||
start_offset + offset, err.buf);
|
||||
printbuf_exit(&err);
|
||||
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
if (verbose) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_human_readable_u64(&buf, start_offset + offset);
|
||||
printf("found superblock at %s\n", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
void *sb_copy = malloc(bytes);
|
||||
memcpy(sb_copy, sb, bytes);
|
||||
darray_push(sbs, sb_copy);
|
||||
}
|
||||
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static u64 bch2_sb_last_mount_time(struct bch_sb *sb)
|
||||
{
|
||||
u64 ret = 0;
|
||||
for (unsigned i = 0; i < sb->nr_devices; i++)
|
||||
ret = max(ret, le64_to_cpu(bch2_sb_member_get(sb, i).last_mount));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_sb_time_cmp(struct bch_sb *l, struct bch_sb *r)
|
||||
{
|
||||
return cmp_int(bch2_sb_last_mount_time(l),
|
||||
bch2_sb_last_mount_time(r));
|
||||
}
|
||||
|
||||
/*
 * Print the help text for "bcachefs recover-super" on stdout and terminate the
 * process with a success status. Never returns.
 */
static void recover_super_usage(void)
{
	puts("bcachefs recover-super \n"
	     "Usage: bcachefs recover-super [OPTION].. device\n"
	     "\n"
	     "Attempt to recover a filesystem on a device that has had the main superblock\n"
	     "and superblock layout overwritten.\n"
	     "All options will be guessed if not provided\n"
	     "\n"
	     "Options:\n"
	     " -d, --dev_size size of filesystem on device, in bytes \n"
	     " -o, --offset offset to probe, in bytes\n"
	     " -y, --yes Recover without prompting\n"
	     " -v, --verbose Increase logging level\n"
	     " -h, --help display this help and exit\n"
	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
	exit(EXIT_SUCCESS);
}
|
||||
|
||||
int cmd_recover_super(int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "dev_size", 1, NULL, 'd' },
|
||||
{ "offset", 1, NULL, 'o' },
|
||||
{ "yes", 0, NULL, 'y' },
|
||||
{ "verbose", 0, NULL, 'v' },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ NULL }
|
||||
};
|
||||
u64 dev_size = 0, offset = 0;
|
||||
bool yes = false, verbose = false;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "d:o:yvh", longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
if (bch2_strtoull_h(optarg, &dev_size))
|
||||
die("invalid offset");
|
||||
break;
|
||||
case 'o':
|
||||
if (bch2_strtoull_h(optarg, &offset))
|
||||
die("invalid offset");
|
||||
|
||||
if (offset & 511)
|
||||
die("offset must be a multiple of 512");
|
||||
break;
|
||||
case 'y':
|
||||
yes = true;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'h':
|
||||
recover_super_usage();
|
||||
break;
|
||||
}
|
||||
args_shift(optind);
|
||||
|
||||
char *dev_path = arg_pop();
|
||||
if (!dev_path)
|
||||
die("please supply a device");
|
||||
if (argc)
|
||||
die("too many arguments");
|
||||
|
||||
int dev_fd = xopen(dev_path, O_RDWR);
|
||||
|
||||
if (!dev_size)
|
||||
dev_size = get_size(dev_fd);
|
||||
|
||||
probed_sb_list sbs = {};
|
||||
|
||||
if (offset) {
|
||||
probe_one_super(dev_fd, SUPERBLOCK_SIZE_DEFAULT, offset, &sbs, verbose);
|
||||
} else {
|
||||
unsigned scan_len = 16 << 20; /* 16MB, start and end of device */
|
||||
|
||||
probe_sb_range(dev_fd, 4096, scan_len, &sbs, verbose);
|
||||
probe_sb_range(dev_fd, dev_size - scan_len, dev_size, &sbs, verbose);
|
||||
}
|
||||
|
||||
if (!sbs.nr) {
|
||||
printf("Found no bcachefs superblocks\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
struct bch_sb *best = NULL;
|
||||
darray_for_each(sbs, sb)
|
||||
if (!best || bch2_sb_time_cmp(best, *sb) < 0)
|
||||
best = *sb;
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_sb_to_text(&buf, best, true, BIT_ULL(BCH_SB_FIELD_members_v2));
|
||||
|
||||
printf("Found superblock:\n%s", buf.buf);
|
||||
printf("Recover?");
|
||||
|
||||
if (yes || ask_yn())
|
||||
bch2_super_write(dev_fd, best);
|
||||
|
||||
printbuf_exit(&buf);
|
||||
darray_for_each(sbs, sb)
|
||||
kfree(*sb);
|
||||
darray_exit(&sbs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -122,14 +122,6 @@ static int dev_by_label_cmp(const void *_l, const void *_r)
|
||||
cmp_int(l->idx, r->idx);
|
||||
}
|
||||
|
||||
/*
 * Look up the entry of @dev_names whose idx field equals @idx.
 *
 * Returns a pointer to the matching entry inside the array, or NULL when no
 * entry matches.
 */
static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
{
	darray_for_each(*dev_names, i) {
		if (i->idx != idx)
			continue;

		return i;
	}

	return NULL;
}
|
||||
|
||||
static void devs_usage_to_text(struct printbuf *out,
|
||||
struct bchfs_handle fs,
|
||||
dev_names dev_names)
|
||||
@ -230,8 +222,6 @@ static int accounting_p_cmp(const void *_l, const void *_r)
|
||||
|
||||
struct bpos lp = (*l)->k.p, rp = (*r)->k.p;
|
||||
|
||||
bch2_bpos_swab(&lp);
|
||||
bch2_bpos_swab(&rp);
|
||||
return bpos_cmp(lp, rp);
|
||||
}
|
||||
|
||||
@ -247,6 +237,18 @@ static void accounting_sort(darray_accounting_p *sorted,
|
||||
sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL);
|
||||
}
|
||||
|
||||
static void accounting_swab_if_old(struct bch_ioctl_query_accounting *in)
|
||||
{
|
||||
unsigned kernel_version = bcachefs_kernel_version();
|
||||
|
||||
if (kernel_version &&
|
||||
kernel_version < bcachefs_metadata_version_disk_accounting_big_endian)
|
||||
for (struct bkey_i_accounting *a = in->accounting;
|
||||
a < (struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s);
|
||||
a = bkey_i_to_accounting(bkey_next(&a->k_i)))
|
||||
bch2_bpos_swab(&a->k.p);
|
||||
}
|
||||
|
||||
static int fs_usage_v1_to_text(struct printbuf *out,
|
||||
struct bchfs_handle fs,
|
||||
dev_names dev_names)
|
||||
@ -261,6 +263,8 @@ static int fs_usage_v1_to_text(struct printbuf *out,
|
||||
if (!a)
|
||||
return -1;
|
||||
|
||||
accounting_swab_if_old(a);
|
||||
|
||||
darray_accounting_p a_sorted = {};
|
||||
|
||||
accounting_sort(&a_sorted, a);
|
||||
@ -487,6 +491,19 @@ devs:
|
||||
bcache_fs_close(fs);
|
||||
}
|
||||
|
||||
/*
 * Print the top-level help for the "bcachefs fs" command group on stdout.
 *
 * Always returns 0.
 */
int fs_usage(void)
{
	static const char help[] =
		"bcachefs fs - manage a running filesystem\n"
		"Usage: bcachefs fs <CMD> [OPTIONS]\n"
		"\n"
		"Commands:\n"
		" usage Display detailed filesystem usage\n"
		" top Show runtime performance information\n"
		"\n"
		"Report bugs to <linux-bcachefs@vger.kernel.org>";

	/* puts() appends the final newline */
	puts(help);
	return 0;
}
|
||||
|
||||
static void fs_usage_usage(void)
|
||||
{
|
||||
puts("bcachefs fs usage - display detailed filesystem usage\n"
|
||||
|
@ -21,7 +21,6 @@ static void fsck_usage(void)
|
||||
" -y Assume \"yes\" to all questions\n"
|
||||
" -f Force checking even if filesystem is marked clean\n"
|
||||
" -r, --ratelimit_errors Don't display more than 10 errors of a given type\n"
|
||||
" -R, --reconstruct_alloc Reconstruct the alloc btree\n"
|
||||
" -k, --kernel Use the in-kernel fsck implementation\n"
|
||||
" -v Be verbose\n"
|
||||
" -h, --help Display this help and exit\n"
|
||||
@ -90,12 +89,14 @@ static int splice_fd_to_stdinout(int fd)
|
||||
return close(fd);
|
||||
}
|
||||
|
||||
static int fsck_online(const char *dev_path)
|
||||
static int fsck_online(const char *dev_path, const char *opt_str)
|
||||
{
|
||||
int dev_idx;
|
||||
struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
|
||||
|
||||
struct bch_ioctl_fsck_online fsck = { 0 };
|
||||
struct bch_ioctl_fsck_online fsck = {
|
||||
.opts = (unsigned long) opt_str
|
||||
};
|
||||
|
||||
int fsck_fd = ioctl(fs.ioctl_fd, BCH_IOCTL_FSCK_ONLINE, &fsck);
|
||||
if (fsck_fd < 0)
|
||||
@ -115,9 +116,7 @@ static bool should_use_kernel_fsck(darray_str devs)
|
||||
{
|
||||
system("modprobe bcachefs");
|
||||
|
||||
unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK)
|
||||
? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
|
||||
: 0;
|
||||
unsigned kernel_version = bcachefs_kernel_version();
|
||||
|
||||
if (!kernel_version)
|
||||
return false;
|
||||
@ -160,11 +159,49 @@ static bool should_use_kernel_fsck(darray_str devs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Report whether @path names a block device.
 *
 * A path that cannot be stat()ed is also reported as true.
 * NOTE(review): presumably so that callers pass such a path through unchanged
 * and a later open reports the real error - confirm.
 */
static bool is_blockdev(const char *path)
{
	struct stat st;

	return stat(path, &st) != 0 || S_ISBLK(st.st_mode);
}
|
||||
|
||||
/*
 * Detach the loop device @path by running "losetup -d <path>" through
 * system(). The command's exit status is not checked.
 */
static void loopdev_free(const char *path)
{
	char *detach_cmd = mprintf("losetup -d %s", path);

	system(detach_cmd);

	free(detach_cmd);
}
|
||||
|
||||
/*
 * Attach the regular file @path to a free loop device by running
 * "losetup --show -f <path>" and return the device name that losetup printed.
 *
 * Returns a heap-allocated string, stripped with strim(), that the caller must
 * free. Returns NULL, after printing a message on stderr, when losetup cannot
 * be started, exits with a non-zero status, or prints no output.
 *
 * NOTE(review): @path is interpolated into a shell command unquoted, so paths
 * containing whitespace or shell metacharacters will not work as intended.
 */
static char *loopdev_alloc(const char *path)
{
	char *cmd = mprintf("losetup --show -f %s", path);
	FILE *f = popen(cmd, "r");
	free(cmd);
	if (!f) {
		fprintf(stderr, "error executing losetup: %m\n");
		return NULL;
	}

	char *line = NULL;
	size_t n = 0;
	/* remember whether anything was read; line is not usable otherwise */
	bool got_line = getline(&line, &n, f) >= 0;

	int ret = pclose(f);
	if (ret) {
		fprintf(stderr, "error executing losetup: %i\n", ret);
		free(line);
		return NULL;
	}

	if (!got_line) {
		fprintf(stderr, "error executing losetup: no device name returned\n");
		free(line);
		return NULL;
	}

	strim(line);
	return line;
}
|
||||
|
||||
int cmd_fsck(int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "ratelimit_errors", no_argument, NULL, 'r' },
|
||||
{ "reconstruct_alloc", no_argument, NULL, 'R' },
|
||||
{ "kernel", no_argument, NULL, 'k' },
|
||||
{ "no-kernel", no_argument, NULL, 'K' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
@ -183,10 +220,13 @@ int cmd_fsck(int argc, char *argv[])
|
||||
append_opt(&opts_str, "read_only");
|
||||
|
||||
while ((opt = getopt_long(argc, argv,
|
||||
"apynfo:rRkvh",
|
||||
"apynfo:rkKvh",
|
||||
longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'a': /* outdated alias for -p */
|
||||
case 'a':
|
||||
/* "automatic" run, called by the system, for us to do checks as needed.
|
||||
* we don't need checks here: */
|
||||
exit(EXIT_SUCCESS);
|
||||
case 'p':
|
||||
case 'y':
|
||||
append_opt(&opts_str, "fix_errors=yes");
|
||||
@ -204,9 +244,6 @@ int cmd_fsck(int argc, char *argv[])
|
||||
case 'r':
|
||||
append_opt(&opts_str, "ratelimit_errors");
|
||||
break;
|
||||
case 'R':
|
||||
append_opt(&opts_str, "reconstruct_alloc");
|
||||
break;
|
||||
case 'k':
|
||||
kernel = true;
|
||||
break;
|
||||
@ -232,7 +269,7 @@ int cmd_fsck(int argc, char *argv[])
|
||||
darray_for_each(devs, i)
|
||||
if (dev_mounted(*i)) {
|
||||
printf("Running fsck online\n");
|
||||
return fsck_online(*i);
|
||||
return fsck_online(*i, opts_str.buf);
|
||||
}
|
||||
|
||||
int kernel_probed = kernel;
|
||||
@ -243,19 +280,35 @@ int cmd_fsck(int argc, char *argv[])
|
||||
struct printbuf parse_later = PRINTBUF;
|
||||
|
||||
if (kernel_probed) {
|
||||
darray_str loopdevs = {};
|
||||
int fsck_fd = -1;
|
||||
|
||||
printf("Running in-kernel offline fsck\n");
|
||||
struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) +
|
||||
sizeof(u64) * devs.nr, 1);
|
||||
struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) + sizeof(u64) * devs.nr, 1);
|
||||
|
||||
fsck->opts = (unsigned long)opts_str.buf;
|
||||
darray_for_each(devs, i)
|
||||
darray_for_each(devs, i) {
|
||||
if (is_blockdev(*i)) {
|
||||
fsck->devs[i - devs.data] = (unsigned long) *i;
|
||||
} else {
|
||||
char *l = loopdev_alloc(*i);
|
||||
if (!l)
|
||||
goto kernel_fsck_err;
|
||||
darray_push(&loopdevs, l);
|
||||
fsck->devs[i - devs.data] = (unsigned long) l;
|
||||
}
|
||||
}
|
||||
fsck->nr_devs = devs.nr;
|
||||
|
||||
int ctl_fd = bcachectl_open();
|
||||
int fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
|
||||
fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
|
||||
kernel_fsck_err:
|
||||
free(fsck);
|
||||
|
||||
darray_for_each(loopdevs, i)
|
||||
loopdev_free(*i);
|
||||
darray_exit(&loopdevs);
|
||||
|
||||
if (fsck_fd < 0 && kernel < 0)
|
||||
goto userland_fsck;
|
||||
|
||||
@ -266,7 +319,7 @@ int cmd_fsck(int argc, char *argv[])
|
||||
} else {
|
||||
userland_fsck:
|
||||
printf("Running userspace offline fsck\n");
|
||||
ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
|
||||
ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "libbcachefs/dirent.h"
|
||||
#include "libbcachefs/errcode.h"
|
||||
#include "libbcachefs/error.h"
|
||||
#include "libbcachefs/fs-common.h"
|
||||
#include "libbcachefs/namei.h"
|
||||
#include "libbcachefs/inode.h"
|
||||
#include "libbcachefs/io_read.h"
|
||||
#include "libbcachefs/io_write.h"
|
||||
@ -31,9 +31,6 @@
|
||||
|
||||
#include <linux/dcache.h>
|
||||
|
||||
/* XXX cut and pasted from fsck.c */
|
||||
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
|
||||
|
||||
/* used by write_aligned function for waiting on bch2_write closure */
|
||||
struct write_aligned_op_t {
|
||||
struct closure cl;
|
||||
@ -242,7 +239,7 @@ static int do_create(struct bch_fs *c, subvol_inum dir,
|
||||
|
||||
bch2_inode_init_early(c, new_inode);
|
||||
|
||||
return bch2_trans_do(c, NULL, NULL, 0,
|
||||
return bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_create_trans(trans,
|
||||
dir, &dir_u,
|
||||
new_inode, &qstr,
|
||||
@ -295,7 +292,7 @@ static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino,
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name);
|
||||
|
||||
int ret = bch2_trans_do(c, NULL, NULL,
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_unlink_trans(trans, dir, &dir_u,
|
||||
&inode_u, &qstr, false));
|
||||
@ -330,7 +327,7 @@ static void bcachefs_fuse_rename(fuse_req_t req,
|
||||
src_dir.inum, srcname, dst_dir.inum, dstname, flags);
|
||||
|
||||
/* XXX handle overwrites */
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_rename_trans(trans,
|
||||
src_dir, &src_dir_u,
|
||||
dst_dir, &dst_dir_u,
|
||||
@ -354,7 +351,7 @@ static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino,
|
||||
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
|
||||
inum.inum, newparent.inum, newname);
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_link_trans(trans, newparent, &dir_u,
|
||||
inum, &inode_u, &qstr));
|
||||
|
||||
@ -478,10 +475,9 @@ static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
|
||||
closure_init_stack(&cl);
|
||||
|
||||
closure_get(&cl);
|
||||
rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
|
||||
rbio.bio.bi_private = &cl;
|
||||
|
||||
bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
|
||||
bch2_read(c, rbio_init(&rbio.bio, c, io_opts, bcachefs_fuse_read_endio), inum);
|
||||
|
||||
closure_sync(&cl);
|
||||
|
||||
@ -1222,9 +1218,9 @@ int cmd_fusemount(int argc, char *argv[])
|
||||
goto out;
|
||||
}
|
||||
if (fuse_opts.show_version) {
|
||||
/* TODO: Show bcachefs version. */
|
||||
printf("FUSE library version %s\n", fuse_pkgversion());
|
||||
fuse_lowlevel_version();
|
||||
printf("bcachefs version: %s\n", VERSION_STRING);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
@ -104,24 +104,19 @@ int cmd_set_passphrase(int argc, char *argv[])
|
||||
if (IS_ERR(c))
|
||||
die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
|
||||
|
||||
struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
|
||||
struct bch_sb *sb = c->disk_sb.sb;
|
||||
struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt);
|
||||
if (!crypt)
|
||||
die("Filesystem does not have encryption enabled");
|
||||
|
||||
struct bch_encrypted_key new_key;
|
||||
new_key.magic = BCH_KEY_MAGIC;
|
||||
|
||||
int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
|
||||
struct bch_key key;
|
||||
int ret = bch2_decrypt_sb_key(c, crypt, &key);
|
||||
if (ret)
|
||||
die("Error getting current key");
|
||||
|
||||
char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
|
||||
struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
|
||||
|
||||
if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
|
||||
&new_key, sizeof(new_key)))
|
||||
die("error encrypting key");
|
||||
crypt->key = new_key;
|
||||
bch_crypt_update_passphrase(sb, crypt, &key, new_passphrase);
|
||||
|
||||
bch2_revoke_key(c->disk_sb.sb);
|
||||
bch2_write_super(c);
|
||||
@ -142,18 +137,17 @@ int cmd_remove_passphrase(int argc, char *argv[])
|
||||
if (IS_ERR(c))
|
||||
die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
|
||||
|
||||
struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
|
||||
struct bch_sb *sb = c->disk_sb.sb;
|
||||
struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt);
|
||||
if (!crypt)
|
||||
die("Filesystem does not have encryption enabled");
|
||||
|
||||
struct bch_encrypted_key new_key;
|
||||
new_key.magic = BCH_KEY_MAGIC;
|
||||
|
||||
int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
|
||||
struct bch_key key;
|
||||
int ret = bch2_decrypt_sb_key(c, crypt, &key);
|
||||
if (ret)
|
||||
die("Error getting current key");
|
||||
|
||||
crypt->key = new_key;
|
||||
bch_crypt_update_passphrase(sb, crypt, &key, NULL);
|
||||
|
||||
bch2_write_super(c);
|
||||
bch2_fs_stop(c);
|
||||
|
@ -27,30 +27,44 @@ static void kill_btree_node_usage(void)
|
||||
"Report bugs to <linux-bcachefs@vger.kernel.org>");
|
||||
}
|
||||
|
||||
struct kill_node {
|
||||
unsigned btree;
|
||||
unsigned level;
|
||||
u64 idx;
|
||||
};
|
||||
|
||||
int cmd_kill_btree_node(int argc, char *argv[])
|
||||
{
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
enum btree_id btree_id = 0;
|
||||
unsigned level = 0;
|
||||
u64 node_index = 0;
|
||||
DARRAY(struct kill_node) kill_nodes = {};
|
||||
int opt;
|
||||
|
||||
opt_set(opts, read_only, true);
|
||||
|
||||
while ((opt = getopt(argc, argv, "b:l:i:h")) != -1)
|
||||
while ((opt = getopt(argc, argv, "n:h")) != -1)
|
||||
switch (opt) {
|
||||
case 'b':
|
||||
btree_id = read_string_list_or_die(optarg,
|
||||
__bch2_btree_ids, "btree id");
|
||||
break;
|
||||
case 'l':
|
||||
if (kstrtouint(optarg, 10, &level) || level >= BTREE_MAX_DEPTH)
|
||||
case 'n': {
|
||||
char *p = optarg;
|
||||
const char *str_btree = strsep(&p, ":");
|
||||
const char *str_level = strsep(&p, ":");
|
||||
const char *str_idx = strsep(&p, ":");
|
||||
|
||||
struct kill_node n = {
|
||||
.btree = read_string_list_or_die(str_btree,
|
||||
__bch2_btree_ids, "btree id"),
|
||||
};
|
||||
|
||||
if (str_level &&
|
||||
(kstrtouint(str_level, 10, &n.level) || n.level >= BTREE_MAX_DEPTH))
|
||||
die("invalid level");
|
||||
|
||||
if (str_idx &&
|
||||
kstrtoull(str_idx, 10, &n.idx))
|
||||
die("invalid index %s", str_idx);
|
||||
|
||||
darray_push(&kill_nodes, n);
|
||||
break;
|
||||
case 'i':
|
||||
if (kstrtoull(optarg, 10, &node_index))
|
||||
die("invalid index %s", optarg);
|
||||
break;
|
||||
}
|
||||
case 'h':
|
||||
kill_btree_node_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
@ -71,16 +85,19 @@ int cmd_kill_btree_node(int argc, char *argv[])
|
||||
if (ret)
|
||||
die("error %s from posix_memalign", bch2_err_str(ret));
|
||||
|
||||
ret = bch2_trans_run(c,
|
||||
__for_each_btree_node(trans, iter, btree_id, POS_MIN, 0, level, 0, b, ({
|
||||
if (b->c.level != level)
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
darray_for_each(kill_nodes, i) {
|
||||
ret = __for_each_btree_node(trans, iter, i->btree, POS_MIN, 0, i->level, 0, b, ({
|
||||
if (b->c.level != i->level)
|
||||
continue;
|
||||
|
||||
int ret2 = 0;
|
||||
if (!node_index) {
|
||||
if (!i->idx) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
bch_info(c, "killing btree node %s", buf.buf);
|
||||
bch_info(c, "killing btree node %s l=%u %s",
|
||||
bch2_btree_id_str(i->btree), i->level, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
||||
ret2 = 1;
|
||||
@ -102,16 +119,22 @@ int cmd_kill_btree_node(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
node_index--;
|
||||
i->idx--;
|
||||
ret2;
|
||||
})));
|
||||
if (ret < 0)
|
||||
}));
|
||||
|
||||
if (ret < 0) {
|
||||
bch_err(c, "error %i walking btree nodes", ret);
|
||||
else if (!ret) {
|
||||
break;
|
||||
} else if (!ret) {
|
||||
bch_err(c, "node at specified index not found");
|
||||
ret = EXIT_FAILURE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_put(trans);
|
||||
bch2_fs_stop(c);
|
||||
darray_exit(&kill_nodes);
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
|
@ -51,6 +51,11 @@ static inline bool entry_is_transaction_start(struct jset_entry *entry)
|
||||
return entry->type == BCH_JSET_ENTRY_log && !entry->level;
|
||||
}
|
||||
|
||||
static inline bool entry_is_log_msg(struct jset_entry *entry)
|
||||
{
|
||||
return entry->type == BCH_JSET_ENTRY_log && entry->level;
|
||||
}
|
||||
|
||||
typedef DARRAY(struct bbpos_range) d_bbpos_range;
|
||||
typedef DARRAY(enum btree_id) d_btree_id;
|
||||
|
||||
@ -60,9 +65,21 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
|
||||
struct bbpos k_start = BBPOS(entry->btree_id, bkey_start_pos(&k->k));
|
||||
struct bbpos k_end = BBPOS(entry->btree_id, k->k.p);
|
||||
|
||||
if (bbpos_cmp(k_start, i->end) < 0 &&
|
||||
bbpos_cmp(k_end, i->start) > 0)
|
||||
if (!i->start.pos.snapshot &&
|
||||
!i->end.pos.snapshot) {
|
||||
k_start.pos.snapshot = 0;
|
||||
k_end.pos.snapshot = 0;
|
||||
}
|
||||
|
||||
if (!k->k.size) {
|
||||
if (bbpos_cmp(k_start, i->start) >= 0 &&
|
||||
bbpos_cmp(k_end, i->end) <= 0)
|
||||
return true;
|
||||
} else {
|
||||
if (bbpos_cmp(i->start, k_end) <= 0 &&
|
||||
bbpos_cmp(i->end, k_start) >= 0)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -70,9 +87,9 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
|
||||
static bool entry_matches_transaction_filter(struct jset_entry *entry,
|
||||
d_bbpos_range filter)
|
||||
{
|
||||
if (entry->type == BCH_JSET_ENTRY_btree_root ||
|
||||
entry->type == BCH_JSET_ENTRY_btree_keys ||
|
||||
entry->type == BCH_JSET_ENTRY_overwrite)
|
||||
if (!entry->level &&
|
||||
(entry->type == BCH_JSET_ENTRY_btree_keys ||
|
||||
entry->type == BCH_JSET_ENTRY_overwrite))
|
||||
jset_entry_for_each_key(entry, k)
|
||||
if (bkey_matches_filter(filter, entry, k))
|
||||
return true;
|
||||
@ -80,15 +97,34 @@ static bool entry_matches_transaction_filter(struct jset_entry *entry,
|
||||
}
|
||||
|
||||
static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
|
||||
d_bbpos_range filter)
|
||||
darray_str msg_filter,
|
||||
d_bbpos_range key_filter)
|
||||
{
|
||||
if (!filter.nr)
|
||||
struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
|
||||
unsigned b = jset_entry_log_msg_bytes(l);
|
||||
bool have_log_messages = false;
|
||||
bool have_non_log_messages = false;
|
||||
|
||||
darray_for_each(msg_filter, i)
|
||||
if (!strncmp(*i, l->d, b))
|
||||
return false;
|
||||
|
||||
if (!key_filter.nr)
|
||||
return true;
|
||||
|
||||
for (entry = vstruct_next(entry);
|
||||
entry != end && !entry_is_transaction_start(entry);
|
||||
entry = vstruct_next(entry))
|
||||
if (entry_matches_transaction_filter(entry, filter))
|
||||
entry = vstruct_next(entry)) {
|
||||
if (entry_matches_transaction_filter(entry, key_filter))
|
||||
return true;
|
||||
|
||||
if (entry_is_log_msg(entry))
|
||||
have_log_messages = true;
|
||||
else
|
||||
have_non_log_messages = true;
|
||||
}
|
||||
|
||||
if (have_log_messages && !have_non_log_messages)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -112,8 +148,41 @@ static bool should_print_entry(struct jset_entry *entry, d_btree_id filter)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void journal_entry_header_to_text(struct printbuf *out,
|
||||
struct bch_fs *c,
|
||||
struct journal_replay *p, bool blacklisted)
|
||||
{
|
||||
if (blacklisted)
|
||||
prt_str(out, "blacklisted ");
|
||||
|
||||
prt_printf(out,
|
||||
"\n"
|
||||
"journal entry %llu\n"
|
||||
" version %u\n"
|
||||
" last seq %llu\n"
|
||||
" flush %u\n"
|
||||
" written at ",
|
||||
le64_to_cpu(p->j.seq),
|
||||
le32_to_cpu(p->j.version),
|
||||
le64_to_cpu(p->j.last_seq),
|
||||
!JSET_NO_FLUSH(&p->j));
|
||||
bch2_journal_ptrs_to_text(out, c, p);
|
||||
|
||||
if (blacklisted)
|
||||
star_start_of_lines(out->buf);
|
||||
}
|
||||
|
||||
static void journal_entry_header_print(struct bch_fs *c, struct journal_replay *p, bool blacklisted)
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
journal_entry_header_to_text(&buf, c, p, blacklisted);
|
||||
printf("%s\n", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
|
||||
d_bbpos_range transaction_filter,
|
||||
darray_str transaction_msg_filter,
|
||||
d_bbpos_range transaction_key_filter,
|
||||
d_btree_id key_filter)
|
||||
{
|
||||
struct journal_replay *p, **_p;
|
||||
@ -121,6 +190,8 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
genradix_for_each(&c->journal_entries, iter, _p) {
|
||||
bool printed_header = false;
|
||||
|
||||
p = *_p;
|
||||
if (!p)
|
||||
continue;
|
||||
@ -132,28 +203,10 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
|
||||
bch2_journal_seq_is_blacklisted(c,
|
||||
le64_to_cpu(p->j.seq), false);
|
||||
|
||||
if (!transaction_filter.nr) {
|
||||
if (blacklisted)
|
||||
printf("blacklisted ");
|
||||
|
||||
printf("journal entry %llu\n", le64_to_cpu(p->j.seq));
|
||||
|
||||
printbuf_reset(&buf);
|
||||
|
||||
prt_printf(&buf,
|
||||
" version %u\n"
|
||||
" last seq %llu\n"
|
||||
" flush %u\n"
|
||||
" written at ",
|
||||
le32_to_cpu(p->j.version),
|
||||
le64_to_cpu(p->j.last_seq),
|
||||
!JSET_NO_FLUSH(&p->j));
|
||||
bch2_journal_ptrs_to_text(&buf, c, p);
|
||||
|
||||
if (blacklisted)
|
||||
star_start_of_lines(buf.buf);
|
||||
printf("%s\n", buf.buf);
|
||||
printbuf_reset(&buf);
|
||||
if (!transaction_msg_filter.nr &&
|
||||
!transaction_key_filter.nr) {
|
||||
journal_entry_header_print(c, p, blacklisted);
|
||||
printed_header = true;
|
||||
}
|
||||
|
||||
struct jset_entry *entry = p->j.start;
|
||||
@ -165,7 +218,9 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
|
||||
* commit:
|
||||
*/
|
||||
if (entry_is_transaction_start(entry)) {
|
||||
if (!should_print_transaction(entry, end, transaction_filter)) {
|
||||
if (!should_print_transaction(entry, end,
|
||||
transaction_msg_filter,
|
||||
transaction_key_filter)) {
|
||||
do {
|
||||
entry = vstruct_next(entry);
|
||||
} while (entry != end && !entry_is_transaction_start(entry));
|
||||
@ -179,7 +234,11 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
|
||||
if (!should_print_entry(entry, key_filter))
|
||||
goto next;
|
||||
|
||||
bool highlight = entry_matches_transaction_filter(entry, transaction_filter);
|
||||
if (!printed_header)
|
||||
journal_entry_header_print(c, p, blacklisted);
|
||||
printed_header = true;
|
||||
|
||||
bool highlight = entry_matches_transaction_filter(entry, transaction_key_filter);
|
||||
if (highlight)
|
||||
fputs(RED, stdout);
|
||||
|
||||
@ -213,22 +272,22 @@ int cmd_list_journal(int argc, char *argv[])
|
||||
};
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
u32 nr_entries = U32_MAX;
|
||||
d_bbpos_range transaction_filter = { 0 };
|
||||
d_btree_id key_filter = { 0 };
|
||||
darray_str transaction_msg_filter = {};
|
||||
d_bbpos_range transaction_key_filter = {};
|
||||
d_btree_id key_filter = {};
|
||||
int opt;
|
||||
|
||||
opt_set(opts, noexcl, true);
|
||||
opt_set(opts, nochanges, true);
|
||||
opt_set(opts, norecovery, true);
|
||||
opt_set(opts, read_only, true);
|
||||
opt_set(opts, degraded, true);
|
||||
opt_set(opts, very_degraded, true);
|
||||
opt_set(opts, degraded, BCH_DEGRADED_very);
|
||||
opt_set(opts, errors, BCH_ON_ERROR_continue);
|
||||
opt_set(opts, fix_errors, FSCK_FIX_yes);
|
||||
opt_set(opts, retain_recovery_info ,true);
|
||||
opt_set(opts, read_journal_only,true);
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "an:t:k:vh",
|
||||
while ((opt = getopt_long(argc, argv, "an:m:t:k:vh",
|
||||
longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'a':
|
||||
@ -239,8 +298,11 @@ int cmd_list_journal(int argc, char *argv[])
|
||||
die("error parsing nr_entries");
|
||||
opt_set(opts, read_entire_journal, true);
|
||||
break;
|
||||
case 'm':
|
||||
darray_push(&transaction_msg_filter, strdup(optarg));
|
||||
break;
|
||||
case 't':
|
||||
darray_push(&transaction_filter, bbpos_range_parse(optarg));
|
||||
darray_push(&transaction_key_filter, bbpos_range_parse(optarg));
|
||||
break;
|
||||
case 'k':
|
||||
darray_push(&key_filter, read_string_list_or_die(optarg, __bch2_btree_ids, "btree id"));
|
||||
@ -263,7 +325,10 @@ int cmd_list_journal(int argc, char *argv[])
|
||||
if (IS_ERR(c))
|
||||
die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
|
||||
|
||||
journal_entries_print(c, nr_entries, transaction_filter, key_filter);
|
||||
journal_entries_print(c, nr_entries,
|
||||
transaction_msg_filter,
|
||||
transaction_key_filter,
|
||||
key_filter);
|
||||
bch2_fs_stop(c);
|
||||
return 0;
|
||||
}
|
||||
|
@ -31,9 +31,6 @@
|
||||
#include "libbcachefs/replicas.h"
|
||||
#include "libbcachefs/super.h"
|
||||
|
||||
/* XXX cut and pasted from fsck.c */
|
||||
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
|
||||
|
||||
static char *dev_t_to_path(dev_t dev)
|
||||
{
|
||||
char link[PATH_MAX], *p;
|
||||
@ -162,9 +159,9 @@ static void find_superblock_space(ranges extents,
|
||||
{
|
||||
darray_for_each(extents, i) {
|
||||
u64 start = round_up(max(256ULL << 10, i->start),
|
||||
dev->bucket_size << 9);
|
||||
dev->opts.bucket_size << 9);
|
||||
u64 end = round_down(i->end,
|
||||
dev->bucket_size << 9);
|
||||
dev->opts.bucket_size << 9);
|
||||
|
||||
/* Need space for two superblocks: */
|
||||
if (start + (opts.superblock_size << 9) * 2 <= end) {
|
||||
@ -212,38 +209,43 @@ static int migrate_fs(const char *fs_path,
|
||||
if (!S_ISDIR(stat.st_mode))
|
||||
die("%s is not a directory", fs_path);
|
||||
|
||||
struct dev_opts dev = dev_opts_default();
|
||||
dev_opts_list devs = {};
|
||||
darray_push(&devs, dev_opts_default());
|
||||
|
||||
dev.path = dev_t_to_path(stat.st_dev);
|
||||
dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
|
||||
struct dev_opts *dev = &devs.data[0];
|
||||
|
||||
int ret = PTR_ERR_OR_ZERO(dev.file);
|
||||
dev->path = dev_t_to_path(stat.st_dev);
|
||||
dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
|
||||
|
||||
int ret = PTR_ERR_OR_ZERO(dev->file);
|
||||
if (ret < 0)
|
||||
die("Error opening device to format %s: %s", dev.path, strerror(-ret));
|
||||
dev.bdev = file_bdev(dev.file);
|
||||
die("Error opening device to format %s: %s", dev->path, strerror(-ret));
|
||||
dev->bdev = file_bdev(dev->file);
|
||||
|
||||
opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
|
||||
opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
|
||||
|
||||
char *file_path = mprintf("%s/bcachefs", fs_path);
|
||||
printf("Creating new filesystem on %s in space reserved at %s\n",
|
||||
dev.path, file_path);
|
||||
dev->path, file_path);
|
||||
|
||||
dev.size = get_size(dev.bdev->bd_fd);
|
||||
dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
|
||||
dev.nbuckets = dev.size / dev.bucket_size;
|
||||
dev->fs_size = get_size(dev->bdev->bd_fd);
|
||||
opt_set(dev->opts, bucket_size, bch2_pick_bucket_size(fs_opts, devs));
|
||||
|
||||
bch2_check_bucket_size(fs_opts, &dev);
|
||||
dev->nbuckets = dev->fs_size / dev->opts.bucket_size;
|
||||
|
||||
bch2_check_bucket_size(fs_opts, dev);
|
||||
|
||||
u64 bcachefs_inum;
|
||||
ranges extents = reserve_new_fs_space(file_path,
|
||||
fs_opts.block_size >> 9,
|
||||
get_size(dev.bdev->bd_fd) / 5,
|
||||
get_size(dev->bdev->bd_fd) / 5,
|
||||
&bcachefs_inum, stat.st_dev, force);
|
||||
|
||||
find_superblock_space(extents, format_opts, &dev);
|
||||
find_superblock_space(extents, format_opts, dev);
|
||||
|
||||
struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
|
||||
darray_exit(&devs);
|
||||
|
||||
struct bch_sb *sb = bch2_format(fs_opt_strs,
|
||||
fs_opts, format_opts, &dev, 1);
|
||||
u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
|
||||
|
||||
if (format_opts.passphrase)
|
||||
@ -251,16 +253,14 @@ static int migrate_fs(const char *fs_path,
|
||||
|
||||
free(sb);
|
||||
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
struct bch_fs *c = NULL;
|
||||
char *path[1] = { dev.path };
|
||||
char *path[1] = { dev->path };
|
||||
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
opt_set(opts, sb, sb_offset);
|
||||
opt_set(opts, nostart, true);
|
||||
opt_set(opts, noexcl, true);
|
||||
opt_set(opts, nostart, true);
|
||||
|
||||
c = bch2_fs_open(path, 1, opts);
|
||||
struct bch_fs *c = bch2_fs_open(path, 1, opts);
|
||||
if (IS_ERR(c))
|
||||
die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
|
||||
|
||||
@ -268,10 +268,6 @@ static int migrate_fs(const char *fs_path,
|
||||
if (ret)
|
||||
die("Error allocating buckets_nouse: %s", bch2_err_str(ret));
|
||||
|
||||
ret = bch2_fs_start(c);
|
||||
if (IS_ERR(c))
|
||||
die("Error starting new filesystem: %s", bch2_err_str(ret));
|
||||
|
||||
mark_unreserved_space(c, extents);
|
||||
|
||||
ret = bch2_fs_start(c);
|
||||
@ -285,7 +281,10 @@ static int migrate_fs(const char *fs_path,
|
||||
.type = BCH_MIGRATE_migrate,
|
||||
};
|
||||
|
||||
copy_fs(c, fs_fd, fs_path, &s);
|
||||
u64 reserve_start = round_up((format_opts.superblock_size * 2 + 8) << 9,
|
||||
dev->opts.bucket_size);
|
||||
|
||||
copy_fs(c, fs_fd, fs_path, &s, reserve_start);
|
||||
|
||||
bch2_fs_stop(c);
|
||||
|
||||
@ -313,7 +312,7 @@ static int migrate_fs(const char *fs_path,
|
||||
"filesystem. That file can be deleted once the old filesystem is\n"
|
||||
"no longer needed (and should be deleted prior to running\n"
|
||||
"bcachefs migrate-superblock)\n",
|
||||
sb_offset, dev.path, dev.path, sb_offset);
|
||||
sb_offset, dev->path, dev->path, sb_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -377,7 +376,7 @@ static void migrate_superblock_usage(void)
|
||||
int cmd_migrate_superblock(int argc, char *argv[])
|
||||
{
|
||||
char *dev = NULL;
|
||||
u64 offset = 0;
|
||||
u64 sb_offset = 0;
|
||||
int opt, ret;
|
||||
|
||||
while ((opt = getopt(argc, argv, "d:o:h")) != -1)
|
||||
@ -386,7 +385,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
|
||||
dev = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
ret = kstrtou64(optarg, 10, &offset);
|
||||
ret = kstrtou64(optarg, 10, &sb_offset);
|
||||
if (ret)
|
||||
die("Invalid offset");
|
||||
break;
|
||||
@ -398,29 +397,72 @@ int cmd_migrate_superblock(int argc, char *argv[])
|
||||
if (!dev)
|
||||
die("Please specify a device");
|
||||
|
||||
if (!offset)
|
||||
if (!sb_offset)
|
||||
die("Please specify offset of existing superblock");
|
||||
|
||||
int fd = xopen(dev, O_RDWR);
|
||||
struct bch_sb *sb = __bch2_super_read(fd, offset);
|
||||
struct bch_sb *sb = __bch2_super_read(fd, sb_offset);
|
||||
unsigned sb_size = 1U << sb->layout.sb_max_size_bits;
|
||||
|
||||
if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
|
||||
die("Can't add superblock: no space left in superblock layout");
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < sb->layout.nr_superblocks; i++)
|
||||
if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
|
||||
die("Superblock layout already has default superblock");
|
||||
for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
|
||||
if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR ||
|
||||
le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size)
|
||||
die("Superblock layout already has default superblocks");
|
||||
|
||||
memmove(&sb->layout.sb_offset[1],
|
||||
memmove(&sb->layout.sb_offset[2],
|
||||
&sb->layout.sb_offset[0],
|
||||
sb->layout.nr_superblocks * sizeof(u64));
|
||||
sb->layout.nr_superblocks++;
|
||||
|
||||
sb->layout.nr_superblocks += 2;
|
||||
sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
|
||||
sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size);
|
||||
|
||||
/* also write first 0-3.5k bytes with zeroes, ensure we blow away old
|
||||
* superblock */
|
||||
static const char zeroes[BCH_SB_SECTOR << 9];
|
||||
xpwrite(fd, zeroes, BCH_SB_SECTOR << 9, 0, "zeroing start of disk");
|
||||
|
||||
bch2_super_write(fd, sb);
|
||||
close(fd);
|
||||
|
||||
/* mark new superblocks */
|
||||
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
opt_set(opts, nostart, true);
|
||||
opt_set(opts, sb, sb_offset);
|
||||
|
||||
struct bch_fs *c = bch2_fs_open(&dev, 1, opts);
|
||||
ret = PTR_ERR_OR_ZERO(c) ?:
|
||||
bch2_buckets_nouse_alloc(c);
|
||||
if (ret)
|
||||
die("error opening filesystem: %s", bch2_err_str(ret));
|
||||
|
||||
struct bch_dev *ca = c->devs[0];
|
||||
for (u64 b = 0; bucket_to_sector(ca, b) < BCH_SB_SECTOR + sb_size * 2; b++)
|
||||
set_bit(b, ca->buckets_nouse);
|
||||
|
||||
ret = bch2_fs_start(c);
|
||||
if (ret)
|
||||
die("Error starting filesystem: %s", bch2_err_str(ret));
|
||||
|
||||
bch2_fs_stop(c);
|
||||
|
||||
opts = bch2_opts_empty();
|
||||
opt_set(opts, fsck, true);
|
||||
opt_set(opts, fix_errors, true);
|
||||
|
||||
/*
|
||||
* Hack: the free space counters are coming out wrong after marking the
|
||||
* new superblock, but it's just the device counters so it's
|
||||
* inconsequential:
|
||||
*/
|
||||
|
||||
c = bch2_fs_open(&dev, 1, opts);
|
||||
ret = PTR_ERR_OR_ZERO(c);
|
||||
if (ret)
|
||||
die("error opening filesystem: %s", bch2_err_str(ret));
|
||||
bch2_fs_stop(c);
|
||||
return 0;
|
||||
}
|
||||
|
@ -30,8 +30,9 @@ static void set_option_usage(void)
|
||||
"Usage: bcachefs set-fs-option [OPTION].. device\n"
|
||||
"\n"
|
||||
"Options:\n");
|
||||
bch2_opts_usage(OPT_MOUNT);
|
||||
puts(" -h, --help display this help and exit\n"
|
||||
bch2_opts_usage(OPT_MOUNT|OPT_RUNTIME);
|
||||
puts(" -d, --dev-idx index for device specific options\n"
|
||||
" -h, --help display this help and exit\n"
|
||||
"Report bugs to <linux-bcachefs@vger.kernel.org>");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
@ -55,11 +56,24 @@ int cmd_set_option(int argc, char *argv[])
|
||||
{
|
||||
struct bch_opt_strs new_opt_strs = bch2_cmdline_opts_get(&argc, argv, OPT_MOUNT|OPT_DEVICE);
|
||||
struct bch_opts new_opts = bch2_parse_opts(new_opt_strs);
|
||||
unsigned i;
|
||||
DARRAY(unsigned) dev_idxs = {};
|
||||
int opt, ret = 0;
|
||||
|
||||
while ((opt = getopt(argc, argv, "h")) != -1)
|
||||
static const struct option longopts[] = {
|
||||
{ "dev-idx", required_argument, NULL, 'd' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "d:h", longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'd': {
|
||||
unsigned dev_idx;
|
||||
if (kstrtoint(optarg, 10, &dev_idx))
|
||||
die("error parsing %s", optarg);
|
||||
darray_push(&dev_idxs, dev_idx);
|
||||
break;
|
||||
}
|
||||
case 'h':
|
||||
set_option_usage();
|
||||
break;
|
||||
@ -72,6 +86,7 @@ int cmd_set_option(int argc, char *argv[])
|
||||
}
|
||||
|
||||
bool online = false;
|
||||
unsigned i;
|
||||
for (i = 0; i < argc; i++)
|
||||
if (dev_mounted(argv[i])) {
|
||||
online = true;
|
||||
@ -96,20 +111,38 @@ int cmd_set_option(int argc, char *argv[])
|
||||
if (!bch2_opt_defined_by_id(&new_opts, i))
|
||||
continue;
|
||||
|
||||
ret = bch2_opt_check_may_set(c, i, v);
|
||||
if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
|
||||
fprintf(stderr, "Can't set option %s\n", opt->attr.name);
|
||||
|
||||
if (opt->flags & OPT_FS) {
|
||||
ret = bch2_opt_check_may_set(c, NULL, i, v);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
|
||||
fprintf(stderr, "Can't set option %s\n", opt->attr.name);
|
||||
|
||||
if (opt->flags & OPT_FS) {
|
||||
bch2_opt_set_sb(c, NULL, opt, v);
|
||||
}
|
||||
|
||||
if (opt->flags & OPT_DEVICE) {
|
||||
if (dev_idxs.nr) {
|
||||
darray_for_each(dev_idxs, dev) {
|
||||
struct bch_dev *ca = bch2_dev_tryget_noerror(c, *dev);
|
||||
if (!ca) {
|
||||
fprintf(stderr, "Couldn't look up device %u\n", *dev);
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = bch2_opt_check_may_set(c, ca, i, v);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
|
||||
continue;
|
||||
}
|
||||
|
||||
bch2_opt_set_sb(c, ca, opt, v);
|
||||
bch2_dev_put(ca);
|
||||
}
|
||||
} else {
|
||||
for (unsigned dev = 0; dev < argc; dev++) {
|
||||
int dev_idx = name_to_dev_idx(c, argv[dev]);
|
||||
if (dev_idx < 0) {
|
||||
@ -121,6 +154,7 @@ int cmd_set_option(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bch2_fs_stop(c);
|
||||
return ret;
|
||||
|
98
c_src/cmd_top.c
Normal file
98
c_src/cmd_top.c
Normal file
@ -0,0 +1,98 @@
|
||||
#include <dirent.h>
|
||||
#include <getopt.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "cmds.h"
|
||||
#include "libbcachefs.h"
|
||||
#include "libbcachefs/sb-counters.h"
|
||||
|
||||
static const u8 counters_to_stable_map[] = {
|
||||
#define x(n, id, ...) [BCH_COUNTER_##n] = BCH_COUNTER_STABLE_##n,
|
||||
BCH_PERSISTENT_COUNTERS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
static struct bch_ioctl_query_counters *read_counters(struct bchfs_handle fs)
|
||||
{
|
||||
struct bch_ioctl_query_counters *ret =
|
||||
kzalloc(sizeof(*ret) + sizeof(ret->d[0]) * BCH_COUNTER_NR, GFP_KERNEL);
|
||||
|
||||
ret->nr = BCH_COUNTER_NR;
|
||||
|
||||
xioctl(fs.ioctl_fd, BCH_IOCTL_QUERY_COUNTERS, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void fs_top(const char *path, bool human_readable)
|
||||
{
|
||||
struct bchfs_handle fs = bcache_fs_open(path);
|
||||
|
||||
struct bch_ioctl_query_counters *curr, *prev = NULL;
|
||||
|
||||
curr = read_counters(fs);
|
||||
|
||||
while (true) {
|
||||
sleep(1);
|
||||
kfree(prev);
|
||||
prev = curr;
|
||||
curr = read_counters(fs);
|
||||
|
||||
printf("\033[2J");
|
||||
printf("\033[H");
|
||||
|
||||
for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
|
||||
unsigned stable = counters_to_stable_map[i];
|
||||
u64 v = stable < curr->nr
|
||||
? curr->d[stable] - prev->d[stable]
|
||||
: 0;
|
||||
printf("%-48s %llu\n",
|
||||
bch2_counter_names[i],
|
||||
v);
|
||||
}
|
||||
}
|
||||
|
||||
bcache_fs_close(fs);
|
||||
}
|
||||
|
||||
static void fs_top_usage(void)
|
||||
{
|
||||
puts("bcachefs fs top - display runtime perfomance info\n"
|
||||
"Usage: bcachefs fs top [OPTION]... <mountpoint>\n"
|
||||
"\n"
|
||||
"Options:\n"
|
||||
" -h, --human-readable Human readable units\n"
|
||||
" -H, --help Display this help and exit\n"
|
||||
"Report bugs to <linux-bcachefs@vger.kernel.org>");
|
||||
}
|
||||
|
||||
int cmd_fs_top(int argc, char *argv[])
|
||||
{
|
||||
static const struct option longopts[] = {
|
||||
{ "help", no_argument, NULL, 'H' },
|
||||
{ "human-readable", no_argument, NULL, 'h' },
|
||||
{ NULL }
|
||||
};
|
||||
bool human_readable = false;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "Hh",
|
||||
longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'h':
|
||||
human_readable = true;
|
||||
break;
|
||||
case 'H':
|
||||
fs_top_usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
default:
|
||||
fs_top_usage();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
args_shift(optind);
|
||||
|
||||
fs_top(arg_pop() ?: ".", human_readable) ;
|
||||
return 0;
|
||||
}
|
@ -11,10 +11,13 @@
|
||||
|
||||
int cmd_format(int argc, char *argv[]);
|
||||
int cmd_show_super(int argc, char *argv[]);
|
||||
int cmd_recover_super(int argc, char *argv[]);
|
||||
int cmd_reset_counters(int argc, char *argv[]);
|
||||
int cmd_set_option(int argc, char *argv[]);
|
||||
|
||||
int fs_usage(void);
|
||||
int cmd_fs_usage(int argc, char *argv[]);
|
||||
int cmd_fs_top(int argc, char *argv[]);
|
||||
|
||||
int device_usage(void);
|
||||
int cmd_device_add(int argc, char *argv[]);
|
||||
@ -28,6 +31,7 @@ int cmd_device_resize_journal(int argc, char *argv[]);
|
||||
|
||||
int data_usage(void);
|
||||
int cmd_data_rereplicate(int argc, char *argv[]);
|
||||
int cmd_data_scrub(int argc, char *argv[]);
|
||||
int cmd_data_job(int argc, char *argv[]);
|
||||
|
||||
int cmd_unlock(int argc, char *argv[]);
|
||||
|
@ -176,26 +176,47 @@ void bch_sb_crypt_init(struct bch_sb *sb,
|
||||
struct bch_sb_field_crypt *crypt,
|
||||
const char *passphrase)
|
||||
{
|
||||
struct bch_key key;
|
||||
get_random_bytes(&key, sizeof(key));
|
||||
|
||||
crypt->key.magic = BCH_KEY_MAGIC;
|
||||
get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
|
||||
crypt->key.key = key;
|
||||
|
||||
if (passphrase) {
|
||||
bch_crypt_update_passphrase(sb, crypt, &key, passphrase);
|
||||
}
|
||||
|
||||
void bch_crypt_update_passphrase(
|
||||
struct bch_sb *sb,
|
||||
struct bch_sb_field_crypt *crypt,
|
||||
struct bch_key *key,
|
||||
const char *new_passphrase)
|
||||
{
|
||||
|
||||
struct bch_encrypted_key new_key;
|
||||
new_key.magic = BCH_KEY_MAGIC;
|
||||
new_key.key = *key;
|
||||
|
||||
if(!new_passphrase) {
|
||||
crypt->key = new_key;
|
||||
return;
|
||||
}
|
||||
|
||||
// If crypt already has an encrypted key reuse it's encryption params
|
||||
if (!bch2_key_is_encrypted(&crypt->key)) {
|
||||
SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
|
||||
SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384));
|
||||
SET_BCH_KDF_SCRYPT_R(crypt, ilog2(8));
|
||||
SET_BCH_KDF_SCRYPT_P(crypt, ilog2(16));
|
||||
}
|
||||
|
||||
struct bch_key passphrase_key = derive_passphrase(crypt, passphrase);
|
||||
|
||||
assert(!bch2_key_is_encrypted(&crypt->key));
|
||||
struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
|
||||
|
||||
if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
|
||||
&crypt->key, sizeof(crypt->key)))
|
||||
&new_key, sizeof(new_key)))
|
||||
die("error encrypting key");
|
||||
|
||||
assert(bch2_key_is_encrypted(&crypt->key));
|
||||
|
||||
memzero_explicit(&passphrase_key, sizeof(passphrase_key));
|
||||
}
|
||||
|
||||
crypt->key = new_key;
|
||||
assert(bch2_key_is_encrypted(&crypt->key));
|
||||
}
|
||||
|
@ -19,4 +19,7 @@ void bch2_add_key(struct bch_sb *, const char *, const char *, const char *);
|
||||
void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
|
||||
const char *);
|
||||
|
||||
void bch_crypt_update_passphrase(struct bch_sb *sb, struct bch_sb_field_crypt *crypt,
|
||||
struct bch_key *key, const char *new_passphrase);
|
||||
|
||||
#endif /* _CRYPTO_H */
|
||||
|
@ -16,6 +16,8 @@
|
||||
|
||||
#include <uuid/uuid.h>
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include "libbcachefs.h"
|
||||
#include "crypto.h"
|
||||
#include "libbcachefs/bcachefs_format.h"
|
||||
@ -31,7 +33,7 @@
|
||||
|
||||
#define NSEC_PER_SEC 1000000000L
|
||||
|
||||
static void init_layout(struct bch_sb_layout *l,
|
||||
void bch2_sb_layout_init(struct bch_sb_layout *l,
|
||||
unsigned block_size,
|
||||
unsigned sb_size,
|
||||
u64 sb_start, u64 sb_end)
|
||||
@ -60,83 +62,86 @@ static void init_layout(struct bch_sb_layout *l,
|
||||
sb_start, sb_pos, sb_end, sb_size);
|
||||
}
|
||||
|
||||
/* minimum size filesystem we can create, given a bucket size: */
|
||||
static u64 min_size(unsigned bucket_size)
|
||||
static u64 dev_max_bucket_size(u64 dev_size)
|
||||
{
|
||||
return BCH_MIN_NR_NBUCKETS * bucket_size;
|
||||
return rounddown_pow_of_two(dev_size / (BCH_MIN_NR_NBUCKETS * 4));
|
||||
}
|
||||
|
||||
u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
|
||||
u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
|
||||
{
|
||||
u64 bucket_size;
|
||||
|
||||
if (dev->size < min_size(opts.block_size))
|
||||
die("cannot format %s, too small (%llu bytes, min %llu)",
|
||||
dev->path, dev->size, min_size(opts.block_size));
|
||||
|
||||
/* Bucket size must be >= block size: */
|
||||
bucket_size = opts.block_size;
|
||||
u64 bucket_size = opts.block_size;
|
||||
|
||||
/* Bucket size must be >= btree node size: */
|
||||
if (opt_defined(opts, btree_node_size))
|
||||
bucket_size = max_t(unsigned, bucket_size,
|
||||
opts.btree_node_size);
|
||||
bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
|
||||
|
||||
/* Want a bucket size of at least 128k, if possible: */
|
||||
bucket_size = max(bucket_size, 128ULL << 10);
|
||||
u64 min_dev_size = BCH_MIN_NR_NBUCKETS * bucket_size;
|
||||
darray_for_each(devs, i)
|
||||
if (i->fs_size < min_dev_size)
|
||||
die("cannot format %s, too small (%llu bytes, min %llu)",
|
||||
i->path, i->fs_size, min_dev_size);
|
||||
|
||||
if (dev->size >= min_size(bucket_size)) {
|
||||
unsigned scale = max(1,
|
||||
ilog2(dev->size / min_size(bucket_size)) / 4);
|
||||
u64 total_fs_size = 0;
|
||||
darray_for_each(devs, i)
|
||||
total_fs_size += i->fs_size;
|
||||
|
||||
scale = rounddown_pow_of_two(scale);
|
||||
struct sysinfo info;
|
||||
si_meminfo(&info);
|
||||
|
||||
/* max bucket size 1 mb */
|
||||
bucket_size = min(bucket_size * scale, 1ULL << 20);
|
||||
} else {
|
||||
do {
|
||||
bucket_size /= 2;
|
||||
} while (dev->size < min_size(bucket_size));
|
||||
}
|
||||
/*
|
||||
* Large fudge factor to allow for other fsck processes and devices
|
||||
* being added after creation
|
||||
*/
|
||||
u64 mem_available_for_fsck = info.totalram / 8;
|
||||
u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
|
||||
u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
|
||||
|
||||
/*
|
||||
* Lower bound to avoid fragmenting encoded (checksummed, compressed)
|
||||
* extents too much as they're moved:
|
||||
*/
|
||||
bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
|
||||
|
||||
/* Lower bound to ensure we can fsck: */
|
||||
bucket_size = max(bucket_size, mem_lower_bound);
|
||||
|
||||
u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
|
||||
|
||||
/* We also prefer larger buckets for performance, up to 2MB at 2T */
|
||||
bucket_size = max(bucket_size, perf_lower_bound);
|
||||
|
||||
return bucket_size;
|
||||
}
|
||||
|
||||
void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
|
||||
{
|
||||
if (dev->bucket_size < opts.block_size)
|
||||
die("Bucket size (%llu) cannot be smaller than block size (%u)",
|
||||
dev->bucket_size, opts.block_size);
|
||||
if (dev->opts.bucket_size < opts.block_size)
|
||||
die("Bucket size (%u) cannot be smaller than block size (%u)",
|
||||
dev->opts.bucket_size, opts.block_size);
|
||||
|
||||
if (opt_defined(opts, btree_node_size) &&
|
||||
dev->bucket_size < opts.btree_node_size)
|
||||
die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
|
||||
dev->bucket_size, opts.btree_node_size);
|
||||
dev->opts.bucket_size < opts.btree_node_size)
|
||||
die("Bucket size (%u) cannot be smaller than btree node size (%u)",
|
||||
dev->opts.bucket_size, opts.btree_node_size);
|
||||
|
||||
if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
|
||||
die("Not enough buckets: %llu, need %u (bucket size %llu)",
|
||||
dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
|
||||
|
||||
if (dev->bucket_size > (u32) U16_MAX << 9)
|
||||
die("Bucket size (%llu) too big (max %u)",
|
||||
dev->bucket_size, (u32) U16_MAX << 9);
|
||||
die("Not enough buckets: %llu, need %u (bucket size %u)",
|
||||
dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->opts.bucket_size);
|
||||
}
|
||||
|
||||
static unsigned parse_target(struct bch_sb_handle *sb,
|
||||
struct dev_opts *devs, size_t nr_devs,
|
||||
dev_opts_list devs,
|
||||
const char *s)
|
||||
{
|
||||
struct dev_opts *i;
|
||||
int idx;
|
||||
|
||||
if (!s)
|
||||
return 0;
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
darray_for_each(devs, i)
|
||||
if (!strcmp(s, i->path))
|
||||
return dev_to_target(i - devs);
|
||||
return dev_to_target(i - devs.data);
|
||||
|
||||
idx = bch2_disk_path_find(sb, s);
|
||||
int idx = bch2_disk_path_find(sb, s);
|
||||
if (idx >= 0)
|
||||
return group_to_target(idx);
|
||||
|
||||
@ -144,56 +149,61 @@ static unsigned parse_target(struct bch_sb_handle *sb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_opt_set_sb_all(struct bch_sb *sb, int dev_idx, struct bch_opts *opts)
|
||||
{
|
||||
for (unsigned id = 0; id < bch2_opts_nr; id++) {
|
||||
u64 v = bch2_opt_defined_by_id(opts, id)
|
||||
? bch2_opt_get_by_id(opts, id)
|
||||
: bch2_opt_get_by_id(&bch2_opts_default, id);
|
||||
|
||||
__bch2_opt_set_sb(sb, dev_idx, &bch2_opt_table[id], v);
|
||||
}
|
||||
}
|
||||
|
||||
struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
struct bch_opts fs_opts,
|
||||
struct format_opts opts,
|
||||
struct dev_opts *devs,
|
||||
size_t nr_devs)
|
||||
dev_opts_list devs)
|
||||
{
|
||||
struct bch_sb_handle sb = { NULL };
|
||||
struct dev_opts *i;
|
||||
unsigned max_dev_block_size = 0;
|
||||
unsigned opt_id;
|
||||
u64 min_bucket_size = U64_MAX;
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
darray_for_each(devs, i)
|
||||
max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
|
||||
|
||||
/* calculate block size: */
|
||||
if (!opt_defined(fs_opts, block_size)) {
|
||||
if (!opt_defined(fs_opts, block_size))
|
||||
opt_set(fs_opts, block_size, max_dev_block_size);
|
||||
} else if (fs_opts.block_size < max_dev_block_size)
|
||||
|
||||
if (fs_opts.block_size < max_dev_block_size)
|
||||
die("blocksize too small: %u, must be greater than device blocksize %u",
|
||||
fs_opts.block_size, max_dev_block_size);
|
||||
|
||||
/* get device size, if it wasn't specified: */
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
if (!i->size)
|
||||
i->size = get_size(i->bdev->bd_fd);
|
||||
darray_for_each(devs, i)
|
||||
if (!i->fs_size)
|
||||
i->fs_size = get_size(i->bdev->bd_fd);
|
||||
|
||||
/* calculate bucket sizes: */
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
min_bucket_size = min(min_bucket_size,
|
||||
i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
|
||||
u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
if (!i->bucket_size)
|
||||
i->bucket_size = min_bucket_size;
|
||||
darray_for_each(devs, i)
|
||||
if (!opt_defined(i->opts, bucket_size))
|
||||
opt_set(i->opts, bucket_size,
|
||||
min(fs_bucket_size, dev_max_bucket_size(i->fs_size)));
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++) {
|
||||
i->nbuckets = i->size / i->bucket_size;
|
||||
darray_for_each(devs, i) {
|
||||
i->nbuckets = i->fs_size / i->opts.bucket_size;
|
||||
bch2_check_bucket_size(fs_opts, i);
|
||||
}
|
||||
|
||||
/* calculate btree node size: */
|
||||
if (!opt_defined(fs_opts, btree_node_size)) {
|
||||
/* 256k default btree node size */
|
||||
opt_set(fs_opts, btree_node_size, 256 << 10);
|
||||
unsigned s = bch2_opts_default.btree_node_size;
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++)
|
||||
fs_opts.btree_node_size =
|
||||
min_t(unsigned, fs_opts.btree_node_size,
|
||||
i->bucket_size);
|
||||
darray_for_each(devs, i)
|
||||
s = min(s, i->opts.bucket_size);
|
||||
opt_set(fs_opts, btree_node_size, s);
|
||||
}
|
||||
|
||||
if (uuid_is_null(opts.uuid.b))
|
||||
@ -206,7 +216,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
sb.sb->version_min = le16_to_cpu(opts.version);
|
||||
sb.sb->magic = BCHFS_MAGIC;
|
||||
sb.sb->user_uuid = opts.uuid;
|
||||
sb.sb->nr_devices = nr_devs;
|
||||
sb.sb->nr_devices = devs.nr;
|
||||
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb.sb, opts.version);
|
||||
|
||||
if (opts.version == bcachefs_metadata_version_current)
|
||||
sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
|
||||
@ -218,17 +229,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
opts.label,
|
||||
min(strlen(opts.label), sizeof(sb.sb->label)));
|
||||
|
||||
for (opt_id = 0;
|
||||
opt_id < bch2_opts_nr;
|
||||
opt_id++) {
|
||||
u64 v;
|
||||
|
||||
v = bch2_opt_defined_by_id(&fs_opts, opt_id)
|
||||
? bch2_opt_get_by_id(&fs_opts, opt_id)
|
||||
: bch2_opt_get_by_id(&bch2_opts_default, opt_id);
|
||||
|
||||
__bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v);
|
||||
}
|
||||
bch2_opt_set_sb_all(sb.sb, -1, &fs_opts);
|
||||
|
||||
struct timespec now;
|
||||
if (clock_gettime(CLOCK_REALTIME, &now))
|
||||
@ -240,31 +241,25 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
/* Member info: */
|
||||
struct bch_sb_field_members_v2 *mi =
|
||||
bch2_sb_field_resize(&sb, members_v2,
|
||||
(sizeof(*mi) + sizeof(struct bch_member) *
|
||||
nr_devs) / sizeof(u64));
|
||||
(sizeof(*mi) + sizeof(struct bch_member) * devs.nr) / sizeof(u64));
|
||||
|
||||
mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
|
||||
for (i = devs; i < devs + nr_devs; i++) {
|
||||
struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
|
||||
darray_for_each(devs, i) {
|
||||
unsigned idx = i - devs.data;
|
||||
struct bch_member *m = bch2_members_v2_get_mut(sb.sb, idx);
|
||||
|
||||
uuid_generate(m->uuid.b);
|
||||
m->nbuckets = cpu_to_le64(i->nbuckets);
|
||||
m->first_bucket = 0;
|
||||
m->bucket_size = cpu_to_le16(i->bucket_size >> 9);
|
||||
|
||||
SET_BCH_MEMBER_DISCARD(m, i->discard);
|
||||
SET_BCH_MEMBER_DATA_ALLOWED(m, i->data_allowed);
|
||||
SET_BCH_MEMBER_DURABILITY(m, i->durability + 1);
|
||||
bch2_opt_set_sb_all(sb.sb, idx, &i->opts);
|
||||
}
|
||||
|
||||
/* Disk labels*/
|
||||
for (i = devs; i < devs + nr_devs; i++) {
|
||||
struct bch_member *m;
|
||||
int idx;
|
||||
|
||||
darray_for_each(devs, i) {
|
||||
if (!i->label)
|
||||
continue;
|
||||
|
||||
idx = bch2_disk_path_find_or_create(&sb, i->label);
|
||||
int idx = bch2_disk_path_find_or_create(&sb, i->label);
|
||||
if (idx < 0)
|
||||
die("error creating disk path: %s", strerror(-idx));
|
||||
|
||||
@ -272,18 +267,18 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
* Recompute mi and m after each sb modification: its location
|
||||
* in memory may have changed due to reallocation.
|
||||
*/
|
||||
m = bch2_members_v2_get_mut(sb.sb, (i - devs));
|
||||
struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs.data));
|
||||
SET_BCH_MEMBER_GROUP(m, idx + 1);
|
||||
}
|
||||
|
||||
SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
|
||||
parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
|
||||
parse_target(&sb, devs, fs_opt_strs.foreground_target));
|
||||
SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
|
||||
parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
|
||||
parse_target(&sb, devs, fs_opt_strs.background_target));
|
||||
SET_BCH_SB_PROMOTE_TARGET(sb.sb,
|
||||
parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
|
||||
parse_target(&sb, devs, fs_opt_strs.promote_target));
|
||||
SET_BCH_SB_METADATA_TARGET(sb.sb,
|
||||
parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
|
||||
parse_target(&sb, devs, fs_opt_strs.metadata_target));
|
||||
|
||||
/* Crypt: */
|
||||
if (opts.encrypted) {
|
||||
@ -296,17 +291,17 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
|
||||
bch2_sb_members_cpy_v2_v1(&sb);
|
||||
|
||||
for (i = devs; i < devs + nr_devs; i++) {
|
||||
u64 size_sectors = i->size >> 9;
|
||||
darray_for_each(devs, i) {
|
||||
u64 size_sectors = i->fs_size >> 9;
|
||||
|
||||
sb.sb->dev_idx = i - devs;
|
||||
sb.sb->dev_idx = i - devs.data;
|
||||
|
||||
if (!i->sb_offset) {
|
||||
i->sb_offset = BCH_SB_SECTOR;
|
||||
i->sb_end = size_sectors;
|
||||
}
|
||||
|
||||
init_layout(&sb.sb->layout, fs_opts.block_size,
|
||||
bch2_sb_layout_init(&sb.sb->layout, fs_opts.block_size,
|
||||
opts.superblock_size,
|
||||
i->sb_offset, i->sb_end);
|
||||
|
||||
@ -321,7 +316,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
struct bch_sb_layout *l = &sb.sb->layout;
|
||||
u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);
|
||||
|
||||
backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
|
||||
backup_sb = rounddown(backup_sb, i->opts.bucket_size >> 9);
|
||||
l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
|
||||
}
|
||||
|
||||
@ -352,9 +347,9 @@ void bch2_super_write(int fd, struct bch_sb *sb)
|
||||
if (sb->offset == BCH_SB_SECTOR) {
|
||||
/* Write backup layout */
|
||||
|
||||
BUG_ON(bs > 4096);
|
||||
unsigned buflen = max(bs, 4096);
|
||||
|
||||
char *buf = aligned_alloc(bs, bs);
|
||||
char *buf = aligned_alloc(buflen, buflen);
|
||||
xpread(fd, buf, bs, 4096 - bs);
|
||||
memcpy(buf + bs - sizeof(sb->layout),
|
||||
&sb->layout,
|
||||
@ -411,43 +406,107 @@ void bcache_fs_close(struct bchfs_handle fs)
|
||||
close(fs.sysfs_fd);
|
||||
}
|
||||
|
||||
struct bchfs_handle bcache_fs_open(const char *path)
|
||||
static int bcache_fs_open_by_uuid(const char *uuid_str, struct bchfs_handle *fs)
|
||||
{
|
||||
struct bchfs_handle ret;
|
||||
|
||||
if (!uuid_parse(path, ret.uuid.b)) {
|
||||
/* It's a UUID, look it up in sysfs: */
|
||||
char *sysfs = mprintf(SYSFS_BASE "%s", path);
|
||||
ret.sysfs_fd = xopen(sysfs, O_RDONLY);
|
||||
|
||||
char *minor = read_file_str(ret.sysfs_fd, "minor");
|
||||
char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
|
||||
ret.ioctl_fd = xopen(ctl, O_RDWR);
|
||||
if (uuid_parse(uuid_str, fs->uuid.b))
|
||||
return -1;
|
||||
|
||||
char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
|
||||
fs->sysfs_fd = open(sysfs, O_RDONLY);
|
||||
free(sysfs);
|
||||
|
||||
if (fs->sysfs_fd < 0)
|
||||
return -errno;
|
||||
|
||||
char *minor = read_file_str(fs->sysfs_fd, "minor");
|
||||
char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
|
||||
fs->ioctl_fd = open(ctl, O_RDWR);
|
||||
free(minor);
|
||||
free(ctl);
|
||||
} else {
|
||||
|
||||
return fs->ioctl_fd < 0 ? -errno : 0;
|
||||
}
|
||||
|
||||
int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
|
||||
{
|
||||
memset(fs, 0, sizeof(*fs));
|
||||
fs->dev_idx = -1;
|
||||
|
||||
if (!uuid_parse(path, fs->uuid.b))
|
||||
return bcache_fs_open_by_uuid(path, fs);
|
||||
|
||||
/* It's a path: */
|
||||
ret.ioctl_fd = open(path, O_RDONLY);
|
||||
if (ret.ioctl_fd < 0)
|
||||
die("Error opening filesystem at %s: %m", path);
|
||||
int path_fd = open(path, O_RDONLY);
|
||||
if (path_fd < 0)
|
||||
return -errno;
|
||||
|
||||
struct bch_ioctl_query_uuid uuid;
|
||||
if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0)
|
||||
die("error opening %s: not a bcachefs filesystem", path);
|
||||
if (!ioctl(path_fd, BCH_IOCTL_QUERY_UUID, &uuid)) {
|
||||
/* It's a path to the mounted filesystem: */
|
||||
fs->ioctl_fd = path_fd;
|
||||
|
||||
ret.uuid = uuid.uuid;
|
||||
fs->uuid = uuid.uuid;
|
||||
|
||||
char uuid_str[40];
|
||||
uuid_unparse(uuid.uuid.b, uuid_str);
|
||||
|
||||
char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
|
||||
ret.sysfs_fd = xopen(sysfs, O_RDONLY);
|
||||
fs->sysfs_fd = xopen(sysfs, O_RDONLY);
|
||||
free(sysfs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
struct bch_opts opts = bch2_opts_empty();
|
||||
char buf[1024], *uuid_str;
|
||||
|
||||
struct stat stat = xstat(path);
|
||||
close(path_fd);
|
||||
|
||||
if (S_ISBLK(stat.st_mode)) {
|
||||
char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
|
||||
major(stat.st_rdev),
|
||||
minor(stat.st_rdev));
|
||||
|
||||
ssize_t len = readlink(sysfs, buf, sizeof(buf));
|
||||
free(sysfs);
|
||||
|
||||
if (len <= 0)
|
||||
goto read_super;
|
||||
|
||||
char *p = strrchr(buf, '/');
|
||||
if (!p || sscanf(p + 1, "dev-%u", &fs->dev_idx) != 1)
|
||||
die("error parsing sysfs");
|
||||
|
||||
*p = '\0';
|
||||
p = strrchr(buf, '/');
|
||||
uuid_str = p + 1;
|
||||
} else {
|
||||
read_super:
|
||||
opt_set(opts, noexcl, true);
|
||||
opt_set(opts, nochanges, true);
|
||||
|
||||
struct bch_sb_handle sb;
|
||||
int ret = bch2_read_super(path, &opts, &sb);
|
||||
if (ret)
|
||||
die("Error opening %s: %s", path, strerror(-ret));
|
||||
|
||||
fs->dev_idx = sb.sb->dev_idx;
|
||||
uuid_str = buf;
|
||||
uuid_unparse(sb.sb->user_uuid.b, uuid_str);
|
||||
|
||||
bch2_free_super(&sb);
|
||||
}
|
||||
|
||||
return bcache_fs_open_by_uuid(uuid_str, fs);
|
||||
}
|
||||
|
||||
struct bchfs_handle bcache_fs_open(const char *path)
|
||||
{
|
||||
struct bchfs_handle fs;
|
||||
int ret = bcache_fs_open_fallible(path, &fs);
|
||||
if (ret)
|
||||
die("Error opening filesystem at %s: %s", path, strerror(-ret));
|
||||
return fs;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -523,7 +582,7 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
|
||||
if (e.type)
|
||||
continue;
|
||||
|
||||
if (e.p.data_type == U8_MAX)
|
||||
if (e.ret || e.p.data_type == U8_MAX)
|
||||
break;
|
||||
|
||||
printf("\33[2K\r");
|
||||
@ -554,6 +613,8 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
|
||||
|
||||
/* option parsing */
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
void bch2_opt_strs_free(struct bch_opt_strs *opts)
|
||||
{
|
||||
unsigned i;
|
||||
@ -564,6 +625,64 @@ void bch2_opt_strs_free(struct bch_opt_strs *opts)
|
||||
}
|
||||
}
|
||||
|
||||
static bool opt_type_filter(const struct bch_option *opt, unsigned opt_types)
|
||||
{
|
||||
if (!(opt->flags & opt_types))
|
||||
return false;
|
||||
|
||||
if ((opt_types & OPT_FORMAT) &&
|
||||
!opt->set_sb && !opt->set_member)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
|
||||
unsigned opt_types)
|
||||
{
|
||||
if (optind >= argc)
|
||||
return NULL;
|
||||
|
||||
if (argv[optind][0] != '-' ||
|
||||
argv[optind][1] != '-')
|
||||
return NULL;
|
||||
|
||||
char *optstr = strdup(argv[optind] + 2);
|
||||
optarg = argv[optind + 1];
|
||||
|
||||
char *eq = strchr(optstr, '=');
|
||||
if (eq) {
|
||||
*eq = '\0';
|
||||
optarg = eq + 1;
|
||||
}
|
||||
|
||||
if (!optarg)
|
||||
optarg = "1";
|
||||
|
||||
|
||||
int optid = bch2_opt_lookup(optstr);
|
||||
if (optid < 0)
|
||||
goto noopt;
|
||||
|
||||
const struct bch_option *opt = bch2_opt_table + optid;
|
||||
if (!opt_type_filter(opt, opt_types))
|
||||
goto noopt;
|
||||
|
||||
optind++;
|
||||
|
||||
if (opt->type != BCH_OPT_BOOL) {
|
||||
if (optarg == argv[optind])
|
||||
optind++;
|
||||
} else {
|
||||
optarg = NULL;
|
||||
}
|
||||
|
||||
return opt;
|
||||
noopt:
|
||||
free(optstr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
|
||||
unsigned opt_types)
|
||||
{
|
||||
@ -658,12 +777,10 @@ void bch2_opts_usage(unsigned opt_types)
|
||||
const struct bch_option *opt;
|
||||
unsigned i, c = 0, helpcol = 30;
|
||||
|
||||
|
||||
|
||||
for (opt = bch2_opt_table;
|
||||
opt < bch2_opt_table + bch2_opts_nr;
|
||||
opt++) {
|
||||
if (!(opt->flags & opt_types))
|
||||
if (!opt_type_filter(opt, opt_types))
|
||||
continue;
|
||||
|
||||
c += printf(" --%s", opt->attr.name);
|
||||
@ -733,6 +850,8 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
|
||||
if (r > 0) {
|
||||
sysfs_block_buf[r] = '\0';
|
||||
n.dev = strdup(basename(sysfs_block_buf));
|
||||
} else {
|
||||
n.dev = mprintf("(offline dev %u)", n.idx);
|
||||
}
|
||||
|
||||
free(block_attr);
|
||||
@ -752,3 +871,11 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
|
||||
|
||||
return devs;
|
||||
}
|
||||
|
||||
/*
 * Look up the entry in @dev_names whose idx field equals @idx.
 * Returns a pointer to the first such entry, or NULL if there is none.
 */
struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
{
	for (size_t pos = 0; pos < dev_names->nr; pos++) {
		struct dev_name *entry = &dev_names->data[pos];

		if (entry->idx == idx)
			return entry;
	}

	return NULL;
}
|
||||
|
@ -28,6 +28,9 @@ struct {
|
||||
};
|
||||
|
||||
void bch2_opt_strs_free(struct bch_opt_strs *);
|
||||
|
||||
const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
|
||||
unsigned opt_types);
|
||||
struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
|
||||
struct bch_opts bch2_parse_opts(struct bch_opt_strs);
|
||||
void bch2_opts_usage(unsigned);
|
||||
@ -42,14 +45,24 @@ struct format_opts {
|
||||
char *source;
|
||||
};
|
||||
|
||||
static inline unsigned bcachefs_kernel_version(void)
|
||||
{
|
||||
return !access("/sys/module/bcachefs/parameters/version", R_OK)
|
||||
? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
|
||||
: 0;
|
||||
}
|
||||
|
||||
static inline struct format_opts format_opts_default()
|
||||
{
|
||||
unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK)
|
||||
? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
|
||||
: bcachefs_metadata_version_current;
|
||||
/*
|
||||
* Ensure bcachefs module is loaded so we know the supported on disk
|
||||
* format version:
|
||||
*/
|
||||
system("modprobe bcachefs > /dev/null 2>&1");
|
||||
|
||||
return (struct format_opts) {
|
||||
.version = version,
|
||||
.version = bcachefs_kernel_version() ?:
|
||||
bcachefs_metadata_version_current,
|
||||
.superblock_size = SUPERBLOCK_SIZE_DEFAULT,
|
||||
};
|
||||
}
|
||||
@ -58,33 +71,35 @@ struct dev_opts {
|
||||
struct file *file;
|
||||
struct block_device *bdev;
|
||||
char *path;
|
||||
u64 size; /* bytes*/
|
||||
u64 bucket_size; /* bytes */
|
||||
const char *label;
|
||||
unsigned data_allowed;
|
||||
unsigned durability;
|
||||
bool discard;
|
||||
|
||||
u64 nbuckets;
|
||||
|
||||
u64 sb_offset;
|
||||
u64 sb_end;
|
||||
|
||||
u64 nbuckets;
|
||||
u64 fs_size;
|
||||
|
||||
const char *label; /* make this a bch_opt */
|
||||
|
||||
struct bch_opts opts;
|
||||
};
|
||||
|
||||
typedef DARRAY(struct dev_opts) dev_opts_list;
|
||||
|
||||
static inline struct dev_opts dev_opts_default()
|
||||
{
|
||||
return (struct dev_opts) {
|
||||
.data_allowed = ~0U << 2,
|
||||
.durability = 1,
|
||||
};
|
||||
return (struct dev_opts) { .opts = bch2_opts_empty() };
|
||||
}
|
||||
|
||||
u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
|
||||
void bch2_sb_layout_init(struct bch_sb_layout *,
|
||||
unsigned, unsigned, u64, u64);
|
||||
|
||||
u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
|
||||
void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
|
||||
|
||||
struct bch_sb *bch2_format(struct bch_opt_strs,
|
||||
struct bch_opts,
|
||||
struct format_opts, struct dev_opts *, size_t);
|
||||
struct format_opts,
|
||||
dev_opts_list devs);
|
||||
|
||||
void bch2_super_write(int, struct bch_sb *);
|
||||
struct bch_sb *__bch2_super_read(int, u64);
|
||||
@ -97,11 +112,16 @@ struct bchfs_handle {
|
||||
__uuid_t uuid;
|
||||
int ioctl_fd;
|
||||
int sysfs_fd;
|
||||
int dev_idx;
|
||||
};
|
||||
|
||||
void bcache_fs_close(struct bchfs_handle);
|
||||
|
||||
int bcache_fs_open_fallible(const char *, struct bchfs_handle *);
|
||||
|
||||
struct bchfs_handle bcache_fs_open(const char *);
|
||||
struct bchfs_handle bchu_fs_open_by_dev(const char *, int *);
|
||||
|
||||
int bchu_dev_path_to_idx(struct bchfs_handle, const char *);
|
||||
|
||||
static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
|
||||
@ -296,5 +316,6 @@ struct dev_name {
|
||||
typedef DARRAY(struct dev_name) dev_names;
|
||||
|
||||
dev_names bchu_fs_get_devices(struct bchfs_handle);
|
||||
struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx);
|
||||
|
||||
#endif /* _LIBBCACHE_H */
|
||||
|
@ -1,12 +1,14 @@
|
||||
#include <dirent.h>
|
||||
#include <sys/xattr.h>
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/xattr.h>
|
||||
|
||||
#include "posix_to_bcachefs.h"
|
||||
#include "libbcachefs/alloc_foreground.h"
|
||||
#include "libbcachefs/buckets.h"
|
||||
#include "libbcachefs/fs-common.h"
|
||||
#include "libbcachefs/io_write.h"
|
||||
#include "libbcachefs/namei.h"
|
||||
#include "libbcachefs/str_hash.h"
|
||||
#include "libbcachefs/xattr.h"
|
||||
|
||||
@ -32,7 +34,7 @@ void create_link(struct bch_fs *c,
|
||||
struct bch_inode_unpacked parent_u;
|
||||
struct bch_inode_unpacked inode;
|
||||
|
||||
int ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_link_trans(trans,
|
||||
(subvol_inum) { 1, parent->bi_inum }, &parent_u,
|
||||
(subvol_inum) { 1, inum }, &inode, &qstr));
|
||||
@ -51,7 +53,7 @@ struct bch_inode_unpacked create_file(struct bch_fs *c,
|
||||
|
||||
bch2_inode_init_early(c, &new_inode);
|
||||
|
||||
int ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_create_trans(trans,
|
||||
(subvol_inum) { 1, parent->bi_inum }, parent,
|
||||
&new_inode, &qstr,
|
||||
@ -125,7 +127,7 @@ void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
if (IS_ERR(h))
|
||||
continue;
|
||||
|
||||
int ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_xattr_set(trans,
|
||||
(subvol_inum) { 1, dst->bi_inum },
|
||||
dst, &hash_info, attr,
|
||||
@ -158,7 +160,7 @@ static void write_data(struct bch_fs *c,
|
||||
op.nr_replicas = 1;
|
||||
op.subvol = 1;
|
||||
op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
|
||||
op.flags |= BCH_WRITE_SYNC;
|
||||
op.flags |= BCH_WRITE_sync;
|
||||
|
||||
int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
|
||||
c->opts.data_replicas, 0);
|
||||
@ -167,7 +169,7 @@ static void write_data(struct bch_fs *c,
|
||||
|
||||
closure_call(&op.cl, bch2_write, NULL, NULL);
|
||||
|
||||
BUG_ON(!(op.flags & BCH_WRITE_SUBMITTED));
|
||||
BUG_ON(!(op.flags & BCH_WRITE_submitted));
|
||||
dst_inode->bi_sectors += len >> 9;
|
||||
|
||||
if (op.error)
|
||||
@ -263,7 +265,8 @@ void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
|
||||
static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
int src_fd, u64 src_size,
|
||||
char *src_path, struct copy_fs_state *s)
|
||||
char *src_path, struct copy_fs_state *s,
|
||||
u64 reserve_start)
|
||||
{
|
||||
struct fiemap_iter iter;
|
||||
struct fiemap_extent e;
|
||||
@ -294,11 +297,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* if the data is below 1 MB, copy it so it doesn't conflict
|
||||
* with bcachefs's potentially larger superblock:
|
||||
*/
|
||||
if (e.fe_physical < 1 << 20) {
|
||||
/* If the data is in bcachefs's superblock region, copy it: */
|
||||
if (e.fe_physical < reserve_start) {
|
||||
copy_data(c, dst, src_fd, e.fe_logical,
|
||||
e.fe_logical + min(src_size - e.fe_logical,
|
||||
e.fe_length));
|
||||
@ -314,15 +314,34 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
fiemap_iter_exit(&iter);
|
||||
}
|
||||
|
||||
/*
 * Comparison callback ordering two struct dirent by d_name using strcmp().
 * Both arguments point at struct dirent elements.
 */
static int dirent_cmp(const void *_l, const void *_r)
{
	const char *lname = ((const struct dirent *) _l)->d_name;
	const char *rname = ((const struct dirent *) _r)->d_name;

	return strcmp(lname, rname);
}
|
||||
|
||||
static void copy_dir(struct copy_fs_state *s,
|
||||
struct bch_fs *c,
|
||||
struct bch_inode_unpacked *dst,
|
||||
int src_fd, const char *src_path)
|
||||
int src_fd, const char *src_path,
|
||||
u64 reserve_start)
|
||||
{
|
||||
DIR *dir = fdopendir(src_fd);
|
||||
struct dirent *d;
|
||||
DARRAY(struct dirent) dirents = {};
|
||||
|
||||
while ((errno = 0), (d = readdir(dir))) {
|
||||
darray_push(&dirents, *d);
|
||||
}
|
||||
|
||||
if (errno)
|
||||
die("readdir error: %m");
|
||||
|
||||
sort(dirents.data, dirents.nr, sizeof(dirents.data[0]), dirent_cmp, NULL);
|
||||
|
||||
darray_for_each(dirents, d) {
|
||||
struct bch_inode_unpacked inode;
|
||||
int fd;
|
||||
|
||||
@ -368,7 +387,7 @@ static void copy_dir(struct copy_fs_state *s,
|
||||
switch (mode_to_type(stat.st_mode)) {
|
||||
case DT_DIR:
|
||||
fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
|
||||
copy_dir(s, c, &inode, fd, child_path);
|
||||
copy_dir(s, c, &inode, fd, child_path, reserve_start);
|
||||
close(fd);
|
||||
break;
|
||||
case DT_REG:
|
||||
@ -376,7 +395,7 @@ static void copy_dir(struct copy_fs_state *s,
|
||||
|
||||
fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
|
||||
copy_file(c, &inode, fd, stat.st_size,
|
||||
child_path, s);
|
||||
child_path, s, reserve_start);
|
||||
close(fd);
|
||||
break;
|
||||
case DT_LNK:
|
||||
@ -401,14 +420,14 @@ next:
|
||||
free(child_path);
|
||||
}
|
||||
|
||||
if (errno)
|
||||
die("readdir error: %m");
|
||||
darray_exit(&dirents);
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
static void reserve_old_fs_space(struct bch_fs *c,
|
||||
struct bch_inode_unpacked *root_inode,
|
||||
ranges *extents)
|
||||
ranges *extents,
|
||||
u64 reserve_start)
|
||||
{
|
||||
struct bch_dev *ca = c->devs[0];
|
||||
struct bch_inode_unpacked dst;
|
||||
@ -421,14 +440,20 @@ static void reserve_old_fs_space(struct bch_fs *c,
|
||||
|
||||
ranges_sort_merge(extents);
|
||||
|
||||
for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
|
||||
link_data(c, &dst, i.start, i.start, i.end - i.start);
|
||||
for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
|
||||
if (i.end <= reserve_start)
|
||||
continue;
|
||||
|
||||
u64 start = max(i.start, reserve_start);
|
||||
|
||||
link_data(c, &dst, start, start, i.end - start);
|
||||
}
|
||||
|
||||
update_inode(c, &dst);
|
||||
}
|
||||
|
||||
void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
|
||||
struct copy_fs_state *s)
|
||||
struct copy_fs_state *s, u64 reserve_start)
|
||||
{
|
||||
syncfs(src_fd);
|
||||
|
||||
@ -447,10 +472,10 @@ void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
|
||||
|
||||
|
||||
/* now, copy: */
|
||||
copy_dir(s, c, &root_inode, src_fd, src_path);
|
||||
copy_dir(s, c, &root_inode, src_fd, src_path, reserve_start);
|
||||
|
||||
if (BCH_MIGRATE_migrate == s->type)
|
||||
reserve_old_fs_space(c, &root_inode, &s->extents);
|
||||
reserve_old_fs_space(c, &root_inode, &s->extents, reserve_start);
|
||||
|
||||
update_inode(c, &root_inode);
|
||||
|
||||
|
@ -50,5 +50,5 @@ struct copy_fs_state {
|
||||
* initialized (`hardlinks` is initialized with zeroes).
|
||||
*/
|
||||
void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
|
||||
struct copy_fs_state *s);
|
||||
struct copy_fs_state *s, u64);
|
||||
#endif /* _LIBBCACHE_H */
|
||||
|
@ -185,6 +185,24 @@ unsigned get_blocksize(int fd)
|
||||
/* Open a block device, do magic blkid stuff to probe for existing filesystems: */
|
||||
int open_for_format(struct dev_opts *dev, bool force)
|
||||
{
|
||||
int blkid_version_code = blkid_get_library_version(NULL, NULL);
|
||||
if (blkid_version_code < 2401) {
|
||||
if (force) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"Continuing with out of date libblkid %s because --force was passed.\n",
|
||||
BLKID_VERSION);
|
||||
} else {
|
||||
// Reference for picking 2.40.1:
|
||||
// https://mirrors.edge.kernel.org/pub/linux/utils/util-linux/v2.40/v2.40.1-ReleaseNotes
|
||||
// https://github.com/util-linux/util-linux/issues/3103
|
||||
die(
|
||||
"Refusing to format when using libblkid %s\n"
|
||||
"libblkid >= 2.40.1 is required to check for existing filesystems\n"
|
||||
"Earlier versions may not recognize some bcachefs filesystems.\n", BLKID_VERSION);
|
||||
}
|
||||
}
|
||||
|
||||
blkid_probe pr;
|
||||
const char *fs_type = NULL, *fs_label = NULL;
|
||||
size_t fs_type_len, fs_label_len;
|
||||
@ -708,6 +726,28 @@ struct bbpos_range bbpos_range_parse(char *buf)
|
||||
return (struct bbpos_range) { .start = start, .end = end };
|
||||
}
|
||||
|
||||
/*
 * Parse a version string of the form "major.minor", or just "minor" (in
 * which case major is taken to be 0), and pack it with BCH_VERSION().
 *
 * @buf is split in place by strsep(), so it must be writable. Anything that
 * follows a second '.' is ignored. Calls die() if kstrtouint() rejects a
 * component as a base-10 unsigned integer.
 */
unsigned version_parse(char *buf)
{
	char *s = buf;
	char *major_str = strsep(&s, ".");
	char *minor_str = strsep(&s, ".");

	unsigned major, minor;

	if (!minor_str) {
		/* No '.' was found: buf is unmodified and is the whole input */
		major = 0;
		if (kstrtouint(major_str, 10, &minor))
			die("invalid version %s", buf);
	} else {
		/*
		 * strsep() replaced the '.' with a NUL, so buf now only holds
		 * the major component; report both parts so the message shows
		 * what the user actually typed.
		 */
		if (kstrtouint(major_str, 10, &major) ||
		    kstrtouint(minor_str, 10, &minor))
			die("invalid version %s.%s", major_str, minor_str);
	}

	return BCH_VERSION(major, minor);
}
|
||||
|
||||
darray_str get_or_split_cmdline_devs(int argc, char *argv[])
|
||||
{
|
||||
darray_str ret = {};
|
||||
|
@ -174,6 +174,8 @@ struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
|
||||
|
||||
char *strcmp_prefix(char *, const char *);
|
||||
|
||||
/* Avoid conflicts with libblkid's crc32 function in static builds */
|
||||
#define crc32c bch_crc32c
|
||||
u32 crc32c(u32, const void *, size_t);
|
||||
|
||||
char *dev_to_name(dev_t);
|
||||
@ -207,6 +209,8 @@ struct bbpos_range {
|
||||
|
||||
struct bbpos_range bbpos_range_parse(char *);
|
||||
|
||||
unsigned version_parse(char *);
|
||||
|
||||
darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
|
||||
|
||||
#endif /* _TOOLS_UTIL_H */
|
||||
|
117
flake.lock
generated
117
flake.lock
generated
@ -1,17 +1,12 @@
|
||||
{
|
||||
"nodes": {
|
||||
"crane": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1721842668,
|
||||
"narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=",
|
||||
"lastModified": 1742394900,
|
||||
"narHash": "sha256-vVOAp9ahvnU+fQoKd4SEXB2JG2wbENkpqcwlkIXgUC0=",
|
||||
"owner": "ipetkov",
|
||||
"repo": "crane",
|
||||
"rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf",
|
||||
"rev": "70947c1908108c0c551ddfd73d4f750ff2ea67cd",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -20,35 +15,14 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"fenix": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
],
|
||||
"rust-analyzer-src": "rust-analyzer-src"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1722320953,
|
||||
"narHash": "sha256-DfGaJtgrzcwPQYLTvjL1KaVIjpvi85b2MpM6yEGvJzM=",
|
||||
"owner": "nix-community",
|
||||
"repo": "fenix",
|
||||
"rev": "483df76def3e5010d709aa3a0418ba2088503994",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-community",
|
||||
"repo": "fenix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-compat": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1696426674,
|
||||
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
|
||||
"lastModified": 1733328505,
|
||||
"narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
|
||||
"rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -62,11 +36,11 @@
|
||||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1719994518,
|
||||
"narHash": "sha256-pQMhCCHyQGRzdfAkdJ4cIWiw+JNuWsTX7f0ZYSyz0VY=",
|
||||
"lastModified": 1741352980,
|
||||
"narHash": "sha256-+u2UunDA4Cl5Fci3m7S643HzKmIDAe+fiXrLqYsR2fs=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "9227223f6d922fee3c7b190b2cc238a99527bbb7",
|
||||
"rev": "f4330d22f1c5d2ba72d3d22df5597d123fdb60a9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -75,13 +49,33 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nix-github-actions": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1737420293,
|
||||
"narHash": "sha256-F1G5ifvqTpJq7fdkT34e/Jy9VCyzd5XfJ9TO8fHhJWE=",
|
||||
"owner": "nix-community",
|
||||
"repo": "nix-github-actions",
|
||||
"rev": "f4158fa080ef4503c8f4c820967d946c2af31ec9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-community",
|
||||
"repo": "nix-github-actions",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1722185531,
|
||||
"narHash": "sha256-veKR07psFoJjINLC8RK4DiLniGGMgF3QMlS4tb74S6k=",
|
||||
"lastModified": 1742422364,
|
||||
"narHash": "sha256-mNqIplmEohk5jRkqYqG19GA8MbQ/D4gQSK0Mu4LvfRQ=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "52ec9ac3b12395ad677e8b62106f0b98c1f8569d",
|
||||
"rev": "a84ebe20c6bc2ecbcfb000a50776219f48d134cc",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -93,40 +87,47 @@
|
||||
},
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"lastModified": 1719876945,
|
||||
"narHash": "sha256-Fm2rDDs86sHy0/1jxTOKB1118Q0O3Uc7EC0iXvXKpbI=",
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
|
||||
"lastModified": 1740877520,
|
||||
"narHash": "sha256-oiwv/ZK/2FhGxrCkQkB83i7GnWXPPLzoqFHpDD3uYpk=",
|
||||
"owner": "nix-community",
|
||||
"repo": "nixpkgs.lib",
|
||||
"rev": "147dee35aab2193b174e4c0868bd80ead5ce755c",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
|
||||
"owner": "nix-community",
|
||||
"repo": "nixpkgs.lib",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"crane": "crane",
|
||||
"fenix": "fenix",
|
||||
"flake-compat": "flake-compat",
|
||||
"flake-parts": "flake-parts",
|
||||
"nix-github-actions": "nix-github-actions",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"rust-overlay": "rust-overlay",
|
||||
"treefmt-nix": "treefmt-nix"
|
||||
}
|
||||
},
|
||||
"rust-analyzer-src": {
|
||||
"flake": false,
|
||||
"rust-overlay": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1722262053,
|
||||
"narHash": "sha256-KxjkPVn9rQqYam6DhiN/V2NcMXtYW25maxkJoiVMpmE=",
|
||||
"owner": "rust-lang",
|
||||
"repo": "rust-analyzer",
|
||||
"rev": "a021b85be57d34b1eed687fcafd5d5ec64b2d853",
|
||||
"lastModified": 1742524367,
|
||||
"narHash": "sha256-KzTwk/5ETJavJZYV1DEWdCx05M4duFCxCpRbQSKWpng=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "70bf752d176b2ce07417e346d85486acea9040ef",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "rust-lang",
|
||||
"ref": "nightly",
|
||||
"repo": "rust-analyzer",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
@ -137,11 +138,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1722330636,
|
||||
"narHash": "sha256-uru7JzOa33YlSRwf9sfXpJG+UAV+bnBEYMjrzKrQZFw=",
|
||||
"lastModified": 1742370146,
|
||||
"narHash": "sha256-XRE8hL4vKIQyVMDXykFh4ceo3KSpuJF3ts8GKwh5bIU=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "768acdb06968e53aa1ee8de207fd955335c754b7",
|
||||
"rev": "adc195eef5da3606891cedf80c0d9ce2d3190808",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
197
flake.nix
197
flake.nix
@ -11,13 +11,10 @@
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
|
||||
crane = {
|
||||
url = "github:ipetkov/crane";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
crane.url = "github:ipetkov/crane";
|
||||
|
||||
fenix = {
|
||||
url = "github:nix-community/fenix";
|
||||
rust-overlay = {
|
||||
url = "github:oxalica/rust-overlay";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
|
||||
@ -25,6 +22,11 @@
|
||||
url = "github:edolstra/flake-compat";
|
||||
flake = false;
|
||||
};
|
||||
|
||||
nix-github-actions = {
|
||||
url = "github:nix-community/nix-github-actions";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
};
|
||||
|
||||
outputs =
|
||||
@ -33,27 +35,31 @@
|
||||
nixpkgs,
|
||||
flake-parts,
|
||||
treefmt-nix,
|
||||
fenix,
|
||||
crane,
|
||||
...
|
||||
rust-overlay,
|
||||
flake-compat,
|
||||
nix-github-actions,
|
||||
}:
|
||||
let
|
||||
systems = nixpkgs.lib.filter (s: nixpkgs.lib.hasSuffix "-linux" s) nixpkgs.lib.systems.flakeExposed;
|
||||
in
|
||||
flake-parts.lib.mkFlake { inherit inputs; } {
|
||||
imports = [ inputs.treefmt-nix.flakeModule ];
|
||||
|
||||
# can be extended, but these have proper binary cache support in nixpkgs
|
||||
# as of writing.
|
||||
systems = [
|
||||
"aarch64-linux"
|
||||
"x86_64-linux"
|
||||
"i686-linux"
|
||||
];
|
||||
flake = {
|
||||
githubActions = nix-github-actions.lib.mkGithubMatrix {
|
||||
# github actions supports fewer architectures
|
||||
checks = nixpkgs.lib.getAttrs [ "aarch64-linux" "x86_64-linux" ] self.checks;
|
||||
};
|
||||
};
|
||||
|
||||
inherit systems;
|
||||
|
||||
perSystem =
|
||||
{
|
||||
self',
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
system,
|
||||
...
|
||||
}:
|
||||
@ -62,24 +68,63 @@
|
||||
inherit (lib.lists) findFirst;
|
||||
inherit (lib.strings) hasPrefix removePrefix substring;
|
||||
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
overlays = [ (import rust-overlay) ];
|
||||
};
|
||||
|
||||
cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
|
||||
rustfmtToml = builtins.fromTOML (builtins.readFile ./rustfmt.toml);
|
||||
|
||||
craneLib = crane.mkLib pkgs;
|
||||
|
||||
rev = self.shortRev or self.dirtyShortRev or (substring 0 8 self.lastModifiedDate);
|
||||
makefileVersion = removePrefix "VERSION=" (
|
||||
findFirst (line: hasPrefix "VERSION=" line) "VERSION=0.0.0" (split "\n" (readFile ./Makefile))
|
||||
);
|
||||
version = "${makefileVersion}+${rev}";
|
||||
|
||||
commonArgs = {
|
||||
mkCommon =
|
||||
{
|
||||
crane,
|
||||
pkgs,
|
||||
rustVersion ? "latest",
|
||||
|
||||
# build time
|
||||
buildPackages,
|
||||
pkg-config,
|
||||
rustPlatform,
|
||||
stdenv,
|
||||
|
||||
# run time
|
||||
keyutils,
|
||||
libaio,
|
||||
libsodium,
|
||||
liburcu,
|
||||
libuuid,
|
||||
lz4,
|
||||
udev,
|
||||
zlib,
|
||||
zstd,
|
||||
}:
|
||||
let
|
||||
inherit (stdenv) cc hostPlatform;
|
||||
|
||||
craneLib = (crane.mkLib pkgs).overrideToolchain (
|
||||
p: p.rust-bin.stable."${rustVersion}".minimal.override { extensions = [ "clippy" ]; }
|
||||
);
|
||||
|
||||
args = {
|
||||
inherit version;
|
||||
src = self;
|
||||
strictDeps = true;
|
||||
|
||||
env = {
|
||||
PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
|
||||
PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";
|
||||
|
||||
CARGO_BUILD_TARGET = hostPlatform.rust.rustcTargetSpec;
|
||||
"CARGO_TARGET_${hostPlatform.rust.cargoEnvVarTarget}_LINKER" = "${cc.targetPrefix}cc";
|
||||
HOST_CC = "${cc.nativePrefix}cc";
|
||||
TARGET_CC = "${cc.targetPrefix}cc";
|
||||
};
|
||||
|
||||
makeFlags = [
|
||||
@ -90,13 +135,16 @@
|
||||
|
||||
dontStrip = true;
|
||||
|
||||
nativeBuildInputs = with pkgs; [
|
||||
depsBuildBuild = [
|
||||
buildPackages.stdenv.cc
|
||||
];
|
||||
|
||||
nativeBuildInputs = [
|
||||
pkg-config
|
||||
rustPlatform.bindgenHook
|
||||
];
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
attr
|
||||
buildInputs = [
|
||||
keyutils
|
||||
libaio
|
||||
libsodium
|
||||
@ -115,19 +163,26 @@
|
||||
};
|
||||
};
|
||||
|
||||
cargoArtifacts = craneLib.buildDepsOnly (commonArgs // { pname = cargoToml.package.name; });
|
||||
cargoArtifacts = craneLib.buildDepsOnly args;
|
||||
in
|
||||
{
|
||||
packages.default = config.packages.bcachefs-tools;
|
||||
packages.bcachefs-tools = craneLib.buildPackage (
|
||||
commonArgs
|
||||
inherit args cargoArtifacts craneLib;
|
||||
};
|
||||
common = pkgs.callPackage mkCommon { inherit crane; };
|
||||
|
||||
mkPackage =
|
||||
{ common, name }:
|
||||
common.craneLib.buildPackage (
|
||||
common.args
|
||||
// {
|
||||
inherit cargoArtifacts;
|
||||
inherit (common) cargoArtifacts;
|
||||
pname = name;
|
||||
|
||||
enableParallelBuilding = true;
|
||||
buildPhaseCargoCommand = ''
|
||||
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
|
||||
'';
|
||||
doNotPostBuildInstallCargoBinaries = true;
|
||||
installPhaseCommand = ''
|
||||
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
|
||||
'';
|
||||
@ -143,26 +198,66 @@
|
||||
}
|
||||
);
|
||||
|
||||
packages.bcachefs-tools-fuse = config.packages.bcachefs-tools.overrideAttrs (
|
||||
mkPackages =
|
||||
name: systems:
|
||||
let
|
||||
packagesForSystem =
|
||||
crossSystem:
|
||||
let
|
||||
localSystem = system;
|
||||
pkgs' = import nixpkgs {
|
||||
inherit crossSystem localSystem;
|
||||
overlays = [ (import rust-overlay) ];
|
||||
};
|
||||
|
||||
common = pkgs'.callPackage mkCommon { inherit crane; };
|
||||
package = pkgs'.callPackage mkPackage { inherit common name; };
|
||||
packageFuse = package.overrideAttrs (
|
||||
final: prev: {
|
||||
makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
|
||||
buildInputs = prev.buildInputs ++ [ pkgs.fuse3 ];
|
||||
buildInputs = prev.buildInputs ++ [ pkgs'.fuse3 ];
|
||||
}
|
||||
);
|
||||
|
||||
checks.cargo-clippy = craneLib.cargoClippy (
|
||||
commonArgs
|
||||
in
|
||||
[
|
||||
(lib.nameValuePair "${name}-${crossSystem}" package)
|
||||
(lib.nameValuePair "${name}-fuse-${crossSystem}" packageFuse)
|
||||
];
|
||||
in
|
||||
lib.listToAttrs (lib.flatten (map packagesForSystem systems));
|
||||
in
|
||||
{
|
||||
packages =
|
||||
let
|
||||
inherit (cargoToml.package) name;
|
||||
in
|
||||
(mkPackages name systems)
|
||||
// {
|
||||
inherit cargoArtifacts;
|
||||
cargoClippyExtraArgs = "--all-targets -- --deny warnings";
|
||||
${name} = config.packages."${name}-${system}";
|
||||
"${name}-fuse" = config.packages."${name}-fuse-${system}";
|
||||
default = config.packages.${name};
|
||||
};
|
||||
|
||||
checks = {
|
||||
inherit (config.packages)
|
||||
bcachefs-tools
|
||||
bcachefs-tools-fuse
|
||||
bcachefs-tools-fuse-i686-linux
|
||||
;
|
||||
|
||||
cargo-clippy = common.craneLib.cargoClippy (
|
||||
common.args
|
||||
// {
|
||||
inherit (common) cargoArtifacts;
|
||||
cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
|
||||
}
|
||||
);
|
||||
|
||||
# we have to build our own `craneLib.cargoTest`
|
||||
checks.cargo-test = craneLib.mkCargoDerivation (
|
||||
commonArgs
|
||||
cargo-test = common.craneLib.mkCargoDerivation (
|
||||
common.args
|
||||
// {
|
||||
inherit cargoArtifacts;
|
||||
inherit (common) cargoArtifacts;
|
||||
doCheck = true;
|
||||
|
||||
enableParallelChecking = true;
|
||||
@ -176,6 +271,23 @@
|
||||
}
|
||||
);
|
||||
|
||||
# cargo clippy with the current minimum supported rust version
|
||||
# according to Cargo.toml
|
||||
msrv =
|
||||
let
|
||||
rustVersion = cargoToml.package.rust-version;
|
||||
common = pkgs.callPackage mkCommon { inherit crane rustVersion; };
|
||||
in
|
||||
common.craneLib.cargoClippy (
|
||||
common.args
|
||||
// {
|
||||
pname = "msrv";
|
||||
inherit (common) cargoArtifacts;
|
||||
cargoClippyExtraArgs = "--all-targets --all-features";
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
devShells.default = pkgs.mkShell {
|
||||
inputsFrom = [
|
||||
config.packages.default
|
||||
@ -190,9 +302,12 @@
|
||||
cargo-audit
|
||||
cargo-outdated
|
||||
clang-tools
|
||||
clippy
|
||||
rust-analyzer
|
||||
rustc
|
||||
(rust-bin.stable.latest.minimal.override {
|
||||
extensions = [
|
||||
"rust-analyzer"
|
||||
"rust-src"
|
||||
];
|
||||
})
|
||||
];
|
||||
};
|
||||
|
||||
@ -204,7 +319,7 @@
|
||||
nixfmt.enable = true;
|
||||
rustfmt.edition = rustfmtToml.edition;
|
||||
rustfmt.enable = true;
|
||||
rustfmt.package = fenix.packages.${system}.default.rustfmt;
|
||||
rustfmt.package = pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.rustfmt);
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -7,6 +7,7 @@
|
||||
#define _CRYPTO_SHA_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <sodium/crypto_hash_sha256.h>
|
||||
|
||||
#define SHA1_DIGEST_SIZE 20
|
||||
#define SHA1_BLOCK_SIZE 64
|
||||
@ -112,4 +113,9 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
|
||||
|
||||
extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *hash);
|
||||
|
||||
/* Hash @len bytes at @data with libsodium's SHA-256 and store the digest in @out. */
static inline void sha256(const u8 *data, unsigned int len, u8 *out)
{
	/* libsodium takes the output buffer first; its return value is unused */
	(void) crypto_hash_sha256(out, data, len);
}
|
||||
#endif
|
||||
|
@ -16,53 +16,6 @@ typedef struct {
|
||||
u64 counter;
|
||||
} atomic64_t;
|
||||
|
||||
#ifndef C11_ATOMICS
|
||||
|
||||
#include <urcu/uatomic.h>
|
||||
|
||||
#if (CAA_BITS_PER_LONG != 64)
|
||||
#define ATOMIC64_SPINLOCK
|
||||
#endif
|
||||
|
||||
#define __ATOMIC_READ(p) uatomic_read(p)
|
||||
#define __ATOMIC_SET(p, v) uatomic_set(p, v)
|
||||
#define __ATOMIC_SET_RELEASE(p, v) uatomic_set(p, v)
|
||||
#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v)
|
||||
#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v)
|
||||
#define __ATOMIC_ADD(v, p) uatomic_add(p, v)
|
||||
#define __ATOMIC_SUB(v, p) uatomic_sub(p, v)
|
||||
#define __ATOMIC_INC(p) uatomic_inc(p)
|
||||
#define __ATOMIC_DEC(p) uatomic_dec(p)
|
||||
#define __ATOMIC_AND(v, p) uatomic_and(p, v)
|
||||
#define __ATOMIC_OR(v, p) uatomic_or(p, v)
|
||||
|
||||
#define xchg(p, v) uatomic_xchg(p, v)
|
||||
#define xchg_acquire(p, v) uatomic_xchg(p, v)
|
||||
#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new)
|
||||
#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new)
|
||||
#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new)
|
||||
|
||||
#define try_cmpxchg(p, _old, _new) \
|
||||
({ \
|
||||
typeof(*(_old)) _v = cmpxchg(p, *(_old), _new); \
|
||||
bool _ret = _v == *(_old); \
|
||||
*(_old) = _v; \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define try_cmpxchg_acquire(p, _old, _new) \
|
||||
try_cmpxchg(p, _old, _new)
|
||||
|
||||
#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add()
|
||||
#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add()
|
||||
#define smp_wmb() cmm_smp_wmb()
|
||||
#define smp_rmb() cmm_smp_rmb()
|
||||
#define smp_mb() cmm_smp_mb()
|
||||
#define smp_read_barrier_depends() cmm_smp_read_barrier_depends()
|
||||
#define smp_acquire__after_ctrl_dep() cmm_smp_mb()
|
||||
|
||||
#else /* C11_ATOMICS */
|
||||
|
||||
#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_SET_RELEASE(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
|
||||
@ -72,17 +25,22 @@ typedef struct {
|
||||
#define __ATOMIC_SUB_RETURN(v, p) __atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_SUB_RETURN_RELEASE(v, p) \
|
||||
__atomic_sub_fetch(p, v, __ATOMIC_RELEASE)
|
||||
#define __ATOMIC_AND(p) __atomic_and_fetch(p, v, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_OR(p) __atomic_or_fetch(p, v, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_AND(v, p) __atomic_and_fetch(&(p)->counter, v, __ATOMIC_RELAXED)
|
||||
#define __ATOMIC_OR(v, p) __atomic_or_fetch(&(p)->counter, v, __ATOMIC_RELAXED)
|
||||
|
||||
#define xchg(p, v) __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
|
||||
#define xchg_acquire(p, v) __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)
|
||||
|
||||
#define try_cmpxchg(p, old, new) \
|
||||
__atomic_compare_exchange_n((p), __old, new, false, \
|
||||
__atomic_compare_exchange_n((p), old, new, false, \
|
||||
__ATOMIC_SEQ_CST, \
|
||||
__ATOMIC_SEQ_CST)
|
||||
|
||||
#define try_cmpxchg_acquire(p, old, new) \
|
||||
__atomic_compare_exchange_n((p), old, new, false, \
|
||||
__ATOMIC_ACQUIRE, \
|
||||
__ATOMIC_RELAXED)
|
||||
|
||||
#define cmpxchg(p, old, new) \
|
||||
({ \
|
||||
typeof(*(p)) __old = (old); \
|
||||
@ -109,7 +67,7 @@ typedef struct {
|
||||
\
|
||||
__atomic_compare_exchange_n((p), &__old, new, false, \
|
||||
__ATOMIC_RELEASE, \
|
||||
__ATOMIC_RELEASE); \
|
||||
__ATOMIC_RELAXED); \
|
||||
__old; \
|
||||
})
|
||||
|
||||
@ -119,9 +77,7 @@ typedef struct {
|
||||
#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
|
||||
#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
|
||||
#define smp_read_barrier_depends()
|
||||
|
||||
#endif
|
||||
|
||||
#define smp_acquire__after_ctrl_dep() __atomic_thread_fence(__ATOMIC_SEQ_CST)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
@ -188,12 +144,12 @@ static inline i_type a_type##_read_acquire(const a_type##_t *v) \
|
||||
\
|
||||
static inline void a_type##_set(a_type##_t *v, i_type i) \
|
||||
{ \
|
||||
return __ATOMIC_SET(&v->counter, i); \
|
||||
__ATOMIC_SET(&v->counter, i); \
|
||||
} \
|
||||
\
|
||||
static inline void a_type##_set_release(a_type##_t *v, i_type i) \
|
||||
{ \
|
||||
return __ATOMIC_SET_RELEASE(&v->counter, i); \
|
||||
__ATOMIC_SET_RELEASE(&v->counter, i); \
|
||||
} \
|
||||
\
|
||||
static inline i_type a_type##_add_return(i_type i, a_type##_t *v) \
|
||||
|
@ -6,9 +6,12 @@
|
||||
#define __LINUX_BLK_TYPES_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/bvec.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/rwsem.h>
|
||||
|
||||
struct bio_set;
|
||||
struct bio;
|
||||
@ -62,6 +65,8 @@ struct block_device {
|
||||
struct gendisk * bd_disk;
|
||||
struct gendisk __bd_disk;
|
||||
int bd_fd;
|
||||
|
||||
struct mutex bd_holder_lock;
|
||||
};
|
||||
|
||||
#define bdev_kobj(_bdev) (&((_bdev)->kobj))
|
||||
|
@ -65,7 +65,10 @@ unsigned bdev_logical_block_size(struct block_device *bdev);
|
||||
sector_t get_capacity(struct gendisk *disk);
|
||||
|
||||
struct blk_holder_ops {
|
||||
void (*mark_dead)(struct block_device *bdev);
|
||||
void (*mark_dead)(struct block_device *bdev, bool surprise);
|
||||
void (*sync)(struct block_device *bdev);
|
||||
int (*freeze)(struct block_device *bdev);
|
||||
int (*thaw)(struct block_device *bdev);
|
||||
};
|
||||
|
||||
static inline struct block_device *file_bdev(struct file *file)
|
||||
@ -80,8 +83,12 @@ int lookup_bdev(const char *path, dev_t *);
|
||||
|
||||
struct super_block {
|
||||
void *s_fs_info;
|
||||
struct rw_semaphore s_umount;
|
||||
};
|
||||
|
||||
static inline void evict_inodes(struct super_block *sb) {}
|
||||
static inline int sync_filesystem(struct super_block *) { return 0; }
|
||||
|
||||
/*
|
||||
* File types
|
||||
*
|
||||
|
@ -67,6 +67,7 @@
|
||||
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
|
||||
#define fallthrough __attribute__((__fallthrough__))
|
||||
#define __noreturn __attribute__((__noreturn__))
|
||||
#define __no_kmsan_checks
|
||||
|
||||
#ifndef __counted_by
|
||||
#define __counted_by(nr)
|
||||
|
@ -9,4 +9,9 @@ struct dentry {
|
||||
struct inode *d_inode;
|
||||
};
|
||||
|
||||
static inline void shrink_dcache_sb(struct super_block *) {}
|
||||
|
||||
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
|
||||
#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))
|
||||
|
||||
#endif /* __LINUX_DCACHE_H */
|
||||
|
20
include/linux/fs_parser.h
Normal file
20
include/linux/fs_parser.h
Normal file
@ -0,0 +1,20 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/* Filesystem parameter description and parser
|
||||
*
|
||||
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_FS_PARSER_H
|
||||
#define _LINUX_FS_PARSER_H
|
||||
|
||||
/* One name/value pair of a constant table, as taken by lookup_constant(). */
struct constant_table {
	const char	*name;	/* string form of the constant */
	int		value;	/* integer paired with @name */
};
|
||||
|
||||
extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
|
||||
|
||||
extern const struct constant_table bool_names[];
|
||||
|
||||
#endif /* _LINUX_FS_PARSER_H */
|
@ -11,6 +11,8 @@
|
||||
#include <linux/bug.h>
|
||||
#include <linux/byteorder.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/kmsan-checks.h>
|
||||
#include <linux/math.h>
|
||||
#include <linux/minmax.h>
|
||||
|
||||
|
@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
|
||||
extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
|
||||
extern void kmemleak_update_trace(const void *ptr) __ref;
|
||||
extern void kmemleak_not_leak(const void *ptr) __ref;
|
||||
extern void kmemleak_transient_leak(const void *ptr) __ref;
|
||||
extern void kmemleak_ignore(const void *ptr) __ref;
|
||||
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
|
||||
extern void kmemleak_no_scan(const void *ptr) __ref;
|
||||
@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr)
|
||||
static inline void kmemleak_not_leak(const void *ptr)
|
||||
{
|
||||
}
|
||||
static inline void kmemleak_transient_leak(const void *ptr)
|
||||
{
|
||||
}
|
||||
static inline void kmemleak_ignore(const void *ptr)
|
||||
{
|
||||
}
|
||||
|
98
include/linux/kmsan-checks.h
Normal file
98
include/linux/kmsan-checks.h
Normal file
@ -0,0 +1,98 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KMSAN checks to be used for one-off annotations in subsystems.
|
||||
*
|
||||
* Copyright (C) 2017-2022 Google LLC
|
||||
* Author: Alexander Potapenko <glider@google.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_KMSAN_CHECKS_H
|
||||
#define _LINUX_KMSAN_CHECKS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_KMSAN
|
||||
|
||||
/**
|
||||
* kmsan_poison_memory() - Mark the memory range as uninitialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to poison.
|
||||
* @flags: GFP flags for allocations done by this function.
|
||||
*
|
||||
* Until other data is written to this range, KMSAN will treat it as
|
||||
* uninitialized. Error reports for this memory will reference the call site of
|
||||
* kmsan_poison_memory() as origin.
|
||||
*/
|
||||
void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);
|
||||
|
||||
/**
|
||||
* kmsan_unpoison_memory() - Mark the memory range as initialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to unpoison.
|
||||
*
|
||||
* Until other data is written to this range, KMSAN will treat it as
|
||||
* initialized.
|
||||
*/
|
||||
void kmsan_unpoison_memory(const void *address, size_t size);
|
||||
|
||||
/**
|
||||
* kmsan_check_memory() - Check the memory range for being initialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to check.
|
||||
*
|
||||
* If any piece of the given range is marked as uninitialized, KMSAN will report
|
||||
* an error.
|
||||
*/
|
||||
void kmsan_check_memory(const void *address, size_t size);
|
||||
|
||||
/**
|
||||
* kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
|
||||
* @to: destination address in the userspace.
|
||||
* @from: source address in the kernel.
|
||||
* @to_copy: number of bytes to copy.
|
||||
* @left: number of bytes not copied.
|
||||
*
|
||||
* If this is a real userspace data transfer, KMSAN checks the bytes that were
|
||||
* actually copied to ensure there was no information leak. If @to belongs to
|
||||
* the kernel space (which is possible for compat syscalls), KMSAN just copies
|
||||
* the metadata.
|
||||
*/
|
||||
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
|
||||
size_t left);
|
||||
|
||||
/**
|
||||
* kmsan_memmove() - Notify KMSAN about a data copy within kernel.
|
||||
* @to: destination address in the kernel.
|
||||
* @from: source address in the kernel.
|
||||
* @to_copy: number of bytes to copy.
|
||||
*
|
||||
* Invoked after non-instrumented version (e.g. implemented using assembly
|
||||
* code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
|
||||
*/
|
||||
void kmsan_memmove(void *to, const void *from, size_t to_copy);
|
||||
|
||||
#else
|
||||
|
||||
static inline void kmsan_poison_memory(const void *address, size_t size,
|
||||
gfp_t flags)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_unpoison_memory(const void *address, size_t size)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_check_memory(const void *address, size_t size)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_copy_to_user(void __user *to, const void *from,
|
||||
size_t to_copy, size_t left)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_KMSAN_CHECKS_H */
|
@ -20,6 +20,7 @@
|
||||
#include <linux/bug.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
@ -82,4 +82,71 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
|
||||
return div_s64_rem(dividend, divisor, &remainder);
|
||||
}
|
||||
|
||||
#ifndef mul_u32_u32
|
||||
/*
|
||||
* Many a GCC version messes this up and generates a 64x64 mult :-(
|
||||
*/
|
||||
static inline u64 mul_u32_u32(u32 a, u32 b)
{
	/* Widen one operand first so the product is computed in 64 bits */
	u64 wide = a;

	return wide * b;
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
|
||||
|
||||
#ifndef mul_u64_u64_shr
|
||||
/* Return bits [shift, shift + 63] of the 128-bit product a * mul. */
static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
{
	unsigned __int128 product = a;

	product *= mul;
	return (u64)(product >> shift);
}
|
||||
#endif /* mul_u64_u64_shr */
|
||||
|
||||
#else
|
||||
|
||||
#ifndef mul_u64_u64_shr
|
||||
/*
 * Fallback for targets without 128-bit integers: form the 128-bit product
 * a * b from four 32x32 -> 64 partial products, then shift it right by
 * @shift and return the low 64 bits of the result.
 */
static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
{
	const u32 a_lo = (u32)a, a_hi = (u32)(a >> 32);
	const u32 b_lo = (u32)b, b_hi = (u32)(b >> 32);

	const u64 lo_lo = mul_u32_u32(a_lo, b_lo);
	const u64 lo_hi = mul_u32_u32(a_lo, b_hi);
	const u64 hi_lo = mul_u32_u32(a_hi, b_lo);
	const u64 hi_hi = mul_u32_u32(a_hi, b_hi);

	u64 carry, prod_lo, prod_hi;

	/*
	 * Sum the partial products one 32-bit column at a time, starting at
	 * bits 32-63. The low 32 bits of each sum land in the product, the
	 * high 32 bits are carried into the next column.
	 */
	carry = (lo_lo >> 32) + (u32)lo_hi + (u32)hi_lo;
	prod_lo = (carry << 32) | (u32)lo_lo;

	/* bits 64-95 */
	carry = (carry >> 32) + (lo_hi >> 32) + (hi_lo >> 32) + (u32)hi_hi;
	prod_hi = (u32)carry;

	/* bits 96-127; the shift drops anything above bit 127 */
	carry = (carry >> 32) + (hi_hi >> 32);
	prod_hi |= carry << 32;

	/*
	 * prod_hi:prod_lo now holds the full 128-bit product; shift it right
	 * and throw away the high part of the result.
	 */
	if (shift == 0)
		return prod_lo;
	if (shift < 64)
		return (prod_lo >> shift) | (prod_hi << (64 - shift));
	return prod_hi >> (shift & 63);
}
|
||||
#endif /* mul_u64_u64_shr */
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_MATH64_H */
|
||||
|
@ -15,8 +15,8 @@
|
||||
*/
|
||||
#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \
|
||||
struct _name { \
|
||||
int nr; \
|
||||
int size; \
|
||||
size_t nr; \
|
||||
size_t size; \
|
||||
_type *data; \
|
||||
_type preallocated[_nr]; \
|
||||
}
|
||||
@ -38,9 +38,176 @@ struct min_heap_callbacks {
|
||||
void (*swp)(void *lhs, void *rhs, void *args);
|
||||
};
|
||||
|
||||
/**
 * is_aligned - can elements be copied with word-sized loads and stores?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true when @size is a multiple of @align and, unless
 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is defined, @base is aligned to
 * @align as well.
 *
 * Only the low eight bits of @size and @base are examined.  They are OR-ed
 * together and masked once, because gcc does not turn
 * "if (a & mask || b & mask)" into "if ((a | b) & mask)" on its own.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	const int mask = align - 1;
	unsigned char low_bits = (unsigned char)size;

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	/* The address does not matter on this configuration. */
	(void)base;
#else
	low_bits |= (unsigned char)(uintptr_t)base;
#endif

	return !(low_bits & mask);
}
|
||||
|
||||
/**
|
||||
* swap_words_32 - swap two elements in 32-bit chunks
|
||||
* @a: pointer to the first element to swap
|
||||
* @b: pointer to the second element to swap
|
||||
* @n: element size (must be a multiple of 4)
|
||||
*
|
||||
* Exchange the two objects in memory. This exploits base+index addressing,
|
||||
* which basically all CPUs have, to minimize loop overhead computations.
|
||||
*
|
||||
* For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
|
||||
* bottom of the loop, even though the zero flag is still valid from the
|
||||
* subtract (since the intervening mov instructions don't alter the flags).
|
||||
* Gcc 8.1.0 doesn't have that problem.
|
||||
*/
|
||||
static __always_inline
|
||||
void swap_words_32(void *a, void *b, size_t n)
|
||||
{
|
||||
do {
|
||||
u32 t = *(u32 *)(a + (n -= 4));
|
||||
*(u32 *)(a + n) = *(u32 *)(b + n);
|
||||
*(u32 *)(b + n) = t;
|
||||
} while (n);
|
||||
}
|
||||
|
||||
/**
|
||||
* swap_words_64 - swap two elements in 64-bit chunks
|
||||
* @a: pointer to the first element to swap
|
||||
* @b: pointer to the second element to swap
|
||||
* @n: element size (must be a multiple of 8)
|
||||
*
|
||||
* Exchange the two objects in memory. This exploits base+index
|
||||
* addressing, which basically all CPUs have, to minimize loop overhead
|
||||
* computations.
|
||||
*
|
||||
* We'd like to use 64-bit loads if possible. If they're not, emulating
|
||||
* one requires base+index+4 addressing which x86 has but most other
|
||||
* processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
|
||||
* but it's possible to have 64-bit loads without 64-bit pointers (e.g.
|
||||
* x32 ABI). Are there any cases the kernel needs to worry about?
|
||||
*/
|
||||
static __always_inline
|
||||
void swap_words_64(void *a, void *b, size_t n)
|
||||
{
|
||||
do {
|
||||
#ifdef CONFIG_64BIT
|
||||
u64 t = *(u64 *)(a + (n -= 8));
|
||||
*(u64 *)(a + n) = *(u64 *)(b + n);
|
||||
*(u64 *)(b + n) = t;
|
||||
#else
|
||||
/* Use two 32-bit transfers to avoid base+index+4 addressing */
|
||||
u32 t = *(u32 *)(a + (n -= 4));
|
||||
*(u32 *)(a + n) = *(u32 *)(b + n);
|
||||
*(u32 *)(b + n) = t;
|
||||
|
||||
t = *(u32 *)(a + (n -= 4));
|
||||
*(u32 *)(a + n) = *(u32 *)(b + n);
|
||||
*(u32 *)(b + n) = t;
|
||||
#endif
|
||||
} while (n);
|
||||
}
|
||||
|
||||
/**
 * swap_bytes - exchange two elements one byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * Fallback for when alignment rules out the word-sized variants.  Bytes
 * are exchanged from the last one down to the first; the loop body runs
 * before @n is tested.
 */
static __always_inline
void swap_bytes(void *a, void *b, size_t n)
{
	char *bytes_a = a;
	char *bytes_b = b;

	do {
		char tmp;

		n--;
		tmp = bytes_a[n];
		bytes_a[n] = bytes_b[n];
		bytes_b[n] = tmp;
	} while (n != 0);
}
|
||||
|
||||
/*
|
||||
* The values are arbitrary as long as they can't be confused with
|
||||
* a pointer, but small integers make for the smallest compare
|
||||
* instructions.
|
||||
*/
|
||||
#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0)
|
||||
#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1)
|
||||
#define SWAP_BYTES ((void (*)(void *, void *, void *))2)
|
||||
|
||||
/*
|
||||
* Selects the appropriate swap function based on the element size.
|
||||
*/
|
||||
static __always_inline
|
||||
void *select_swap_func(const void *base, size_t size)
|
||||
{
|
||||
if (is_aligned(base, size, 8))
|
||||
return SWAP_WORDS_64;
|
||||
else if (is_aligned(base, size, 4))
|
||||
return SWAP_WORDS_32;
|
||||
else
|
||||
return SWAP_BYTES;
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args),
|
||||
void *priv)
|
||||
{
|
||||
if (swap_func == SWAP_WORDS_64)
|
||||
swap_words_64(a, b, size);
|
||||
else if (swap_func == SWAP_WORDS_32)
|
||||
swap_words_32(a, b, size);
|
||||
else if (swap_func == SWAP_BYTES)
|
||||
swap_bytes(a, b, size);
|
||||
else
|
||||
swap_func(a, b, priv);
|
||||
}
|
||||
|
||||
/**
|
||||
* parent - given the offset of the child, find the offset of the parent.
|
||||
* @i: the offset of the heap element whose parent is sought. Non-zero.
|
||||
* @lsbit: a precomputed 1-bit mask, equal to "size & -size"
|
||||
* @size: size of each element
|
||||
*
|
||||
* In terms of array indexes, the parent of element j = @i/@size is simply
|
||||
* (j-1)/2. But when working in byte offsets, we can't use implicit
|
||||
* truncation of integer divides.
|
||||
*
|
||||
* Fortunately, we only need one bit of the quotient, not the full divide.
|
||||
* @size has a least significant bit. That bit will be clear if @i is
|
||||
* an even multiple of @size, and set if it's an odd multiple.
|
||||
*
|
||||
* Logically, we're doing "if (i & lsbit) i -= size;", but since the
|
||||
* branch is unpredictable, it's done with a bit of clever branch-free
|
||||
* code instead.
|
||||
*/
|
||||
__attribute_const__ __always_inline
|
||||
static size_t parent(size_t i, unsigned int lsbit, size_t size)
|
||||
{
|
||||
i -= size;
|
||||
i -= size & -(i & lsbit);
|
||||
return i / 2;
|
||||
}
|
||||
|
||||
/* Initialize a min-heap. */
|
||||
static __always_inline
|
||||
void __min_heap_init(min_heap_char *heap, void *data, int size)
|
||||
void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
|
||||
{
|
||||
heap->nr = 0;
|
||||
heap->size = size;
|
||||
@ -50,104 +217,113 @@ void __min_heap_init(min_heap_char *heap, void *data, int size)
|
||||
heap->data = heap->preallocated;
|
||||
}
|
||||
|
||||
#define min_heap_init(_heap, _data, _size) \
|
||||
__min_heap_init((min_heap_char *)_heap, _data, _size)
|
||||
#define min_heap_init_inline(_heap, _data, _size) \
|
||||
__min_heap_init_inline((min_heap_char *)_heap, _data, _size)
|
||||
|
||||
/* Get the minimum element from the heap. */
|
||||
static __always_inline
|
||||
void *__min_heap_peek(struct min_heap_char *heap)
|
||||
void *__min_heap_peek_inline(struct min_heap_char *heap)
|
||||
{
|
||||
return heap->nr ? heap->data : NULL;
|
||||
}
|
||||
|
||||
#define min_heap_peek(_heap) \
|
||||
(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
|
||||
#define min_heap_peek_inline(_heap) \
|
||||
(__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap))
|
||||
|
||||
/* Check if the heap is full. */
|
||||
static __always_inline
|
||||
bool __min_heap_full(min_heap_char *heap)
|
||||
bool __min_heap_full_inline(min_heap_char *heap)
|
||||
{
|
||||
return heap->nr == heap->size;
|
||||
}
|
||||
|
||||
#define min_heap_full(_heap) \
|
||||
__min_heap_full((min_heap_char *)_heap)
|
||||
#define min_heap_full_inline(_heap) \
|
||||
__min_heap_full_inline((min_heap_char *)_heap)
|
||||
|
||||
/* Sift the element at pos down the heap. */
|
||||
static __always_inline
|
||||
void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
|
||||
void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
void *left, *right;
|
||||
const unsigned long lsbit = elem_size & -elem_size;
|
||||
void *data = heap->data;
|
||||
void *root = data + pos * elem_size;
|
||||
int i = pos, j;
|
||||
void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
|
||||
/* pre-scale counters for performance */
|
||||
size_t a = pos * elem_size;
|
||||
size_t b, c, d;
|
||||
size_t n = heap->nr * elem_size;
|
||||
|
||||
if (!swp)
|
||||
swp = select_swap_func(data, elem_size);
|
||||
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (;;) {
|
||||
if (i * 2 + 2 >= heap->nr)
|
||||
break;
|
||||
left = data + (i * 2 + 1) * elem_size;
|
||||
right = data + (i * 2 + 2) * elem_size;
|
||||
i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
|
||||
}
|
||||
for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
|
||||
b = func->less(data + c, data + d, args) ? c : d;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (i * 2 + 2 == heap->nr)
|
||||
i = i * 2 + 1;
|
||||
if (d == n)
|
||||
b = c;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (i != pos && func->less(root, data + i * elem_size, args))
|
||||
i = (i - 1) / 2;
|
||||
while (b != a && func->less(data + a, data + b, args))
|
||||
b = parent(b, lsbit, elem_size);
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
j = i;
|
||||
while (i != pos) {
|
||||
i = (i - 1) / 2;
|
||||
func->swp(data + i * elem_size, data + j * elem_size, args);
|
||||
c = b;
|
||||
while (b != a) {
|
||||
b = parent(b, lsbit, elem_size);
|
||||
do_swap(data + b, data + c, elem_size, swp, args);
|
||||
}
|
||||
}
|
||||
|
||||
#define min_heap_sift_down(_heap, _pos, _func, _args) \
|
||||
__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_sift_down_inline(_heap, _pos, _func, _args) \
|
||||
__min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \
|
||||
_func, _args)
|
||||
|
||||
/* Sift up ith element from the heap, O(log2(nr)). */
|
||||
static __always_inline
|
||||
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
const unsigned long lsbit = elem_size & -elem_size;
|
||||
void *data = heap->data;
|
||||
size_t parent;
|
||||
void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
|
||||
/* pre-scale counters for performance */
|
||||
size_t a = idx * elem_size, b;
|
||||
|
||||
while (idx) {
|
||||
parent = (idx - 1) / 2;
|
||||
if (func->less(data + parent * elem_size, data + idx * elem_size, args))
|
||||
if (!swp)
|
||||
swp = select_swap_func(data, elem_size);
|
||||
|
||||
while (a) {
|
||||
b = parent(a, lsbit, elem_size);
|
||||
if (func->less(data + b, data + a, args))
|
||||
break;
|
||||
func->swp(data + parent * elem_size, data + idx * elem_size, args);
|
||||
idx = parent;
|
||||
do_swap(data + a, data + b, elem_size, swp, args);
|
||||
a = b;
|
||||
}
|
||||
}
|
||||
|
||||
#define min_heap_sift_up(_heap, _idx, _func, _args) \
|
||||
__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
|
||||
#define min_heap_sift_up_inline(_heap, _idx, _func, _args) \
|
||||
__min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \
|
||||
_func, _args)
|
||||
|
||||
/* Floyd's approach to heapification that is O(nr). */
|
||||
static __always_inline
|
||||
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
|
||||
void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = heap->nr / 2 - 1; i >= 0; i--)
|
||||
__min_heap_sift_down(heap, i, elem_size, func, args);
|
||||
__min_heap_sift_down_inline(heap, i, elem_size, func, args);
|
||||
}
|
||||
|
||||
#define min_heapify_all(_heap, _func, _args) \
|
||||
__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heapify_all_inline(_heap, _func, _args) \
|
||||
__min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
|
||||
/* Remove minimum element from the heap, O(log2(nr)). */
|
||||
static __always_inline
|
||||
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
|
||||
bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
void *data = heap->data;
|
||||
@ -158,13 +334,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
|
||||
/* Place last element at the root (position 0) and then sift down. */
|
||||
heap->nr--;
|
||||
memcpy(data, data + (heap->nr * elem_size), elem_size);
|
||||
__min_heap_sift_down(heap, 0, elem_size, func, args);
|
||||
__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define min_heap_pop(_heap, _func, _args) \
|
||||
__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_pop_inline(_heap, _func, _args) \
|
||||
__min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
|
||||
/*
|
||||
* Remove the minimum element and then push the given element. The
|
||||
@ -172,21 +348,20 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
|
||||
* efficient than a pop followed by a push that does 2.
|
||||
*/
|
||||
static __always_inline
|
||||
void __min_heap_pop_push(min_heap_char *heap,
|
||||
const void *element, size_t elem_size,
|
||||
const struct min_heap_callbacks *func,
|
||||
void *args)
|
||||
void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
memcpy(heap->data, element, elem_size);
|
||||
__min_heap_sift_down(heap, 0, elem_size, func, args);
|
||||
__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
|
||||
}
|
||||
|
||||
#define min_heap_pop_push(_heap, _element, _func, _args) \
|
||||
__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_pop_push_inline(_heap, _element, _func, _args) \
|
||||
__min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
|
||||
_func, _args)
|
||||
|
||||
/* Push an element on to the heap, O(log2(nr)). */
|
||||
static __always_inline
|
||||
bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
void *data = heap->data;
|
||||
@ -201,35 +376,81 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
heap->nr++;
|
||||
|
||||
/* Sift child at pos up. */
|
||||
__min_heap_sift_up(heap, elem_size, pos, func, args);
|
||||
__min_heap_sift_up_inline(heap, elem_size, pos, func, args);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define min_heap_push(_heap, _element, _func, _args) \
|
||||
__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_push_inline(_heap, _element, _func, _args) \
|
||||
__min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
|
||||
_func, _args)
|
||||
|
||||
/* Remove ith element from the heap, O(log2(nr)). */
|
||||
static __always_inline
|
||||
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
const struct min_heap_callbacks *func, void *args)
|
||||
{
|
||||
void *data = heap->data;
|
||||
void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
|
||||
|
||||
if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
|
||||
return false;
|
||||
|
||||
if (!swp)
|
||||
swp = select_swap_func(data, elem_size);
|
||||
|
||||
/* Place last element at the root (position 0) and then sift down. */
|
||||
heap->nr--;
|
||||
if (idx == heap->nr)
|
||||
return true;
|
||||
func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
|
||||
__min_heap_sift_up(heap, elem_size, idx, func, args);
|
||||
__min_heap_sift_down(heap, idx, elem_size, func, args);
|
||||
do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
|
||||
__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
|
||||
__min_heap_sift_down_inline(heap, idx, elem_size, func, args);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define min_heap_del_inline(_heap, _idx, _func, _args) \
|
||||
__min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \
|
||||
_func, _args)
|
||||
|
||||
void __min_heap_init(min_heap_char *heap, void *data, int size);
|
||||
void *__min_heap_peek(struct min_heap_char *heap);
|
||||
bool __min_heap_full(min_heap_char *heap);
|
||||
void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
|
||||
const struct min_heap_callbacks *func, void *args);
|
||||
|
||||
#define min_heap_init(_heap, _data, _size) \
|
||||
__min_heap_init((min_heap_char *)_heap, _data, _size)
|
||||
#define min_heap_peek(_heap) \
|
||||
(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
|
||||
#define min_heap_full(_heap) \
|
||||
__min_heap_full((min_heap_char *)_heap)
|
||||
#define min_heap_sift_down(_heap, _pos, _func, _args) \
|
||||
__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_sift_up(_heap, _idx, _func, _args) \
|
||||
__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
|
||||
#define min_heapify_all(_heap, _func, _args) \
|
||||
__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_pop(_heap, _func, _args) \
|
||||
__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_pop_push(_heap, _element, _func, _args) \
|
||||
__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
|
||||
_func, _args)
|
||||
#define min_heap_push(_heap, _element, _func, _args) \
|
||||
__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
|
||||
#define min_heap_del(_heap, _idx, _func, _args) \
|
||||
__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct sysinfo {
|
||||
|
1
include/linux/moduleparam.h
Normal file
1
include/linux/moduleparam.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/module.h>
|
@ -9,7 +9,9 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/math64.h>
|
||||
|
||||
#ifdef SYS_getrandom
|
||||
static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
|
||||
@ -67,4 +69,24 @@ static inline u32 get_random_u32_below(u32 ceil)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * __get_random_u32_below() - alias for get_random_u32_below().
 * @ceil: passed through unchanged.
 *
 * Returns whatever get_random_u32_below(@ceil) returns.
 */
static inline u32 __get_random_u32_below(u32 ceil)
{
	u32 value = get_random_u32_below(ceil);

	return value;
}
|
||||
|
||||
/*
 * get_random_u64_below() - random 64-bit value below @ceil.
 * @ceil: exclusive upper bound.
 *
 * Returns 0 when @ceil is 0 or 1, and defers to get_random_u32_below()
 * when @ceil fits in 32 bits.  Otherwise a 64-bit random value is drawn
 * and scaled to the high 64 bits of ceil * rand, which is always below
 * @ceil.  A draw is rejected and repeated while the low 64 bits of that
 * product are below 2^64 mod ceil (written "-ceil % ceil" in u64
 * arithmetic) -- presumably to keep the result free of bias.
 */
static inline u64 get_random_u64_below(u64 ceil)
{
	u64 sample, low_half;

	if (ceil <= 1)
		return 0;

	if (ceil <= U32_MAX)
		return get_random_u32_below(ceil);

	do {
		sample = get_random_u64();
		/* Low 64 bits of the 128-bit product ceil * sample. */
		low_half = ceil * sample;
	} while (!likely(low_half >= -ceil % ceil));

	return mul_u64_u64_shr(ceil, sample, 64);
}
|
||||
|
||||
#endif /* _LINUX_RANDOM_H */
|
||||
|
@ -97,6 +97,11 @@ struct task_struct {
|
||||
struct signal_struct {
|
||||
struct rw_semaphore exec_update_lock;
|
||||
} *signal, _signal;
|
||||
|
||||
struct {
|
||||
u64 sum_exec_runtime;
|
||||
u64 exec_start;
|
||||
} se;
|
||||
};
|
||||
|
||||
extern __thread struct task_struct *current;
|
||||
|
6
include/linux/sched/sysctl.h
Normal file
6
include/linux/sched/sysctl.h
Normal file
@ -0,0 +1,6 @@
|
||||
#ifndef __TOOLS_LINUX_SCHED_SYSCTL_H
|
||||
#define __TOOLS_LINUX_SCHED_SYSCTL_H
|
||||
|
||||
#define sysctl_hung_task_timeout_secs (HZ * 10)
|
||||
|
||||
#endif /* __TOOLS_LINUX_SCHED_SYSCTL_H */
|
@ -103,6 +103,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
((size) != 0 && (n) > SIZE_MAX / (size) \
|
||||
? NULL : kmalloc((n) * (size), flags))
|
||||
|
||||
#define kvcalloc(n, size, flags) kvmalloc_array(n, size, flags|__GFP_ZERO)
|
||||
|
||||
#define kvmalloc_array_noprof(...) kvmalloc_array(__VA_ARGS__)
|
||||
|
||||
#define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO)
|
||||
@ -111,6 +113,7 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
#define kzfree(p) free((void *) p)
|
||||
|
||||
#define kvmalloc(size, flags) kmalloc(size, flags)
|
||||
#define kvmalloc_noprof(size, flags) kmalloc(size, flags)
|
||||
#define kvzalloc(size, flags) kzalloc(size, flags)
|
||||
#define kvfree(p) kfree(p)
|
||||
|
||||
@ -274,6 +277,8 @@ static inline void *vmalloc(unsigned long size)
|
||||
return __vmalloc(size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
#define vmalloc_noprof(...) vmalloc(__VA_ARGS__)
|
||||
|
||||
static inline void *vzalloc(unsigned long size)
|
||||
{
|
||||
return __vmalloc(size, GFP_KERNEL|__GFP_ZERO);
|
||||
|
@ -7,6 +7,11 @@
|
||||
|
||||
typedef void (*rcu_callback_t)(struct rcu_head *head);
|
||||
|
||||
/*
 * get_state_synchronize_rcu() - obtain a grace-period polling cookie.
 *
 * Implemented by calling start_poll_synchronize_rcu() and returning its
 * cookie unchanged.
 *
 * Declared with an explicit (void) parameter list: before C23 an empty
 * "()" does not form a prototype, so calls passing stray arguments would
 * not be diagnosed.
 */
static inline struct urcu_gp_poll_state get_state_synchronize_rcu(void)
{
	return start_poll_synchronize_rcu();
}
|
||||
|
||||
struct srcu_struct {
|
||||
};
|
||||
|
||||
@ -17,36 +22,19 @@ static inline int srcu_read_lock(struct srcu_struct *ssp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
|
||||
static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, struct urcu_gp_poll_state cookie)
|
||||
{
|
||||
return true;
|
||||
return poll_state_synchronize_rcu(cookie);
|
||||
}
|
||||
|
||||
static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
|
||||
static inline struct urcu_gp_poll_state start_poll_synchronize_srcu(struct srcu_struct *ssp)
|
||||
{
|
||||
return 0;
|
||||
return start_poll_synchronize_rcu();
|
||||
}
|
||||
|
||||
static inline unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
|
||||
static inline struct urcu_gp_poll_state get_state_synchronize_srcu(struct srcu_struct *ssp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef poll_state_synchronize_rcu
|
||||
static inline bool poll_state_synchronize_rcu(unsigned long cookie)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#undef start_poll_synchronize_rcu
|
||||
static inline unsigned long start_poll_synchronize_rcu()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned long get_state_synchronize_rcu()
|
||||
{
|
||||
return 0;
|
||||
return get_state_synchronize_rcu();
|
||||
}
|
||||
|
||||
/* synchronize_srcu_expedited() - no-op here; @ssp is ignored. */
static inline void synchronize_srcu_expedited(struct srcu_struct *ssp)
{
	(void)ssp;
}
|
||||
|
85
include/linux/string_choices.h
Normal file
85
include/linux/string_choices.h
Normal file
@ -0,0 +1,85 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_STRING_CHOICES_H_
|
||||
#define _LINUX_STRING_CHOICES_H_
|
||||
|
||||
/*
 * This header provides a series of helpers named in the str_$TRUE_$FALSE
 * format (more helpers can be added as needed), where $TRUE and $FALSE are
 * the literal strings they return. These helpers can be used when printing
 * and in other places where constant strings are required. Using them
 * offers the following benefits:
 *  1) Less hardcoding of strings, which makes the code more elegant
 *     through these simple, literally named helpers.
 *  2) Unified output, which prevents the same string from being printed
 *     in various forms, such as enable/disable, enabled/disabled, en/dis.
 *  3) Deduplication by the linker, which results in a smaller binary file.
 */
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/* Returns "enable" when @v is true, "disable" otherwise. */
static inline const char *str_enable_disable(bool v)
{
	if (v)
		return "enable";

	return "disable";
}
|
||||
#define str_disable_enable(v) str_enable_disable(!(v))
|
||||
|
||||
/* Returns "enabled" when @v is true, "disabled" otherwise. */
static inline const char *str_enabled_disabled(bool v)
{
	if (v)
		return "enabled";

	return "disabled";
}
|
||||
#define str_disabled_enabled(v) str_enabled_disabled(!(v))
|
||||
|
||||
/* Returns "hi" when @v is true, "lo" otherwise. */
static inline const char *str_hi_lo(bool v)
{
	if (v)
		return "hi";

	return "lo";
}
|
||||
#define str_lo_hi(v) str_hi_lo(!(v))
|
||||
|
||||
/* Returns "high" when @v is true, "low" otherwise. */
static inline const char *str_high_low(bool v)
{
	if (v)
		return "high";

	return "low";
}
|
||||
#define str_low_high(v) str_high_low(!(v))
|
||||
|
||||
/* Returns "read" when @v is true, "write" otherwise. */
static inline const char *str_read_write(bool v)
{
	if (v)
		return "read";

	return "write";
}
|
||||
#define str_write_read(v) str_read_write(!(v))
|
||||
|
||||
/* Returns "on" when @v is true, "off" otherwise. */
static inline const char *str_on_off(bool v)
{
	if (v)
		return "on";

	return "off";
}
|
||||
#define str_off_on(v) str_on_off(!(v))
|
||||
|
||||
/* Returns "yes" when @v is true, "no" otherwise. */
static inline const char *str_yes_no(bool v)
{
	if (v)
		return "yes";

	return "no";
}
|
||||
#define str_no_yes(v) str_yes_no(!(v))
|
||||
|
||||
/* Returns "up" when @v is true, "down" otherwise. */
static inline const char *str_up_down(bool v)
{
	if (v)
		return "up";

	return "down";
}
|
||||
#define str_down_up(v) str_up_down(!(v))
|
||||
|
||||
/* Returns "true" when @v is true, "false" otherwise. */
static inline const char *str_true_false(bool v)
{
	if (v)
		return "true";

	return "false";
}
|
||||
#define str_false_true(v) str_true_false(!(v))
|
||||
|
||||
/**
 * str_plural - pick the simple English plural suffix for a count
 * @num: the count that decides whether a plural suffix is needed
 *
 * Returns the empty string when @num is exactly 1 and "s" for every other
 * value, including 0.
 */
static inline const char *str_plural(size_t num)
{
	if (num != 1)
		return "s";

	return "";
}
|
||||
|
||||
#endif
|
@ -17,4 +17,15 @@ enum string_size_units {
|
||||
int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
|
||||
char *buf, int len);
|
||||
|
||||
/*
 * memcpy_and_pad() - copy a buffer and pad the rest of the destination.
 * @dest:     destination buffer.
 * @dest_len: number of bytes of @dest to fill.
 * @src:      source buffer.
 * @count:    number of bytes available in @src.
 * @pad:      byte value used to fill what @src does not cover.
 *
 * When @dest_len does not exceed @count, exactly @dest_len bytes are
 * copied and nothing is padded.  Otherwise @count bytes are copied and
 * the remaining @dest_len - @count bytes are set to @pad.
 */
static inline void memcpy_and_pad(void *dest, size_t dest_len, const void *src,
				  size_t count, int pad)
{
	if (dest_len <= count) {
		/* Source fills (or overfills) the destination: truncate. */
		memcpy(dest, src, dest_len);
		return;
	}

	memcpy(dest, src, count);
	memset((char *)dest + count, pad, dest_len - count);
}
|
||||
|
||||
#endif
|
||||
|
1
include/linux/unaligned.h
Normal file
1
include/linux/unaligned.h
Normal file
@ -0,0 +1 @@
|
||||
#include <asm/unaligned.h>
|
0
include/linux/unicode.h
Normal file
0
include/linux/unicode.h
Normal file
@ -96,6 +96,7 @@ do { \
|
||||
#define wait_event_freezable(wq, condition) ({wait_event(wq, condition); 0; })
|
||||
#define wait_event_killable(wq, condition) ({wait_event(wq, condition); 0; })
|
||||
#define wait_event_interruptible(wq, condition) ({wait_event(wq, condition); 0; })
|
||||
#define wait_event_state(wq, condition, state) ({wait_event(wq, condition); 0; })
|
||||
|
||||
#define __wait_event_timeout(wq, condition, timeout) \
|
||||
___wait_event(wq, ___wait_cond_timeout(condition), \
|
||||
|
@ -184,11 +184,6 @@ invalid:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
#define acl_for_each_entry(acl, acl_e) \
|
||||
for (acl_e = acl->a_entries; \
|
||||
acl_e < acl->a_entries + acl->a_count; \
|
||||
acl_e++)
|
||||
|
||||
/*
|
||||
* Convert from in-memory to filesystem representation.
|
||||
*/
|
||||
@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans,
|
||||
{
|
||||
struct bkey_i_xattr *xattr;
|
||||
bch_acl_header *acl_header;
|
||||
const struct posix_acl_entry *acl_e;
|
||||
const struct posix_acl_entry *acl_e, *pe;
|
||||
void *outptr;
|
||||
unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
|
||||
|
||||
acl_for_each_entry(acl, acl_e) {
|
||||
FOREACH_ACL_ENTRY(acl_e, acl, pe) {
|
||||
switch (acl_e->e_tag) {
|
||||
case ACL_USER:
|
||||
case ACL_GROUP:
|
||||
@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
|
||||
|
||||
outptr = (void *) acl_header + sizeof(*acl_header);
|
||||
|
||||
acl_for_each_entry(acl, acl_e) {
|
||||
FOREACH_ACL_ENTRY(acl_e, acl, pe) {
|
||||
bch_acl_entry *entry = outptr;
|
||||
|
||||
entry->e_tag = cpu_to_le16(acl_e->e_tag);
|
||||
@ -278,7 +273,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
|
||||
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
|
||||
struct btree_iter iter = { NULL };
|
||||
struct btree_iter iter = {};
|
||||
struct posix_acl *acl = NULL;
|
||||
|
||||
if (rcu)
|
||||
@ -349,7 +344,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct btree_iter inode_iter = { NULL };
|
||||
struct btree_iter inode_iter = {};
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct posix_acl *acl;
|
||||
umode_t mode;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,8 +8,6 @@
|
||||
#include "debug.h"
|
||||
#include "super.h"
|
||||
|
||||
enum bch_validate_flags;
|
||||
|
||||
/* How out of date a pointer gen is allowed to be: */
|
||||
#define BUCKET_GC_GEN_MAX 96U
|
||||
|
||||
@ -133,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
|
||||
if (a.stripe)
|
||||
return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
|
||||
if (bch2_bucket_sectors_dirty(a))
|
||||
return data_type;
|
||||
return bucket_data_type(data_type);
|
||||
if (a.cached_sectors)
|
||||
return BCH_DATA_cached;
|
||||
if (BCH_ALLOC_V4_NEED_DISCARD(&a))
|
||||
@ -168,6 +166,9 @@ static inline bool data_type_movable(enum bch_data_type type)
|
||||
static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
|
||||
struct bch_dev *ca)
|
||||
{
|
||||
if (a.data_type >= BCH_DATA_NR)
|
||||
return 0;
|
||||
|
||||
if (!data_type_movable(a.data_type) ||
|
||||
!bch2_bucket_sectors_fragmented(ca, a))
|
||||
return 0;
|
||||
@ -242,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s
|
||||
|
||||
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
|
||||
|
||||
int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
|
||||
int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
|
||||
int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
|
||||
int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
|
||||
int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
void bch2_alloc_v4_swab(struct bkey_s);
|
||||
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
@ -279,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
})
|
||||
|
||||
int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c,
|
||||
enum bch_validate_flags);
|
||||
struct bkey_validate_context);
|
||||
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \
|
||||
@ -304,6 +309,8 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *,
|
||||
int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
|
||||
struct bkey_s_c, struct bkey_s,
|
||||
enum btree_iter_update_trigger_flags);
|
||||
|
||||
int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool);
|
||||
int bch2_check_alloc_info(struct bch_fs *);
|
||||
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
|
||||
void bch2_dev_do_discards(struct bch_dev *);
|
||||
@ -314,11 +321,11 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
|
||||
{
|
||||
u64 want_free = ca->mi.nbuckets >> 7;
|
||||
u64 free = max_t(s64, 0,
|
||||
u.d[BCH_DATA_free].buckets
|
||||
+ u.d[BCH_DATA_need_discard].buckets
|
||||
u.buckets[BCH_DATA_free]
|
||||
+ u.buckets[BCH_DATA_need_discard]
|
||||
- bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));
|
||||
|
||||
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
|
||||
return clamp_t(s64, want_free - free, 0, u.buckets[BCH_DATA_cached]);
|
||||
}
|
||||
|
||||
void bch2_dev_do_invalidates(struct bch_dev *);
|
||||
|
@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
|
||||
|
||||
struct bch_alloc_v4 {
|
||||
struct bch_val v;
|
||||
__u64 journal_seq;
|
||||
__u64 journal_seq_nonempty;
|
||||
__u32 flags;
|
||||
__u8 gen;
|
||||
__u8 oldest_gen;
|
||||
@ -70,7 +70,7 @@ struct bch_alloc_v4 {
|
||||
__u32 stripe;
|
||||
__u32 nr_external_backpointers;
|
||||
/* end of fields in original version of alloc_v4 */
|
||||
__u64 _fragmentation_lru; /* obsolete */
|
||||
__u64 journal_seq_empty;
|
||||
__u32 stripe_sectors;
|
||||
__u32 pad;
|
||||
} __packed __aligned(8);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,7 @@
|
||||
#include "bcachefs.h"
|
||||
#include "alloc_types.h"
|
||||
#include "extents.h"
|
||||
#include "io_write_types.h"
|
||||
#include "sb-members.h"
|
||||
|
||||
#include <linux/hash.h>
|
||||
@ -20,7 +21,53 @@ void bch2_reset_alloc_cursors(struct bch_fs *);
|
||||
|
||||
struct dev_alloc_list {
|
||||
unsigned nr;
|
||||
u8 devs[BCH_SB_MEMBERS_MAX];
|
||||
u8 data[BCH_SB_MEMBERS_MAX];
|
||||
};
|
||||
|
||||
struct alloc_request {
|
||||
unsigned nr_replicas;
|
||||
unsigned target;
|
||||
bool ec;
|
||||
enum bch_watermark watermark;
|
||||
enum bch_write_flags flags;
|
||||
enum bch_data_type data_type;
|
||||
struct bch_devs_list *devs_have;
|
||||
struct write_point *wp;
|
||||
|
||||
/* These fields are used primarily by open_bucket_add_buckets */
|
||||
struct open_buckets ptrs;
|
||||
unsigned nr_effective; /* sum of @ptrs durability */
|
||||
bool have_cache; /* have we allocated from a 0 durability dev */
|
||||
struct bch_devs_mask devs_may_alloc;
|
||||
|
||||
/* bch2_bucket_alloc_set_trans(): */
|
||||
struct bch_dev_usage usage;
|
||||
|
||||
/* bch2_bucket_alloc_trans(): */
|
||||
struct bch_dev *ca;
|
||||
|
||||
enum {
|
||||
BTREE_BITMAP_NO,
|
||||
BTREE_BITMAP_YES,
|
||||
BTREE_BITMAP_ANY,
|
||||
} btree_bitmap;
|
||||
|
||||
struct {
|
||||
u64 buckets_seen;
|
||||
u64 skipped_open;
|
||||
u64 skipped_need_journal_commit;
|
||||
u64 need_journal_commit;
|
||||
u64 skipped_nocow;
|
||||
u64 skipped_nouse;
|
||||
u64 skipped_mi_btree_bitmap;
|
||||
} counters;
|
||||
|
||||
unsigned scratch_nr_replicas;
|
||||
unsigned scratch_nr_effective;
|
||||
bool scratch_have_cache;
|
||||
enum bch_data_type scratch_data_type;
|
||||
struct open_buckets scratch_ptrs;
|
||||
struct bch_devs_mask scratch_devs_may_alloc;
|
||||
};
|
||||
|
||||
struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
|
||||
@ -28,13 +75,28 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
|
||||
struct bch_devs_mask *);
|
||||
void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
|
||||
|
||||
long bch2_bucket_alloc_new_fs(struct bch_dev *);
|
||||
|
||||
static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
return bch2_dev_have_ref(c, ob->dev);
|
||||
}
|
||||
|
||||
static inline unsigned bch2_open_buckets_reserved(enum bch_watermark watermark)
|
||||
{
|
||||
switch (watermark) {
|
||||
case BCH_WATERMARK_interior_updates:
|
||||
return 0;
|
||||
case BCH_WATERMARK_reclaim:
|
||||
return OPEN_BUCKETS_COUNT / 6;
|
||||
case BCH_WATERMARK_btree:
|
||||
case BCH_WATERMARK_btree_copygc:
|
||||
return OPEN_BUCKETS_COUNT / 4;
|
||||
case BCH_WATERMARK_copygc:
|
||||
return OPEN_BUCKETS_COUNT / 3;
|
||||
default:
|
||||
return OPEN_BUCKETS_COUNT / 2;
|
||||
}
|
||||
}
|
||||
|
||||
struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
|
||||
enum bch_watermark, enum bch_data_type,
|
||||
struct closure *);
|
||||
@ -67,7 +129,7 @@ static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
|
||||
}
|
||||
|
||||
void bch2_open_bucket_write_error(struct bch_fs *,
|
||||
struct open_buckets *, unsigned);
|
||||
struct open_buckets *, unsigned, int);
|
||||
|
||||
void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
|
||||
|
||||
@ -156,11 +218,8 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
|
||||
}
|
||||
|
||||
enum bch_write_flags;
|
||||
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
|
||||
struct dev_stripe_state *, struct bch_devs_mask *,
|
||||
unsigned, unsigned *, bool *, enum bch_write_flags,
|
||||
enum bch_data_type, enum bch_watermark,
|
||||
struct closure *);
|
||||
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct alloc_request *,
|
||||
struct dev_stripe_state *, struct closure *);
|
||||
|
||||
int bch2_alloc_sectors_start_trans(struct btree_trans *,
|
||||
unsigned, unsigned,
|
||||
|
@ -8,21 +8,6 @@
|
||||
#include "clock_types.h"
|
||||
#include "fifo.h"
|
||||
|
||||
struct bucket_alloc_state {
|
||||
enum {
|
||||
BTREE_BITMAP_NO,
|
||||
BTREE_BITMAP_YES,
|
||||
BTREE_BITMAP_ANY,
|
||||
} btree_bitmap;
|
||||
|
||||
u64 buckets_seen;
|
||||
u64 skipped_open;
|
||||
u64 skipped_need_journal_commit;
|
||||
u64 skipped_nocow;
|
||||
u64 skipped_nouse;
|
||||
u64 skipped_mi_btree_bitmap;
|
||||
};
|
||||
|
||||
#define BCH_WATERMARKS() \
|
||||
x(stripe) \
|
||||
x(normal) \
|
||||
@ -89,6 +74,7 @@ struct dev_stripe_state {
|
||||
x(stopped) \
|
||||
x(waiting_io) \
|
||||
x(waiting_work) \
|
||||
x(runnable) \
|
||||
x(running)
|
||||
|
||||
enum write_point_state {
|
||||
@ -124,6 +110,7 @@ struct write_point {
|
||||
enum write_point_state state;
|
||||
u64 last_state_change;
|
||||
u64 time[WRITE_POINT_STATE_NR];
|
||||
u64 last_runtime;
|
||||
} __aligned(SMP_CACHE_BYTES);
|
||||
};
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_H
|
||||
#define _BCACHEFS_BACKPOINTERS_H
|
||||
|
||||
#include "btree_cache.h"
|
||||
#include "btree_iter.h"
|
||||
@ -18,14 +18,14 @@ static inline u64 swab40(u64 x)
|
||||
((x & 0xff00000000ULL) >> 32));
|
||||
}
|
||||
|
||||
int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags);
|
||||
void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
|
||||
void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k,
|
||||
struct bkey_validate_context);
|
||||
void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_backpointer_swab(struct bkey_s);
|
||||
|
||||
#define bch2_bkey_ops_backpointer ((struct bkey_ops) { \
|
||||
.key_validate = bch2_backpointer_validate, \
|
||||
.val_to_text = bch2_backpointer_k_to_text, \
|
||||
.val_to_text = bch2_backpointer_to_text, \
|
||||
.swab = bch2_backpointer_swab, \
|
||||
.min_val_size = 32, \
|
||||
})
|
||||
@ -43,22 +43,24 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos
|
||||
return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
|
||||
}
|
||||
|
||||
static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca, struct bpos bp_pos,
|
||||
u32 *bucket_offset)
|
||||
{
|
||||
u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
|
||||
|
||||
return POS(bp_pos.inode, sector_to_bucket_and_offset(ca, bucket_sector, bucket_offset));
|
||||
}
|
||||
|
||||
static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket)
|
||||
{
|
||||
rcu_read_lock();
|
||||
struct bch_dev *ca = bch2_dev_rcu(c, bp_pos.inode);
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode);
|
||||
if (ca)
|
||||
*bucket = bp_pos_to_bucket(ca, bp_pos);
|
||||
rcu_read_unlock();
|
||||
return ca != NULL;
|
||||
}
|
||||
|
||||
static inline bool bp_pos_to_bucket_nodev(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket)
|
||||
{
|
||||
return !bch2_fs_inconsistent_on(!bp_pos_to_bucket_nodev_noerror(c, bp_pos, bucket),
|
||||
c, "backpointer for missing device %llu", bp_pos.inode);
|
||||
}
|
||||
|
||||
static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca,
|
||||
struct bpos bucket,
|
||||
u64 bucket_offset)
|
||||
@ -80,31 +82,35 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_dev *ca,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *,
|
||||
struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool);
|
||||
static inline struct bpos bucket_pos_to_bp_start(const struct bch_dev *ca, struct bpos bucket)
|
||||
{
|
||||
return bucket_pos_to_bp(ca, bucket, 0);
|
||||
}
|
||||
|
||||
static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct bpos bucket)
|
||||
{
|
||||
return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0));
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *,
|
||||
struct bkey_s_c,
|
||||
struct bkey_i_backpointer *,
|
||||
bool);
|
||||
|
||||
static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
|
||||
struct bch_dev *ca,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k,
|
||||
struct bkey_i_backpointer *bp,
|
||||
bool insert)
|
||||
{
|
||||
if (unlikely(bch2_backpointers_no_use_write_buffer))
|
||||
return bch2_bucket_backpointer_mod_nowritebuffer(trans, ca, bucket, bp, orig_k, insert);
|
||||
|
||||
struct bkey_i_backpointer bp_k;
|
||||
|
||||
bkey_backpointer_init(&bp_k.k_i);
|
||||
bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
|
||||
bp_k.v = bp;
|
||||
return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert);
|
||||
|
||||
if (!insert) {
|
||||
bp_k.k.type = KEY_TYPE_deleted;
|
||||
set_bkey_val_u64s(&bp_k.k, 0);
|
||||
bp->k.type = KEY_TYPE_deleted;
|
||||
set_bkey_val_u64s(&bp->k, 0);
|
||||
}
|
||||
|
||||
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
|
||||
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp->k_i);
|
||||
}
|
||||
|
||||
static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
|
||||
@ -117,7 +123,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
|
||||
return BCH_DATA_btree;
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
|
||||
if (p.has_ec)
|
||||
return BCH_DATA_stripe;
|
||||
if (p.ptr.cached)
|
||||
return BCH_DATA_cached;
|
||||
else
|
||||
return BCH_DATA_user;
|
||||
case KEY_TYPE_stripe: {
|
||||
const struct bch_extent_ptr *ptr = &entry->ptr;
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
@ -134,44 +145,42 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
|
||||
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
const union bch_extent_entry *entry,
|
||||
struct bpos *bucket_pos, struct bch_backpointer *bp,
|
||||
u64 sectors)
|
||||
struct bkey_i_backpointer *bp)
|
||||
{
|
||||
u32 bucket_offset;
|
||||
*bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset);
|
||||
*bp = (struct bch_backpointer) {
|
||||
bkey_backpointer_init(&bp->k_i);
|
||||
bp->k.p.inode = p.ptr.dev;
|
||||
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
bp->k.p.offset = ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
|
||||
else {
|
||||
/*
|
||||
* Put stripe backpointers where they won't collide with the
|
||||
* extent backpointers within the stripe:
|
||||
*/
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
bp->k.p.offset = ((u64) (p.ptr.offset + le16_to_cpu(s.v->sectors)) <<
|
||||
MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;
|
||||
}
|
||||
|
||||
bp->v = (struct bch_backpointer) {
|
||||
.btree_id = btree_id,
|
||||
.level = level,
|
||||
.data_type = bch2_bkey_ptr_data_type(k, p, entry),
|
||||
.bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
|
||||
p.crc.offset,
|
||||
.bucket_len = sectors,
|
||||
.bucket_gen = p.ptr.gen,
|
||||
.bucket_len = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p),
|
||||
.pos = k.k->p,
|
||||
};
|
||||
}
|
||||
|
||||
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
const union bch_extent_entry *entry,
|
||||
struct bpos *bucket_pos, struct bch_backpointer *bp)
|
||||
{
|
||||
u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
|
||||
|
||||
__bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors);
|
||||
}
|
||||
|
||||
int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int,
|
||||
struct bpos *, struct bch_backpointer *, unsigned);
|
||||
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
|
||||
struct bpos, struct bch_backpointer,
|
||||
unsigned);
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
|
||||
struct bpos, struct bch_backpointer);
|
||||
struct bkey_buf;
|
||||
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer,
|
||||
struct btree_iter *, unsigned, struct bkey_buf *);
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
|
||||
struct btree_iter *, struct bkey_buf *);
|
||||
|
||||
int bch2_check_btree_backpointers(struct bch_fs *);
|
||||
int bch2_check_extents_to_backpointers(struct bch_fs *);
|
||||
|
@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos)
|
||||
|
||||
static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos)
|
||||
{
|
||||
prt_str(out, bch2_btree_id_str(pos.btree));
|
||||
bch2_btree_id_to_text(out, pos.btree);
|
||||
prt_char(out, ':');
|
||||
bch2_bpos_to_text(out, pos.pos);
|
||||
}
|
||||
|
@ -203,8 +203,10 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/zstd.h>
|
||||
#include <linux/unicode.h>
|
||||
|
||||
#include "bcachefs_format.h"
|
||||
#include "btree_journal_iter_types.h"
|
||||
#include "disk_accounting_types.h"
|
||||
#include "errcode.h"
|
||||
#include "fifo.h"
|
||||
@ -293,6 +295,8 @@ do { \
|
||||
|
||||
#define bch_info(c, fmt, ...) \
|
||||
bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
|
||||
#define bch_info_ratelimited(c, fmt, ...) \
|
||||
bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
|
||||
#define bch_notice(c, fmt, ...) \
|
||||
bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
|
||||
#define bch_warn(c, fmt, ...) \
|
||||
@ -352,6 +356,12 @@ do { \
|
||||
bch_info(c, fmt, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define bch_verbose_ratelimited(c, fmt, ...) \
|
||||
do { \
|
||||
if ((c)->opts.verbose) \
|
||||
bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define pr_verbose_init(opts, fmt, ...) \
|
||||
do { \
|
||||
if (opt_get(opts, verbose)) \
|
||||
@ -435,6 +445,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
||||
x(btree_node_sort) \
|
||||
x(btree_node_read) \
|
||||
x(btree_node_read_done) \
|
||||
x(btree_node_write) \
|
||||
x(btree_interior_update_foreground) \
|
||||
x(btree_interior_update_total) \
|
||||
x(btree_gc) \
|
||||
@ -447,6 +458,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
||||
x(blocked_journal_low_on_space) \
|
||||
x(blocked_journal_low_on_pin) \
|
||||
x(blocked_journal_max_in_flight) \
|
||||
x(blocked_journal_max_open) \
|
||||
x(blocked_key_cache_flush) \
|
||||
x(blocked_allocate) \
|
||||
x(blocked_allocate_open_bucket) \
|
||||
@ -512,8 +524,8 @@ struct bch_dev {
|
||||
struct percpu_ref ref;
|
||||
#endif
|
||||
struct completion ref_completion;
|
||||
struct percpu_ref io_ref;
|
||||
struct completion io_ref_completion;
|
||||
struct percpu_ref io_ref[2];
|
||||
struct completion io_ref_completion[2];
|
||||
|
||||
struct bch_fs *fs;
|
||||
|
||||
@ -524,6 +536,7 @@ struct bch_dev {
|
||||
*/
|
||||
struct bch_member_cpu mi;
|
||||
atomic64_t errors[BCH_MEMBER_ERROR_NR];
|
||||
unsigned long write_errors_start;
|
||||
|
||||
__uuid_t uuid;
|
||||
char name[BDEVNAME_SIZE];
|
||||
@ -538,23 +551,25 @@ struct bch_dev {
|
||||
|
||||
/*
|
||||
* Buckets:
|
||||
* Per-bucket arrays are protected by c->mark_lock, bucket_lock and
|
||||
* gc_gens_lock, for device resize - holding any is sufficient for
|
||||
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
|
||||
* Per-bucket arrays are protected by either rcu_read_lock or
|
||||
* state_lock, for device resize.
|
||||
*/
|
||||
GENRADIX(struct bucket) buckets_gc;
|
||||
struct bucket_gens __rcu *bucket_gens;
|
||||
u8 *oldest_gen;
|
||||
unsigned long *buckets_nouse;
|
||||
struct rw_semaphore bucket_lock;
|
||||
|
||||
struct bch_dev_usage __percpu *usage;
|
||||
unsigned long *bucket_backpointer_mismatches;
|
||||
unsigned long *bucket_backpointer_empty;
|
||||
|
||||
struct bch_dev_usage_full __percpu
|
||||
*usage;
|
||||
|
||||
/* Allocator: */
|
||||
u64 new_fs_bucket_idx;
|
||||
u64 alloc_cursor[3];
|
||||
|
||||
unsigned nr_open_buckets;
|
||||
unsigned nr_partial_buckets;
|
||||
unsigned nr_btree_reserve;
|
||||
|
||||
size_t inc_gen_needs_gc;
|
||||
@ -599,12 +614,14 @@ struct bch_dev {
|
||||
x(accounting_replay_done) \
|
||||
x(may_go_rw) \
|
||||
x(rw) \
|
||||
x(rw_init_done) \
|
||||
x(was_rw) \
|
||||
x(stopping) \
|
||||
x(emergency_ro) \
|
||||
x(going_ro) \
|
||||
x(write_disable_complete) \
|
||||
x(clean_shutdown) \
|
||||
x(recovery_running) \
|
||||
x(fsck_running) \
|
||||
x(initial_gc_unfixed) \
|
||||
x(need_delete_dead_snapshots) \
|
||||
@ -612,7 +629,8 @@ struct bch_dev {
|
||||
x(topology_error) \
|
||||
x(errors_fixed) \
|
||||
x(errors_not_fixed) \
|
||||
x(no_invalid_checks)
|
||||
x(no_invalid_checks) \
|
||||
x(discard_mount_opt_set) \
|
||||
|
||||
enum bch_fs_flags {
|
||||
#define x(n) BCH_FS_##n,
|
||||
@ -633,6 +651,9 @@ struct btree_transaction_stats {
|
||||
unsigned nr_max_paths;
|
||||
unsigned journal_entries_size;
|
||||
unsigned max_mem;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
darray_trans_kmalloc_trace trans_kmalloc_trace;
|
||||
#endif
|
||||
char *max_paths_text;
|
||||
};
|
||||
|
||||
@ -649,28 +670,6 @@ struct journal_seq_blacklist_table {
|
||||
} entries[];
|
||||
};
|
||||
|
||||
struct journal_keys {
|
||||
/* must match layout in darray_types.h */
|
||||
size_t nr, size;
|
||||
struct journal_key {
|
||||
u64 journal_seq;
|
||||
u32 journal_offset;
|
||||
enum btree_id btree_id:8;
|
||||
unsigned level:8;
|
||||
bool allocated;
|
||||
bool overwritten;
|
||||
struct bkey_i *k;
|
||||
} *data;
|
||||
/*
|
||||
* Gap buffer: instead of all the empty space in the array being at the
|
||||
* end of the buffer - from @nr to @size - the empty space is at @gap.
|
||||
* This means that sequential insertions are O(n) instead of O(n^2).
|
||||
*/
|
||||
size_t gap;
|
||||
atomic_t ref;
|
||||
bool initial_ref_held;
|
||||
};
|
||||
|
||||
struct btree_trans_buf {
|
||||
struct btree_trans *trans;
|
||||
};
|
||||
@ -679,6 +678,7 @@ struct btree_trans_buf {
|
||||
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
|
||||
|
||||
#define BCH_WRITE_REFS() \
|
||||
x(journal) \
|
||||
x(trans) \
|
||||
x(write) \
|
||||
x(promote) \
|
||||
@ -691,12 +691,14 @@ struct btree_trans_buf {
|
||||
x(dio_write) \
|
||||
x(discard) \
|
||||
x(discard_fast) \
|
||||
x(check_discard_freespace_key) \
|
||||
x(invalidate) \
|
||||
x(delete_dead_snapshots) \
|
||||
x(gc_gens) \
|
||||
x(snapshot_delete_pagecache) \
|
||||
x(sysfs) \
|
||||
x(btree_write_buffer)
|
||||
x(btree_write_buffer) \
|
||||
x(btree_node_scrub)
|
||||
|
||||
enum bch_write_ref {
|
||||
#define x(n) BCH_WRITE_REF_##n,
|
||||
@ -705,6 +707,8 @@ enum bch_write_ref {
|
||||
BCH_WRITE_REF_NR,
|
||||
};
|
||||
|
||||
#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
|
||||
|
||||
struct bch_fs {
|
||||
struct closure cl;
|
||||
|
||||
@ -733,6 +737,12 @@ struct bch_fs {
|
||||
#else
|
||||
struct percpu_ref writes;
|
||||
#endif
|
||||
/*
|
||||
* Certain operations are only allowed in single threaded mode, during
|
||||
* recovery, and we want to assert that this is the case:
|
||||
*/
|
||||
struct task_struct *recovery_task;
|
||||
|
||||
/*
|
||||
* Analogous to c->writes, for asynchronous ops that don't necessarily
|
||||
* need fs to be read-write
|
||||
@ -763,6 +773,8 @@ struct bch_fs {
|
||||
__uuid_t user_uuid;
|
||||
|
||||
u16 version;
|
||||
u16 version_incompat;
|
||||
u16 version_incompat_allowed;
|
||||
u16 version_min;
|
||||
u16 version_upgrade_complete;
|
||||
|
||||
@ -781,6 +793,9 @@ struct bch_fs {
|
||||
u64 btrees_lost_data;
|
||||
} sb;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
struct unicode_map *cf_encoding;
|
||||
#endif
|
||||
|
||||
struct bch_sb_handle disk_sb;
|
||||
|
||||
@ -833,9 +848,10 @@ struct bch_fs {
|
||||
struct work_struct btree_interior_update_work;
|
||||
|
||||
struct workqueue_struct *btree_node_rewrite_worker;
|
||||
|
||||
struct list_head pending_node_rewrites;
|
||||
struct mutex pending_node_rewrites_lock;
|
||||
struct list_head btree_node_rewrites;
|
||||
struct list_head btree_node_rewrites_pending;
|
||||
spinlock_t btree_node_rewrites_lock;
|
||||
struct closure_waitlist btree_node_rewrites_wait;
|
||||
|
||||
/* btree_io.c: */
|
||||
spinlock_t btree_write_error_lock;
|
||||
@ -860,7 +876,7 @@ struct bch_fs {
|
||||
struct btree_write_buffer btree_write_buffer;
|
||||
|
||||
struct workqueue_struct *btree_update_wq;
|
||||
struct workqueue_struct *btree_io_complete_wq;
|
||||
struct workqueue_struct *btree_write_complete_wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
/*
|
||||
@ -966,11 +982,9 @@ struct bch_fs {
|
||||
struct rhashtable promote_table;
|
||||
|
||||
mempool_t compression_bounce[2];
|
||||
mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
|
||||
mempool_t decompress_workspace;
|
||||
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
|
||||
size_t zstd_workspace_size;
|
||||
|
||||
struct crypto_shash *sha256;
|
||||
struct crypto_sync_skcipher *chacha20;
|
||||
struct crypto_shash *poly1305;
|
||||
|
||||
@ -994,15 +1008,11 @@ struct bch_fs {
|
||||
wait_queue_head_t copygc_running_wq;
|
||||
|
||||
/* STRIPES: */
|
||||
GENRADIX(struct stripe) stripes;
|
||||
GENRADIX(struct gc_stripe) gc_stripes;
|
||||
|
||||
struct hlist_head ec_stripes_new[32];
|
||||
spinlock_t ec_stripes_new_lock;
|
||||
|
||||
ec_stripes_heap ec_stripes_heap;
|
||||
struct mutex ec_stripes_heap_lock;
|
||||
|
||||
/* ERASURE CODING */
|
||||
struct list_head ec_stripe_head_list;
|
||||
struct mutex ec_stripe_head_lock;
|
||||
@ -1026,6 +1036,7 @@ struct bch_fs {
|
||||
struct list_head vfs_inodes_list;
|
||||
struct mutex vfs_inodes_lock;
|
||||
struct rhashtable vfs_inodes_table;
|
||||
struct rhltable vfs_inodes_by_inum_table;
|
||||
|
||||
/* VFS IO PATH - fs-io.c */
|
||||
struct bio_set writepage_bioset;
|
||||
@ -1047,10 +1058,12 @@ struct bch_fs {
|
||||
* for signaling to the toplevel code which pass we want to run now.
|
||||
*/
|
||||
enum bch_recovery_pass curr_recovery_pass;
|
||||
enum bch_recovery_pass next_recovery_pass;
|
||||
/* bitmask of recovery passes that we actually ran */
|
||||
u64 recovery_passes_complete;
|
||||
/* never rewinds version of curr_recovery_pass */
|
||||
enum bch_recovery_pass recovery_pass_done;
|
||||
spinlock_t recovery_pass_lock;
|
||||
struct semaphore online_fsck_mutex;
|
||||
|
||||
/* DEBUG JUNK */
|
||||
@ -1061,9 +1074,6 @@ struct bch_fs {
|
||||
struct btree_node *verify_ondisk;
|
||||
struct mutex verify_lock;
|
||||
|
||||
u64 *unused_inode_hints;
|
||||
unsigned inode_shard_bits;
|
||||
|
||||
/*
|
||||
* A btree node on disk could have too many bsets for an iterator to fit
|
||||
* on the stack - have to dynamically allocate them
|
||||
@ -1085,8 +1095,6 @@ struct bch_fs {
|
||||
u64 counters_on_mount[BCH_COUNTER_NR];
|
||||
u64 __percpu *counters;
|
||||
|
||||
unsigned copy_gc_enabled:1;
|
||||
|
||||
struct bch2_time_stats times[BCH_TIME_STAT_NR];
|
||||
|
||||
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
|
||||
|
@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k)
|
||||
x(snapshot_tree, 31) \
|
||||
x(logged_op_truncate, 32) \
|
||||
x(logged_op_finsert, 33) \
|
||||
x(accounting, 34)
|
||||
x(accounting, 34) \
|
||||
x(inode_alloc_cursor, 35)
|
||||
|
||||
enum bch_bkey_type {
|
||||
#define x(name, nr) KEY_TYPE_##name = nr,
|
||||
@ -463,7 +464,8 @@ struct bch_backpointer {
|
||||
__u8 btree_id;
|
||||
__u8 level;
|
||||
__u8 data_type;
|
||||
__u64 bucket_offset:40;
|
||||
__u8 bucket_gen;
|
||||
__u32 pad;
|
||||
__u32 bucket_len;
|
||||
struct bpos pos;
|
||||
} __packed __aligned(8);
|
||||
@ -499,8 +501,6 @@ struct bch_sb_field {
|
||||
#include "disk_groups_format.h"
|
||||
#include "extents_format.h"
|
||||
#include "ec_format.h"
|
||||
#include "dirent_format.h"
|
||||
#include "disk_groups_format.h"
|
||||
#include "inode_format.h"
|
||||
#include "journal_seq_blacklist_format.h"
|
||||
#include "logged_ops_format.h"
|
||||
@ -679,7 +679,19 @@ struct bch_sb_field_ext {
|
||||
x(disk_accounting_v3, BCH_VERSION(1, 10)) \
|
||||
x(disk_accounting_inum, BCH_VERSION(1, 11)) \
|
||||
x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \
|
||||
x(inode_has_child_snapshots, BCH_VERSION(1, 13))
|
||||
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
|
||||
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
|
||||
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
|
||||
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
|
||||
x(inode_depth, BCH_VERSION(1, 17)) \
|
||||
x(persistent_inode_cursors, BCH_VERSION(1, 18)) \
|
||||
x(autofix_errors, BCH_VERSION(1, 19)) \
|
||||
x(directory_size, BCH_VERSION(1, 20)) \
|
||||
x(cached_backpointers, BCH_VERSION(1, 21)) \
|
||||
x(stripe_backpointers, BCH_VERSION(1, 22)) \
|
||||
x(stripe_lru, BCH_VERSION(1, 23)) \
|
||||
x(casefolding, BCH_VERSION(1, 24)) \
|
||||
x(extent_flags, BCH_VERSION(1, 25))
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
@ -830,6 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
|
||||
LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
|
||||
LE64_BITMASK(BCH_SB_SINGLE_DEVICE, struct bch_sb, flags[3], 63, 64);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
|
||||
LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
|
||||
@ -844,6 +857,13 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
|
||||
struct bch_sb, flags[5], 0, 16);
|
||||
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
|
||||
struct bch_sb, flags[5], 16, 32);
|
||||
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48);
|
||||
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
|
||||
struct bch_sb, flags[5], 48, 64);
|
||||
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
|
||||
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
|
||||
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
|
||||
LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22);
|
||||
|
||||
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
|
||||
{
|
||||
@ -896,21 +916,24 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
|
||||
x(new_varint, 15) \
|
||||
x(journal_no_flush, 16) \
|
||||
x(alloc_v2, 17) \
|
||||
x(extents_across_btree_nodes, 18)
|
||||
x(extents_across_btree_nodes, 18) \
|
||||
x(incompat_version_field, 19) \
|
||||
x(casefolding, 20)
|
||||
|
||||
#define BCH_SB_FEATURES_ALWAYS \
|
||||
((1ULL << BCH_FEATURE_new_extent_overwrite)| \
|
||||
(1ULL << BCH_FEATURE_extents_above_btree_updates)|\
|
||||
(1ULL << BCH_FEATURE_btree_updates_journalled)|\
|
||||
(1ULL << BCH_FEATURE_alloc_v2)|\
|
||||
(1ULL << BCH_FEATURE_extents_across_btree_nodes))
|
||||
(BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \
|
||||
BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\
|
||||
BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\
|
||||
BIT_ULL(BCH_FEATURE_alloc_v2)|\
|
||||
BIT_ULL(BCH_FEATURE_extents_across_btree_nodes))
|
||||
|
||||
#define BCH_SB_FEATURES_ALL \
|
||||
(BCH_SB_FEATURES_ALWAYS| \
|
||||
(1ULL << BCH_FEATURE_new_siphash)| \
|
||||
(1ULL << BCH_FEATURE_btree_ptr_v2)| \
|
||||
(1ULL << BCH_FEATURE_new_varint)| \
|
||||
(1ULL << BCH_FEATURE_journal_no_flush))
|
||||
BIT_ULL(BCH_FEATURE_new_siphash)| \
|
||||
BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \
|
||||
BIT_ULL(BCH_FEATURE_new_varint)| \
|
||||
BIT_ULL(BCH_FEATURE_journal_no_flush)| \
|
||||
BIT_ULL(BCH_FEATURE_incompat_version_field))
|
||||
|
||||
enum bch_sb_feature {
|
||||
#define x(f, n) BCH_FEATURE_##f,
|
||||
@ -962,6 +985,19 @@ enum bch_error_actions {
|
||||
BCH_ON_ERROR_NR
|
||||
};
|
||||
|
||||
/*
 * X-macro table of degraded-action settings, one x(name, value) per entry.
 * All four values fit in two bits.
 * NOTE(review): presumably these are the values kept in the 2-bit
 * BCH_SB_DEGRADED_ACTION superblock field - confirm.
 */
#define BCH_DEGRADED_ACTIONS()		\
	x(ask,			0)	\
	x(yes,			1)	\
	x(very,			2)	\
	x(no,			3)

/*
 * BCH_DEGRADED_<name> constants generated from the table above.
 * BCH_DEGRADED_ACTIONS_NR follows the last entry and so equals the number
 * of entries in the table.
 */
enum bch_degraded_actions {
#define x(_name, _val)	BCH_DEGRADED_##_name = _val,
	BCH_DEGRADED_ACTIONS()
#undef x
	BCH_DEGRADED_ACTIONS_NR
};
|
||||
|
||||
#define BCH_STR_HASH_TYPES() \
|
||||
x(crc32c, 0) \
|
||||
x(crc64, 1) \
|
||||
@ -1032,7 +1068,7 @@ static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
|
||||
x(crc64, 2) \
|
||||
x(xxhash, 3)
|
||||
|
||||
enum bch_csum_opts {
|
||||
enum bch_csum_opt {
|
||||
#define x(t, n) BCH_CSUM_OPT_##t = n,
|
||||
BCH_CSUM_OPTS()
|
||||
#undef x
|
||||
@ -1121,7 +1157,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
|
||||
x(log, 9) \
|
||||
x(overwrite, 10) \
|
||||
x(write_buffer_keys, 11) \
|
||||
x(datetime, 12)
|
||||
x(datetime, 12) \
|
||||
x(log_bkey, 13)
|
||||
|
||||
enum bch_jset_entry_type {
|
||||
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
|
||||
@ -1221,6 +1258,15 @@ struct jset_entry_log {
|
||||
u8 d[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l)
|
||||
{
|
||||
unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d);
|
||||
|
||||
while (b && !l->d[b - 1])
|
||||
--b;
|
||||
return b;
|
||||
}
|
||||
|
||||
struct jset_entry_datetime {
|
||||
struct jset_entry entry;
|
||||
__le64 seconds;
|
||||
@ -1268,14 +1314,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
|
||||
/* Btree: */
|
||||
|
||||
enum btree_id_flags {
|
||||
BTREE_ID_EXTENTS = BIT(0),
|
||||
BTREE_ID_SNAPSHOTS = BIT(1),
|
||||
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
|
||||
BTREE_ID_DATA = BIT(3),
|
||||
BTREE_IS_extents = BIT(0),
|
||||
BTREE_IS_snapshots = BIT(1),
|
||||
BTREE_IS_snapshot_field = BIT(2),
|
||||
BTREE_IS_data = BIT(3),
|
||||
BTREE_IS_write_buffer = BIT(4),
|
||||
};
|
||||
|
||||
#define BCH_BTREE_IDS() \
|
||||
x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
|
||||
x(extents, 0, \
|
||||
BTREE_IS_extents| \
|
||||
BTREE_IS_snapshots| \
|
||||
BTREE_IS_data, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_error)| \
|
||||
BIT_ULL(KEY_TYPE_cookie)| \
|
||||
@ -1283,17 +1333,20 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_reservation)| \
|
||||
BIT_ULL(KEY_TYPE_reflink_p)| \
|
||||
BIT_ULL(KEY_TYPE_inline_data)) \
|
||||
x(inodes, 1, BTREE_ID_SNAPSHOTS, \
|
||||
x(inodes, 1, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_inode)| \
|
||||
BIT_ULL(KEY_TYPE_inode_v2)| \
|
||||
BIT_ULL(KEY_TYPE_inode_v3)| \
|
||||
BIT_ULL(KEY_TYPE_inode_generation)) \
|
||||
x(dirents, 2, BTREE_ID_SNAPSHOTS, \
|
||||
x(dirents, 2, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_hash_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_dirent)) \
|
||||
x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
|
||||
x(xattrs, 3, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_cookie)| \
|
||||
BIT_ULL(KEY_TYPE_hash_whiteout)| \
|
||||
@ -1307,7 +1360,9 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_quota)) \
|
||||
x(stripes, 6, 0, \
|
||||
BIT_ULL(KEY_TYPE_stripe)) \
|
||||
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
|
||||
x(reflink, 7, \
|
||||
BTREE_IS_extents| \
|
||||
BTREE_IS_data, \
|
||||
BIT_ULL(KEY_TYPE_reflink_v)| \
|
||||
BIT_ULL(KEY_TYPE_indirect_inline_data)| \
|
||||
BIT_ULL(KEY_TYPE_error)) \
|
||||
@ -1315,28 +1370,38 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_subvolume)) \
|
||||
x(snapshots, 9, 0, \
|
||||
BIT_ULL(KEY_TYPE_snapshot)) \
|
||||
x(lru, 10, 0, \
|
||||
x(lru, 10, \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(freespace, 11, BTREE_ID_EXTENTS, \
|
||||
x(freespace, 11, \
|
||||
BTREE_IS_extents, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(need_discard, 12, 0, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(backpointers, 13, 0, \
|
||||
x(backpointers, 13, \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_backpointer)) \
|
||||
x(bucket_gens, 14, 0, \
|
||||
BIT_ULL(KEY_TYPE_bucket_gens)) \
|
||||
x(snapshot_trees, 15, 0, \
|
||||
BIT_ULL(KEY_TYPE_snapshot_tree)) \
|
||||
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
x(deleted_inodes, 16, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(logged_ops, 17, 0, \
|
||||
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
|
||||
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
|
||||
x(rebalance_work, 18, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
|
||||
x(subvolume_children, 19, 0, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
x(accounting, 20, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_accounting)) \
|
||||
|
||||
enum btree_id {
|
||||
@ -1361,6 +1426,8 @@ static inline bool btree_id_is_alloc(enum btree_id id)
|
||||
case BTREE_ID_need_discard:
|
||||
case BTREE_ID_freespace:
|
||||
case BTREE_ID_bucket_gens:
|
||||
case BTREE_ID_lru:
|
||||
case BTREE_ID_accounting:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -87,6 +87,7 @@ struct bch_ioctl_incremental {
|
||||
#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
|
||||
#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online)
|
||||
#define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting)
|
||||
#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters)
|
||||
|
||||
/* The ioctls below act on a particular file, not the filesystem as a whole: */
|
||||
|
||||
@ -213,6 +214,10 @@ struct bch_ioctl_data {
|
||||
struct bpos end_pos;
|
||||
|
||||
union {
|
||||
struct {
|
||||
__u32 dev;
|
||||
__u32 data_types;
|
||||
} scrub;
|
||||
struct {
|
||||
__u32 dev;
|
||||
__u32 pad;
|
||||
@ -229,6 +234,11 @@ enum bch_data_event {
|
||||
BCH_DATA_EVENT_NR = 1,
|
||||
};
|
||||
|
||||
/*
 * Special values at the top of the 8-bit range.
 * NOTE(review): presumably reported in bch_ioctl_data_progress.data_type
 * (a __u8) in place of a regular data type - confirm against the users.
 */
enum data_progress_data_type_special {
	DATA_PROGRESS_DATA_TYPE_phys	= 254,
	DATA_PROGRESS_DATA_TYPE_done	= 255,
};
|
||||
|
||||
struct bch_ioctl_data_progress {
|
||||
__u8 data_type;
|
||||
__u8 btree_id;
|
||||
@ -237,11 +247,19 @@ struct bch_ioctl_data_progress {
|
||||
|
||||
__u64 sectors_done;
|
||||
__u64 sectors_total;
|
||||
__u64 sectors_error_corrected;
|
||||
__u64 sectors_error_uncorrected;
|
||||
} __packed __aligned(8);
|
||||
|
||||
/*
 * Result codes for a data event. Numbering starts at 1, so 0 is not one of
 * the named results.
 * NOTE(review): presumably stored in bch_ioctl_data_event.ret - confirm.
 */
enum bch_ioctl_data_event_ret {
	BCH_IOCTL_DATA_EVENT_RET_done		= 1,
	BCH_IOCTL_DATA_EVENT_RET_device_offline	= 2,
};
|
||||
|
||||
struct bch_ioctl_data_event {
|
||||
__u8 type;
|
||||
__u8 pad[7];
|
||||
__u8 ret;
|
||||
__u8 pad[6];
|
||||
union {
|
||||
struct bch_ioctl_data_progress p;
|
||||
__u64 pad2[15];
|
||||
@ -443,4 +461,13 @@ struct bch_ioctl_query_accounting {
|
||||
struct bkey_i_accounting accounting[];
|
||||
};
|
||||
|
||||
#define BCH_IOCTL_QUERY_COUNTERS_MOUNT (1 << 0)
|
||||
|
||||
struct bch_ioctl_query_counters {
|
||||
__u16 nr;
|
||||
__u16 flags;
|
||||
__u32 pad;
|
||||
__u64 d[];
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_IOCTL_H */
|
||||
|
@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
|
||||
enum bch_validate_flags flags,
|
||||
struct printbuf *err)
|
||||
{
|
||||
unsigned i, bits = KEY_PACKED_BITS_START;
|
||||
unsigned bits = KEY_PACKED_BITS_START;
|
||||
|
||||
if (f->nr_fields != BKEY_NR_FIELDS) {
|
||||
prt_printf(err, "incorrect number of fields: got %u, should be %u",
|
||||
@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
|
||||
* Verify that the packed format can't represent fields larger than the
|
||||
* unpacked format:
|
||||
*/
|
||||
for (i = 0; i < f->nr_fields; i++) {
|
||||
if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) &&
|
||||
bch2_bkey_format_field_overflows(f, i)) {
|
||||
for (unsigned i = 0; i < f->nr_fields; i++) {
|
||||
if (bch2_bkey_format_field_overflows(f, i)) {
|
||||
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
|
||||
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
|
||||
unsigned packed_bits = min(64, f->bits_per_field[i]);
|
||||
|
@ -9,13 +9,6 @@
|
||||
#include "util.h"
|
||||
#include "vstructs.h"
|
||||
|
||||
enum bch_validate_flags {
|
||||
BCH_VALIDATE_write = BIT(0),
|
||||
BCH_VALIDATE_commit = BIT(1),
|
||||
BCH_VALIDATE_journal = BIT(2),
|
||||
BCH_VALIDATE_silent = BIT(3),
|
||||
};
|
||||
|
||||
#if 0
|
||||
|
||||
/*
|
||||
@ -198,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
|
||||
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
|
||||
{
|
||||
return bpos_eq(l.k->p, r.k->p) &&
|
||||
l.k->size == r.k->size &&
|
||||
bkey_bytes(l.k) == bkey_bytes(r.k) &&
|
||||
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = {
|
||||
};
|
||||
|
||||
static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
})
|
||||
|
||||
static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@ -59,7 +59,7 @@ fsck_err:
|
||||
})
|
||||
|
||||
static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
})
|
||||
|
||||
static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -124,7 +124,7 @@ const struct bkey_ops bch2_bkey_null_ops = {
|
||||
};
|
||||
|
||||
int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
|
||||
return 0;
|
||||
@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
if (!ops->key_validate)
|
||||
return 0;
|
||||
|
||||
ret = ops->key_validate(c, k, flags);
|
||||
ret = ops->key_validate(c, k, from);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
@ -161,9 +161,10 @@ const char *bch2_btree_node_type_str(enum btree_node_type type)
|
||||
}
|
||||
|
||||
int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum btree_node_type type,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
enum btree_node_type type = __btree_node_type(from.level, from.btree);
|
||||
|
||||
if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
|
||||
return 0;
|
||||
|
||||
@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
|
||||
bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
|
||||
(type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) &&
|
||||
(type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
|
||||
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
|
||||
c, bkey_invalid_type_for_btree,
|
||||
"invalid key type for btree %s (%s)",
|
||||
@ -228,15 +229,15 @@ fsck_err:
|
||||
}
|
||||
|
||||
int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum btree_node_type type,
|
||||
enum bch_validate_flags flags)
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
return __bch2_bkey_validate(c, k, type, flags) ?:
|
||||
bch2_bkey_val_validate(c, k, flags);
|
||||
return __bch2_bkey_validate(c, k, from) ?:
|
||||
bch2_bkey_val_validate(c, k, from);
|
||||
}
|
||||
|
||||
int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_s_c k, enum bch_validate_flags flags)
|
||||
struct bkey_s_c k,
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
|
@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops;
|
||||
*/
|
||||
struct bkey_ops {
|
||||
int (*key_validate)(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags);
|
||||
struct bkey_validate_context from);
|
||||
void (*val_to_text)(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
void (*swab)(struct bkey_s);
|
||||
@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
|
||||
: &bch2_bkey_null_ops;
|
||||
}
|
||||
|
||||
int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
|
||||
int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
|
||||
enum bch_validate_flags);
|
||||
int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
|
||||
enum bch_validate_flags);
|
||||
int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c,
|
||||
enum bch_validate_flags);
|
||||
struct bkey_validate_context from);
|
||||
|
||||
void bch2_bpos_to_text(struct printbuf *, struct bpos);
|
||||
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
|
||||
|
@ -210,4 +210,32 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
|
||||
BCH_BKEY_TYPES();
|
||||
#undef x
|
||||
|
||||
enum bch_validate_flags {
|
||||
BCH_VALIDATE_write = BIT(0),
|
||||
BCH_VALIDATE_commit = BIT(1),
|
||||
BCH_VALIDATE_silent = BIT(2),
|
||||
};
|
||||
|
||||
#define BKEY_VALIDATE_CONTEXTS() \
|
||||
x(unknown) \
|
||||
x(superblock) \
|
||||
x(journal) \
|
||||
x(btree_root) \
|
||||
x(btree_node) \
|
||||
x(commit)
|
||||
|
||||
struct bkey_validate_context {
|
||||
enum {
|
||||
#define x(n) BKEY_VALIDATE_##n,
|
||||
BKEY_VALIDATE_CONTEXTS()
|
||||
#undef x
|
||||
} from:8;
|
||||
enum bch_validate_flags flags:8;
|
||||
u8 level;
|
||||
enum btree_id btree;
|
||||
bool root:1;
|
||||
unsigned journal_offset;
|
||||
u64 journal_seq;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_BKEY_TYPES_H */
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include "trace.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/unaligned.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/prefetch.h>
|
||||
|
@ -24,7 +24,10 @@ do { \
|
||||
} while (0)
|
||||
|
||||
const char * const bch2_btree_node_flags[] = {
|
||||
#define x(f) #f,
|
||||
"typebit",
|
||||
"typebit",
|
||||
"typebit",
|
||||
#define x(f) [BTREE_NODE_##f] = #f,
|
||||
BTREE_FLAGS()
|
||||
#undef x
|
||||
NULL
|
||||
@ -59,16 +62,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)
|
||||
|
||||
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
|
||||
if (b->c.lock.readers)
|
||||
list_move(&b->list, &bc->freed_pcpu);
|
||||
list_add(&b->list, &bc->freed_pcpu);
|
||||
else
|
||||
list_move(&b->list, &bc->freed_nonpcpu);
|
||||
list_add(&b->list, &bc->freed_nonpcpu);
|
||||
}
|
||||
|
||||
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(!b->data);
|
||||
|
||||
bc->nr_freeable++;
|
||||
list_add(&b->list, &bc->freeable);
|
||||
}
|
||||
|
||||
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
__bch2_btree_node_to_freelist(bc, b);
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
}
|
||||
|
||||
static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(btree_node_hashed(b));
|
||||
|
||||
/*
|
||||
@ -94,11 +119,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
#endif
|
||||
b->aux_data = NULL;
|
||||
|
||||
bc->nr_freeable--;
|
||||
|
||||
btree_node_to_freedlist(bc, b);
|
||||
}
|
||||
|
||||
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(list_empty(&b->list));
|
||||
list_del_init(&b->list);
|
||||
--bc->nr_freeable;
|
||||
__btree_node_data_free(bc, b);
|
||||
}
|
||||
|
||||
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
|
||||
const void *obj)
|
||||
{
|
||||
@ -172,23 +203,12 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bch2_btree_lock_init(&b->c, 0);
|
||||
bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);
|
||||
|
||||
bc->nr_freeable++;
|
||||
list_add(&b->list, &bc->freeable);
|
||||
__bch2_btree_node_to_freelist(bc, b);
|
||||
return b;
|
||||
}
|
||||
|
||||
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
list_move(&b->list, &c->btree_cache.freeable);
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
}
|
||||
|
||||
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
||||
@ -205,7 +225,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
BUG_ON(!__btree_node_pinned(bc, b));
|
||||
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
|
||||
set_btree_node_pinned(b);
|
||||
list_move(&b->list, &bc->live[1].list);
|
||||
@ -236,11 +255,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
|
||||
|
||||
/* Btree in memory cache - hash table */
|
||||
|
||||
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
lockdep_assert_held(&bc->lock);
|
||||
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
|
||||
|
||||
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
|
||||
BUG_ON(ret);
|
||||
|
||||
/* Cause future lookups for this node to fail: */
|
||||
@ -248,17 +267,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
|
||||
if (b->c.btree_id < BTREE_ID_NR)
|
||||
--bc->nr_by_btree[b->c.btree_id];
|
||||
--bc->live[btree_node_pinned(b)].nr;
|
||||
list_del_init(&b->list);
|
||||
}
|
||||
|
||||
bc->live[btree_node_pinned(b)].nr--;
|
||||
bc->nr_freeable++;
|
||||
list_move(&b->list, &bc->freeable);
|
||||
/*
 * Remove @b from the btree cache hash table and put it on the freeable
 * list.
 *
 * The unhashing step asserts (via lockdep) that bc->lock is held, so
 * callers must hold it.
 */
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	__bch2_btree_node_hash_remove(bc, b);
	__bch2_btree_node_to_freelist(bc, b);
}
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(b->hash_val);
|
||||
b->hash_val = btree_ptr_hash_val(&b->key);
|
||||
|
||||
b->hash_val = btree_ptr_hash_val(&b->key);
|
||||
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
|
||||
bch_btree_cache_params);
|
||||
if (ret)
|
||||
@ -270,10 +294,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
bool p = __btree_node_pinned(bc, b);
|
||||
mod_bit(BTREE_NODE_pinned, &b->flags, p);
|
||||
|
||||
list_move_tail(&b->list, &bc->live[p].list);
|
||||
list_add_tail(&b->list, &bc->live[p].list);
|
||||
bc->live[p].nr++;
|
||||
|
||||
bc->nr_freeable--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -306,7 +328,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
|
||||
if (!IS_ERR_OR_NULL(b)) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
__bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
|
||||
bkey_copy(&b->key, new);
|
||||
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
|
||||
@ -485,7 +507,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||
goto out;
|
||||
|
||||
if (!btree_node_reclaim(c, b, true)) {
|
||||
btree_node_data_free(c, b);
|
||||
btree_node_data_free(bc, b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
freed++;
|
||||
@ -501,10 +523,10 @@ restart:
|
||||
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
|
||||
--touched;;
|
||||
} else if (!btree_node_reclaim(c, b, true)) {
|
||||
bch2_btree_node_hash_remove(bc, b);
|
||||
__bch2_btree_node_hash_remove(bc, b);
|
||||
__btree_node_data_free(bc, b);
|
||||
|
||||
freed++;
|
||||
btree_node_data_free(c, b);
|
||||
bc->nr_freed++;
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
@ -587,7 +609,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
BUG_ON(btree_node_read_in_flight(b) ||
|
||||
btree_node_write_in_flight(b));
|
||||
|
||||
btree_node_data_free(c, b);
|
||||
btree_node_data_free(bc, b);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
BUG_ON(!bch2_journal_error(&c->journal) &&
|
||||
@ -773,21 +796,22 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
|
||||
}
|
||||
|
||||
b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
|
||||
if (!b) {
|
||||
if (b) {
|
||||
bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
|
||||
} else {
|
||||
mutex_unlock(&bc->lock);
|
||||
bch2_trans_unlock(trans);
|
||||
b = __btree_node_mem_alloc(c, GFP_KERNEL);
|
||||
if (!b)
|
||||
goto err;
|
||||
bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
|
||||
mutex_lock(&bc->lock);
|
||||
}
|
||||
|
||||
bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
|
||||
|
||||
BUG_ON(!six_trylock_intent(&b->c.lock));
|
||||
BUG_ON(!six_trylock_write(&b->c.lock));
|
||||
got_node:
|
||||
|
||||
got_node:
|
||||
/*
|
||||
* btree_free() doesn't free memory; it sticks the node on the end of
|
||||
* the list. Check if there's any freed nodes there:
|
||||
@ -796,7 +820,12 @@ got_node:
|
||||
if (!btree_node_reclaim(c, b2, false)) {
|
||||
swap(b->data, b2->data);
|
||||
swap(b->aux_data, b2->aux_data);
|
||||
|
||||
list_del_init(&b2->list);
|
||||
--bc->nr_freeable;
|
||||
btree_node_to_freedlist(bc, b2);
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
six_unlock_write(&b2->c.lock);
|
||||
six_unlock_intent(&b2->c.lock);
|
||||
goto got_mem;
|
||||
@ -810,11 +839,8 @@ got_node:
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
bc->nr_freeable++;
|
||||
got_mem:
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(btree_node_hashed(b));
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
BUG_ON(btree_node_write_in_flight(b));
|
||||
@ -845,7 +871,7 @@ err:
|
||||
if (bc->alloc_lock == current) {
|
||||
b2 = btree_node_cannibalize(c);
|
||||
clear_btree_node_just_written(b2);
|
||||
bch2_btree_node_hash_remove(bc, b2);
|
||||
__bch2_btree_node_hash_remove(bc, b2);
|
||||
|
||||
if (b) {
|
||||
swap(b->data, b2->data);
|
||||
@ -855,9 +881,9 @@ err:
|
||||
six_unlock_intent(&b2->c.lock);
|
||||
} else {
|
||||
b = b2;
|
||||
list_del_init(&b->list);
|
||||
}
|
||||
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
trace_and_count(c, btree_cache_cannibalize, trans);
|
||||
@ -936,7 +962,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
|
||||
b->hash_val = 0;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
list_add(&b->list, &bc->freeable);
|
||||
__bch2_btree_node_to_freelist(bc, b);
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
@ -982,16 +1008,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
|
||||
return;
|
||||
|
||||
prt_printf(&buf,
|
||||
"btree node header doesn't match ptr\n"
|
||||
"btree %s level %u\n"
|
||||
"ptr: ",
|
||||
bch2_btree_id_str(b->c.btree_id), b->c.level);
|
||||
"btree node header doesn't match ptr: ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_str(&buf, "\nptr: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
|
||||
prt_printf(&buf, "\nheader: btree %s level %llu\n"
|
||||
"min ",
|
||||
bch2_btree_id_str(BTREE_NODE_ID(b->data)),
|
||||
BTREE_NODE_LEVEL(b->data));
|
||||
prt_str(&buf, "\nheader: ");
|
||||
bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data));
|
||||
prt_str(&buf, "\nmin ");
|
||||
bch2_bpos_to_text(&buf, b->data->min_key);
|
||||
|
||||
prt_printf(&buf, "\nmax ");
|
||||
@ -1111,7 +1135,7 @@ retry:
|
||||
|
||||
if (unlikely(btree_node_read_error(b))) {
|
||||
six_unlock_type(&b->c.lock, lock_type);
|
||||
return ERR_PTR(-BCH_ERR_btree_node_read_error);
|
||||
return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
|
||||
}
|
||||
|
||||
EBUG_ON(b->c.btree_id != path->btree_id);
|
||||
@ -1201,7 +1225,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
|
||||
|
||||
if (unlikely(btree_node_read_error(b))) {
|
||||
six_unlock_type(&b->c.lock, lock_type);
|
||||
return ERR_PTR(-BCH_ERR_btree_node_read_error);
|
||||
return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
|
||||
}
|
||||
|
||||
EBUG_ON(b->c.btree_id != path->btree_id);
|
||||
@ -1283,7 +1307,7 @@ lock_node:
|
||||
|
||||
if (unlikely(btree_node_read_error(b))) {
|
||||
six_unlock_read(&b->c.lock);
|
||||
b = ERR_PTR(-BCH_ERR_btree_node_read_error);
|
||||
b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1312,9 +1336,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
|
||||
|
||||
b = bch2_btree_node_fill(trans, path, k, btree_id,
|
||||
level, SIX_LOCK_read, false);
|
||||
if (!IS_ERR_OR_NULL(b))
|
||||
int ret = PTR_ERR_OR_ZERO(b);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (b)
|
||||
six_unlock_read(&b->c.lock);
|
||||
return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
|
||||
@ -1353,7 +1380,7 @@ wait_on_io:
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
bch2_btree_node_hash_remove(bc, b);
|
||||
btree_node_data_free(c, b);
|
||||
btree_node_data_free(bc, b);
|
||||
mutex_unlock(&bc->lock);
|
||||
out:
|
||||
six_unlock_write(&b->c.lock);
|
||||
@ -1373,13 +1400,31 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree)
|
||||
prt_printf(out, "(unknown btree %u)", btree);
|
||||
}
|
||||
|
||||
/*
 * Print a btree id and level to @out as "btree=<id> level=<level>".
 *
 * The id is formatted by bch2_btree_id_to_text().
 */
void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level)
{
	prt_str(out, "btree=");
	bch2_btree_id_to_text(out, btree);
	prt_printf(out, " level=%u", level);
}
|
||||
|
||||
void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
enum btree_id btree, unsigned level, struct bkey_s_c k)
|
||||
{
|
||||
bch2_btree_id_to_text(out, btree);
|
||||
prt_printf(out, " level %u/", level);
|
||||
struct btree_root *r = bch2_btree_id_root(c, btree);
|
||||
if (r)
|
||||
prt_printf(out, "%u", r->level);
|
||||
else
|
||||
prt_printf(out, "(unknown)");
|
||||
prt_newline(out);
|
||||
|
||||
bch2_bkey_val_to_text(out, c, k);
|
||||
}
|
||||
|
||||
void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
|
||||
{
|
||||
prt_printf(out, "%s level %u/%u\n ",
|
||||
bch2_btree_id_str(b->c.btree_id),
|
||||
b->c.level,
|
||||
bch2_btree_id_root(c, b->c.btree_id)->level);
|
||||
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
|
||||
__bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key));
|
||||
}
|
||||
|
||||
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
|
||||
@ -1453,8 +1498,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
|
||||
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
|
||||
prt_newline(out);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
|
||||
prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]);
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) {
|
||||
bch2_btree_id_to_text(out, i);
|
||||
prt_printf(out, "\t");
|
||||
prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size);
|
||||
prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]);
|
||||
}
|
||||
|
||||
prt_newline(out);
|
||||
prt_printf(out, "freed:\t%zu\n", bc->nr_freed);
|
||||
|
@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *);
|
||||
|
||||
void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
|
||||
|
||||
void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
|
||||
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
|
||||
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
||||
unsigned, enum btree_id);
|
||||
@ -126,19 +128,27 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i
|
||||
} else {
|
||||
unsigned idx = id - BTREE_ID_NR;
|
||||
|
||||
EBUG_ON(idx >= c->btree_roots_extra.nr);
|
||||
/* This can happen when we're called from btree_node_scan */
|
||||
if (idx >= c->btree_roots_extra.nr)
|
||||
return NULL;
|
||||
|
||||
return &c->btree_roots_extra.data[idx];
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
return bch2_btree_id_root(c, b->c.btree_id)->b;
|
||||
struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id);
|
||||
|
||||
return r ? r->b : NULL;
|
||||
}
|
||||
|
||||
const char *bch2_btree_id_str(enum btree_id);
|
||||
const char *bch2_btree_id_str(enum btree_id); /* avoid */
|
||||
void bch2_btree_id_to_text(struct printbuf *, enum btree_id);
|
||||
void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned);
|
||||
|
||||
void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *,
|
||||
enum btree_id, unsigned, struct bkey_s_c);
|
||||
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
|
||||
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
|
||||
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
|
||||
|
@ -27,8 +27,10 @@
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "progress.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "reflink.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
#include "trace.h"
|
||||
@ -56,8 +58,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p)
|
||||
{
|
||||
prt_str(out, bch2_gc_phase_strs[p->phase]);
|
||||
prt_char(out, ' ');
|
||||
bch2_btree_id_to_text(out, p->btree);
|
||||
prt_printf(out, " l=%u ", p->level);
|
||||
bch2_btree_id_level_to_text(out, p->btree, p->level);
|
||||
prt_char(out, ' ');
|
||||
bch2_bpos_to_text(out, p->pos);
|
||||
}
|
||||
|
||||
@ -182,7 +184,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
|
||||
bch2_btree_node_drop_keys_outside_node(b);
|
||||
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
__bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
|
||||
bkey_copy(&b->key, &new->k_i);
|
||||
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
|
||||
@ -209,8 +211,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
|
||||
if (bpos_eq(expected_start, cur->data->min_key))
|
||||
return 0;
|
||||
|
||||
prt_printf(&buf, " at btree %s level %u:\n parent: ",
|
||||
bch2_btree_id_str(b->c.btree_id), b->c.level);
|
||||
prt_printf(&buf, " at ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_printf(&buf, ":\nparent: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
|
||||
if (prev) {
|
||||
@ -277,8 +280,9 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
|
||||
if (bpos_eq(child->key.k.p, b->key.k.p))
|
||||
return 0;
|
||||
|
||||
prt_printf(&buf, "at btree %s level %u:\n parent: ",
|
||||
bch2_btree_id_str(b->c.btree_id), b->c.level);
|
||||
prt_printf(&buf, "\nat: ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_printf(&buf, "\nparent: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
|
||||
prt_str(&buf, "\nchild: ");
|
||||
@ -341,14 +345,13 @@ again:
|
||||
ret = PTR_ERR_OR_ZERO(cur);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1);
|
||||
prt_char(&buf, ' ');
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
|
||||
|
||||
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
|
||||
trans, btree_node_unreadable,
|
||||
"Topology repair: unreadable btree node at btree %s level %u:\n"
|
||||
" %s",
|
||||
bch2_btree_id_str(b->c.btree_id),
|
||||
b->c.level - 1,
|
||||
trans, btree_node_read_error,
|
||||
"Topology repair: unreadable btree node at\n%s",
|
||||
buf.buf)) {
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
cur = NULL;
|
||||
@ -357,11 +360,9 @@ again:
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!btree_id_is_alloc(b->c.btree_id)) {
|
||||
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
|
||||
ret = bch2_btree_lost_data(c, b->c.btree_id);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -370,7 +371,7 @@ again:
|
||||
break;
|
||||
|
||||
if (bch2_btree_node_is_stale(c, cur)) {
|
||||
bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
|
||||
bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf);
|
||||
six_unlock_read(&cur->c.lock);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
@ -478,14 +479,13 @@ again:
|
||||
}
|
||||
|
||||
printbuf_reset(&buf);
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_newline(&buf);
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
|
||||
if (mustfix_fsck_err_on(!have_child,
|
||||
trans, btree_node_topology_interior_node_empty,
|
||||
"empty interior btree node at btree %s level %u\n"
|
||||
" %s",
|
||||
bch2_btree_id_str(b->c.btree_id),
|
||||
b->c.level, buf.buf))
|
||||
"empty interior btree node at %s", buf.buf))
|
||||
ret = DROP_THIS_NODE;
|
||||
err:
|
||||
fsck_err:
|
||||
@ -511,6 +511,7 @@ int bch2_check_topology(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct bpos pulled_from_scan = POS_MIN;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_srcu_unlock(trans);
|
||||
@ -519,19 +520,22 @@ int bch2_check_topology(struct bch_fs *c)
|
||||
struct btree_root *r = bch2_btree_id_root(c, i);
|
||||
bool reconstructed_root = false;
|
||||
|
||||
printbuf_reset(&buf);
|
||||
bch2_btree_id_to_text(&buf, i);
|
||||
|
||||
if (r->error) {
|
||||
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
|
||||
ret = bch2_btree_lost_data(c, i);
|
||||
if (ret)
|
||||
break;
|
||||
reconstruct_root:
|
||||
bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
|
||||
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
|
||||
|
||||
r->alive = false;
|
||||
r->error = 0;
|
||||
|
||||
if (!bch2_btree_has_scanned_nodes(c, i)) {
|
||||
mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing,
|
||||
"no nodes found for btree %s, continue?", bch2_btree_id_str(i));
|
||||
"no nodes found for btree %s, continue?", buf.buf);
|
||||
bch2_btree_root_alloc_fake_trans(trans, i, 0);
|
||||
} else {
|
||||
bch2_btree_root_alloc_fake_trans(trans, i, 1);
|
||||
@ -560,13 +564,14 @@ reconstruct_root:
|
||||
if (!reconstructed_root)
|
||||
goto reconstruct_root;
|
||||
|
||||
bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
|
||||
bch_err(c, "empty btree root %s", buf.buf);
|
||||
bch2_btree_root_alloc_fake_trans(trans, i, 0);
|
||||
r->alive = false;
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
bch2_trans_put(trans);
|
||||
return ret;
|
||||
}
|
||||
@ -651,7 +656,9 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
|
||||
static int bch2_gc_btree(struct btree_trans *trans,
|
||||
struct progress_indicator_state *progress,
|
||||
enum btree_id btree, bool initial)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
|
||||
@ -668,6 +675,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
|
||||
BTREE_ITER_prefetch);
|
||||
|
||||
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||
bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
|
||||
gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
|
||||
bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
|
||||
}));
|
||||
@ -683,7 +691,7 @@ retry_root:
|
||||
struct btree_iter iter;
|
||||
bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
|
||||
0, bch2_btree_id_root(c, btree)->b->c.level, 0);
|
||||
struct btree *b = bch2_btree_iter_peek_node(&iter);
|
||||
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
|
||||
ret = PTR_ERR_OR_ZERO(b);
|
||||
if (ret)
|
||||
goto err_root;
|
||||
@ -712,29 +720,27 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
|
||||
static int bch2_gc_btrees(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
enum btree_id ids[BTREE_ID_NR];
|
||||
unsigned i;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
struct progress_indicator_state progress;
|
||||
bch2_progress_init(&progress, c, ~0ULL);
|
||||
|
||||
enum btree_id ids[BTREE_ID_NR];
|
||||
for (unsigned i = 0; i < BTREE_ID_NR; i++)
|
||||
ids[i] = i;
|
||||
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
|
||||
|
||||
for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
||||
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
||||
unsigned btree = i < BTREE_ID_NR ? ids[i] : i;
|
||||
|
||||
if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
|
||||
continue;
|
||||
|
||||
ret = bch2_gc_btree(trans, btree, true);
|
||||
|
||||
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
|
||||
trans, btree_node_read_error,
|
||||
"btree node read error for %s",
|
||||
bch2_btree_id_str(btree)))
|
||||
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
|
||||
ret = bch2_gc_btree(trans, &progress, btree, true);
|
||||
}
|
||||
fsck_err:
|
||||
|
||||
printbuf_exit(&buf);
|
||||
bch2_trans_put(trans);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
@ -802,7 +808,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
old = bch2_alloc_to_v4(k, &old_convert);
|
||||
gc = new = *old;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
|
||||
|
||||
old_gc = gc;
|
||||
@ -813,19 +818,26 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
gc.data_type = old->data_type;
|
||||
gc.dirty_sectors = old->dirty_sectors;
|
||||
}
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
/*
|
||||
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
|
||||
* fix that here:
|
||||
*/
|
||||
alloc_data_type_set(&gc, gc.data_type);
|
||||
|
||||
if (gc.data_type != old_gc.data_type ||
|
||||
gc.dirty_sectors != old_gc.dirty_sectors) {
|
||||
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
|
||||
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
|
||||
* we don't run it twice:
|
||||
*/
|
||||
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
|
||||
gc_m->data_type = gc.data_type;
|
||||
gc_m->dirty_sectors = gc.dirty_sectors;
|
||||
}
|
||||
|
||||
if (fsck_err_on(new.data_type != gc.data_type,
|
||||
@ -885,11 +897,11 @@ static int bch2_gc_alloc_done(struct bch_fs *c)
|
||||
|
||||
for_each_member_device(c, ca) {
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
|
||||
for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS(ca->dev_idx, ca->mi.first_bucket),
|
||||
POS(ca->dev_idx, ca->mi.nbuckets - 1),
|
||||
BTREE_ITER_slots|BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_alloc_write_key(trans, &iter, ca, k)));
|
||||
if (ret) {
|
||||
bch2_dev_put(ca);
|
||||
@ -918,98 +930,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
size_t *idx)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct reflink_gc *r;
|
||||
int ret = 0;
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
|
||||
r->offset < k.k->p.offset)
|
||||
++*idx;
|
||||
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount),
|
||||
trans, reflink_v_refcount_wrong,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(new);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!r->refcount)
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
else
|
||||
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
|
||||
ret = bch2_trans_update(trans, iter, new, 0);
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done(struct bch_fs *c)
|
||||
{
|
||||
size_t idx = 0;
|
||||
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
|
||||
c->reflink_gc_nr = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start(struct bch_fs *c)
|
||||
{
|
||||
c->reflink_gc_nr = 0;
|
||||
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_prefetch, k, ({
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
continue;
|
||||
|
||||
struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
|
||||
c->reflink_gc_nr++, GFP_KERNEL);
|
||||
if (!r) {
|
||||
ret = -BCH_ERR_ENOMEM_gc_reflink_start;
|
||||
break;
|
||||
}
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
0;
|
||||
})));
|
||||
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_write_stripes_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
@ -1100,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c)
|
||||
{
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
down_read(&c->state_lock);
|
||||
down_write(&c->gc_lock);
|
||||
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
@ -1139,6 +1058,7 @@ out:
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
up_write(&c->gc_lock);
|
||||
up_read(&c->state_lock);
|
||||
|
||||
/*
|
||||
* At startup, allocations can happen directly instead of via the
|
||||
@ -1161,7 +1081,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
|
||||
return -EROFS;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
rcu_read_lock();
|
||||
bkey_for_each_ptr(ptrs, ptr) {
|
||||
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
|
||||
@ -1170,7 +1089,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
|
||||
if (dev_ptr_stale(ca, ptr) > 16) {
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
goto update;
|
||||
}
|
||||
}
|
||||
@ -1185,7 +1103,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
*gen = ptr->gen;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
return 0;
|
||||
update:
|
||||
u = bch2_bkey_make_mut(trans, iter, &k, 0);
|
||||
@ -1214,7 +1131,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev
|
||||
return ret;
|
||||
|
||||
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
|
||||
alloc_data_type_set(&a_mut->v, a_mut->v.data_type);
|
||||
|
||||
return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
|
||||
}
|
||||
@ -1283,7 +1199,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
BCH_TRANS_COMMIT_no_enospc, ({
|
||||
ca = bch2_dev_iterate(c, ca, k.k->p.inode);
|
||||
if (!ca) {
|
||||
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
|
||||
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
|
||||
continue;
|
||||
}
|
||||
bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
|
||||
@ -1327,9 +1243,11 @@ void bch2_gc_gens_async(struct bch_fs *c)
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
|
||||
}
|
||||
|
||||
void bch2_fs_gc_init(struct bch_fs *c)
|
||||
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
|
||||
{
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
|
||||
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
|
||||
|
||||
init_rwsem(&c->gc_lock);
|
||||
mutex_init(&c->gc_gens_lock);
|
||||
}
|
||||
|
@ -82,6 +82,7 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
|
||||
|
||||
int bch2_gc_gens(struct bch_fs *);
|
||||
void bch2_gc_gens_async(struct bch_fs *);
|
||||
void bch2_fs_gc_init(struct bch_fs *);
|
||||
|
||||
void bch2_fs_btree_gc_init_early(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_GC_H */
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "bkey_sort.h"
|
||||
#include "btree_cache.h"
|
||||
@ -25,9 +26,8 @@
|
||||
|
||||
static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
|
||||
{
|
||||
prt_printf(out, "btree=%s l=%u seq %llux\n",
|
||||
bch2_btree_id_str(BTREE_NODE_ID(bn)),
|
||||
(unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq);
|
||||
bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn));
|
||||
prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn));
|
||||
prt_str(out, "min: ");
|
||||
bch2_bpos_to_text(out, bn->min_key);
|
||||
prt_newline(out);
|
||||
@ -490,7 +490,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
!btree_node_write_in_flight(b) &&
|
||||
should_compact_all(c, b)) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write,
|
||||
bch2_btree_node_write_trans(trans, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
reinit_iter = true;
|
||||
}
|
||||
@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, "at btree ");
|
||||
bch2_btree_pos_to_text(out, c, b);
|
||||
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
prt_printf(out, "\nnode offset %u/%u",
|
||||
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
|
||||
if (i)
|
||||
@ -550,23 +548,7 @@ static int __btree_err(int ret,
|
||||
enum bch_sb_error_id err_type,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct printbuf out = PRINTBUF;
|
||||
bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
|
||||
va_list args;
|
||||
|
||||
btree_err_msg(&out, c, ca, b, i, k, b->written, write);
|
||||
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(&out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (write == WRITE) {
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = c->opts.errors == BCH_ON_ERROR_continue
|
||||
? 0
|
||||
: -BCH_ERR_fsck_errors_not_fixed;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
|
||||
ret = -BCH_ERR_btree_node_read_err_fixable;
|
||||
@ -576,6 +558,29 @@ static int __btree_err(int ret,
|
||||
if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
|
||||
bch2_sb_error_count(c, err_type);
|
||||
|
||||
struct printbuf out = PRINTBUF;
|
||||
if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
|
||||
printbuf_indent_add_nextline(&out, 2);
|
||||
#ifdef BCACHEFS_LOG_PREFIX
|
||||
prt_printf(&out, bch2_log_msg(c, ""));
|
||||
#endif
|
||||
}
|
||||
|
||||
btree_err_msg(&out, c, ca, b, i, k, b->written, write);
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(&out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (write == WRITE) {
|
||||
prt_str(&out, ", ");
|
||||
ret = __bch2_inconsistent_error(c, &out)
|
||||
? -BCH_ERR_fsck_errors_not_fixed
|
||||
: 0;
|
||||
silent = false;
|
||||
}
|
||||
|
||||
switch (ret) {
|
||||
case -BCH_ERR_btree_node_read_err_fixable:
|
||||
ret = !silent
|
||||
@ -585,25 +590,21 @@ static int __btree_err(int ret,
|
||||
ret != -BCH_ERR_fsck_ignore)
|
||||
goto fsck_err;
|
||||
ret = -BCH_ERR_fsck_fix;
|
||||
break;
|
||||
case -BCH_ERR_btree_node_read_err_want_retry:
|
||||
case -BCH_ERR_btree_node_read_err_must_retry:
|
||||
if (!silent)
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
break;
|
||||
goto out;
|
||||
case -BCH_ERR_btree_node_read_err_bad_node:
|
||||
if (!silent)
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = bch2_topology_error(c);
|
||||
prt_str(&out, ", ");
|
||||
ret = __bch2_topology_error(c, &out);
|
||||
if (ret)
|
||||
silent = false;
|
||||
break;
|
||||
case -BCH_ERR_btree_node_read_err_incompatible:
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
silent = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!silent)
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&out);
|
||||
@ -733,11 +734,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
c, ca, b, i, NULL,
|
||||
bset_past_end_of_btree_node,
|
||||
"bset past end of btree node (offset %u len %u but written %zu)",
|
||||
offset, sectors, ptr_written ?: btree_sectors(c))) {
|
||||
offset, sectors, ptr_written ?: btree_sectors(c)))
|
||||
i->u64s = 0;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
btree_err_on(offset && !i->u64s,
|
||||
-BCH_ERR_btree_node_read_err_fixable,
|
||||
@ -829,20 +827,38 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
BSET_BIG_ENDIAN(i), write,
|
||||
&bn->format);
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf2);
|
||||
printbuf_exit(&buf1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_s_c k,
|
||||
enum bch_validate_flags flags)
|
||||
{
|
||||
return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) {
|
||||
.from = BKEY_VALIDATE_btree_node,
|
||||
.level = b->c.level,
|
||||
.btree = b->c.btree_id,
|
||||
.flags = flags
|
||||
});
|
||||
}
|
||||
|
||||
static int bset_key_validate(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_s_c k,
|
||||
bool updated_range, int rw)
|
||||
bool updated_range,
|
||||
enum bch_validate_flags flags)
|
||||
{
|
||||
return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?:
|
||||
(!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?:
|
||||
(rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0);
|
||||
struct bkey_validate_context from = (struct bkey_validate_context) {
|
||||
.from = BKEY_VALIDATE_btree_node,
|
||||
.level = b->c.level,
|
||||
.btree = b->c.btree_id,
|
||||
.flags = flags,
|
||||
};
|
||||
return __bch2_bkey_validate(c, k, from) ?:
|
||||
(!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?:
|
||||
(flags & BCH_VALIDATE_write ? btree_node_bkey_val_validate(c, b, k, flags) : 0);
|
||||
}
|
||||
|
||||
static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
|
||||
@ -859,7 +875,21 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
|
||||
|
||||
struct bkey tmp;
|
||||
struct bkey_s u = __bkey_disassemble(b, k, &tmp);
|
||||
return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent);
|
||||
return !__bch2_bkey_validate(c, u.s_c,
|
||||
(struct bkey_validate_context) {
|
||||
.from = BKEY_VALIDATE_btree_node,
|
||||
.level = b->c.level,
|
||||
.btree = b->c.btree_id,
|
||||
.flags = BCH_VALIDATE_silent
|
||||
});
|
||||
}
|
||||
|
||||
static inline int btree_node_read_bkey_cmp(const struct btree *b,
|
||||
const struct bkey_packed *l,
|
||||
const struct bkey_packed *r)
|
||||
{
|
||||
return bch2_bkey_cmp_packed(b, l, r)
|
||||
?: (int) bkey_deleted(r) - (int) bkey_deleted(l);
|
||||
}
|
||||
|
||||
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
@ -922,7 +952,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
BSET_BIG_ENDIAN(i), write,
|
||||
&b->format, k);
|
||||
|
||||
if (prev && bkey_iter_cmp(b, prev, k) > 0) {
|
||||
if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) {
|
||||
struct bkey up = bkey_unpack_key(b, prev);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
@ -968,7 +998,8 @@ drop_this_key:
|
||||
}
|
||||
got_good_key:
|
||||
le16_add_cpu(&i->u64s, -next_good_key);
|
||||
memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
|
||||
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
|
||||
set_btree_node_need_rewrite(b);
|
||||
}
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
@ -1042,23 +1073,34 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
while (b->written < (ptr_written ?: btree_sectors(c))) {
|
||||
unsigned sectors;
|
||||
struct nonce nonce;
|
||||
bool first = !b->written;
|
||||
bool csum_bad;
|
||||
|
||||
if (!b->written) {
|
||||
if (first) {
|
||||
bne = NULL;
|
||||
i = &b->data->keys;
|
||||
} else {
|
||||
bne = write_block(b);
|
||||
i = &bne->keys;
|
||||
|
||||
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
|
||||
-BCH_ERR_btree_node_read_err_want_retry,
|
||||
if (i->seq != b->data->keys.seq)
|
||||
break;
|
||||
}
|
||||
|
||||
struct nonce nonce = btree_nonce(i, b->written << 9);
|
||||
bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
|
||||
|
||||
btree_err_on(!good_csum_type,
|
||||
bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))
|
||||
? -BCH_ERR_btree_node_read_err_must_retry
|
||||
: -BCH_ERR_btree_node_read_err_want_retry,
|
||||
c, ca, b, i, NULL,
|
||||
bset_unknown_csum,
|
||||
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
|
||||
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
|
||||
if (first) {
|
||||
if (good_csum_type) {
|
||||
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
|
||||
csum_bad = bch2_crc_cmp(b->data->csum, csum);
|
||||
bool csum_bad = bch2_crc_cmp(b->data->csum, csum);
|
||||
if (csum_bad)
|
||||
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
|
||||
|
||||
@ -1075,6 +1117,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"decrypting btree node: %s", bch2_err_str(ret)))
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
|
||||
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
|
||||
@ -1085,21 +1128,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
sectors = vstruct_sectors(b->data, c->block_bits);
|
||||
} else {
|
||||
bne = write_block(b);
|
||||
i = &bne->keys;
|
||||
|
||||
if (i->seq != b->data->keys.seq)
|
||||
break;
|
||||
|
||||
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
|
||||
-BCH_ERR_btree_node_read_err_want_retry,
|
||||
c, ca, b, i, NULL,
|
||||
bset_unknown_csum,
|
||||
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
|
||||
|
||||
nonce = btree_nonce(i, b->written << 9);
|
||||
if (good_csum_type) {
|
||||
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||
csum_bad = bch2_crc_cmp(bne->csum, csum);
|
||||
bool csum_bad = bch2_crc_cmp(bne->csum, csum);
|
||||
if (ca && csum_bad)
|
||||
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
|
||||
|
||||
@ -1116,6 +1147,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"decrypting btree node: %s", bch2_err_str(ret)))
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
@ -1156,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
le64_to_cpu(i->journal_seq),
|
||||
b->written, b->written + sectors, ptr_written);
|
||||
|
||||
b->written += sectors;
|
||||
b->written = min(b->written + sectors, btree_sectors(c));
|
||||
|
||||
if (blacklisted && !first)
|
||||
continue;
|
||||
@ -1220,7 +1252,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bkey tmp;
|
||||
struct bkey_s u = __bkey_disassemble(b, k, &tmp);
|
||||
|
||||
ret = bch2_bkey_val_validate(c, u.s_c, READ);
|
||||
ret = btree_node_bkey_val_validate(c, b, u.s_c, READ);
|
||||
if (ret == -BCH_ERR_fsck_delete_bkey ||
|
||||
(bch2_inject_invalid_keys &&
|
||||
!bversion_cmp(u.k->bversion, MAX_VERSION))) {
|
||||
@ -1230,6 +1262,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
memmove_u64s_down(k, bkey_p_next(k),
|
||||
(u64 *) vstruct_end(i) - (u64 *) k);
|
||||
set_btree_bset_end(b, b->set);
|
||||
set_btree_node_need_rewrite(b);
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
@ -1297,6 +1330,7 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
bch_info(c, "retrying read");
|
||||
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
|
||||
rb->have_ioref = ca != NULL;
|
||||
rb->start_time = local_clock();
|
||||
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
|
||||
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
|
||||
bio->bi_iter.bi_size = btree_buf_bytes(b);
|
||||
@ -1307,21 +1341,26 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
} else {
|
||||
bio->bi_status = BLK_STS_REMOVED;
|
||||
}
|
||||
|
||||
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
|
||||
rb->start_time, !bio->bi_status);
|
||||
start:
|
||||
printbuf_reset(&buf);
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read,
|
||||
|
||||
if (ca && bio->bi_status)
|
||||
bch_err_dev_ratelimited(ca,
|
||||
"btree read error %s for %s",
|
||||
bch2_blk_status_to_str(bio->bi_status), buf.buf);
|
||||
if (rb->have_ioref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
percpu_ref_put(&ca->io_ref[READ]);
|
||||
rb->have_ioref = false;
|
||||
|
||||
bch2_mark_io_failure(&failed, &rb->pick);
|
||||
bch2_mark_io_failure(&failed, &rb->pick, false);
|
||||
|
||||
can_retry = bch2_bkey_pick_read_device(c,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
&failed, &rb->pick) > 0;
|
||||
&failed, &rb->pick, -1) > 0;
|
||||
|
||||
if (!bio->bi_status &&
|
||||
!bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
|
||||
@ -1343,13 +1382,18 @@ start:
|
||||
rb->start_time);
|
||||
bio_put(&rb->bio);
|
||||
|
||||
if (saw_error &&
|
||||
if ((saw_error ||
|
||||
btree_node_need_rewrite(b)) &&
|
||||
!btree_node_read_error(b) &&
|
||||
c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
|
||||
if (saw_error) {
|
||||
printbuf_reset(&buf);
|
||||
bch2_bpos_to_text(&buf, b->key.k.p);
|
||||
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
|
||||
__func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_str(&buf, " ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
|
||||
__func__, buf.buf);
|
||||
}
|
||||
|
||||
bch2_btree_node_rewrite_async(c, b);
|
||||
}
|
||||
@ -1364,12 +1408,11 @@ static void btree_node_read_endio(struct bio *bio)
|
||||
struct btree_read_bio *rb =
|
||||
container_of(bio, struct btree_read_bio, bio);
|
||||
struct bch_fs *c = rb->c;
|
||||
struct bch_dev *ca = rb->have_ioref
|
||||
? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL;
|
||||
|
||||
if (rb->have_ioref) {
|
||||
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
|
||||
|
||||
bch2_latency_acct(ca, rb->start_time, READ);
|
||||
}
|
||||
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
|
||||
rb->start_time, !bio->bi_status);
|
||||
|
||||
queue_work(c->btree_read_complete_wq, &rb->work);
|
||||
}
|
||||
@ -1566,6 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
|
||||
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
|
||||
|
||||
bch2_latency_acct(ca, rb->start_time, READ);
|
||||
percpu_ref_put(&ca->io_ref[READ]);
|
||||
}
|
||||
|
||||
ra->err[rb->idx] = bio->bi_status;
|
||||
@ -1661,7 +1705,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
|
||||
return;
|
||||
|
||||
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
|
||||
NULL, &pick);
|
||||
NULL, &pick, -1);
|
||||
|
||||
if (ret <= 0) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
@ -1775,6 +1819,191 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
|
||||
}
|
||||
|
||||
/*
 * State for one in-flight scrub read of a btree node replica.
 *
 * Allocated and filled in by bch2_btree_node_scrub(), handed to
 * btree_node_scrub_work() via the bio completion, and freed there.
 */
struct btree_node_scrub {
|
||||
/* filesystem the node belongs to (trans->c at submission) */
struct bch_fs *c;
|
||||
/* device being read; an io_ref[READ] on it is dropped in btree_node_scrub_work() */
struct bch_dev *ca;
|
||||
/* bounce buffer of c->opts.btree_node_size bytes that the read lands in */
void *buf;
|
||||
/* out-value of btree_bounce_alloc(), passed back to btree_bounce_free() */
bool used_mempool;
|
||||
/* btree_ptr_sectors_written() of the key; 0 makes the check scan up to btree_sectors(c) */
unsigned written;
|
||||
|
||||
/* btree id and level as passed in by the caller of bch2_btree_node_scrub() */
enum btree_id btree;
|
||||
unsigned level;
|
||||
/* private copy of the key pointing at the node */
struct bkey_buf key;
|
||||
/* seq of the btree_ptr_v2 at submission; compared again before rewriting */
__le64 seq;
|
||||
|
||||
/* runs btree_node_scrub_work(); queued from the bio end_io handler */
struct work_struct work;
|
||||
/*
 * Must stay the last member: the allocation appends the bio_vec array
 * directly after the struct and bio_init() is given bio.bi_inline_vecs.
 */
struct bio bio;
|
||||
};
|
||||
|
||||
/*
 * Check the magic number and the checksum of each bset in a raw btree node
 * buffer.
 *
 * @data:        start of the buffer the node was read into
 * @ptr_written: number of sectors to walk; when 0 the walk is bounded by
 *               btree_sectors(c) and stops early at the first entry whose seq
 *               differs from the first bset's
 * @err:         receives a description of the first failure
 *
 * Returns true when nothing failed, false after writing to @err. Entries whose
 * checksum type is rejected by bch2_checksum_type_valid() are skipped rather
 * than reported.
 *
 * NOTE(review): the size of each entry is taken from its on-disk header (via
 * csum_vstruct()/vstruct_sectors()) before anything bounds it against the
 * buffer - confirm a corrupt length cannot make the checksum read past the end
 * of @data.
 */
static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written,
|
||||
struct printbuf *err)
|
||||
{
|
||||
/* sectors consumed so far; advanced by vstruct_sectors() below */
unsigned written = 0;
|
||||
|
||||
if (le64_to_cpu(data->magic) != bset_magic(c)) {
|
||||
prt_printf(err, "bad magic: want %llx, got %llx",
|
||||
bset_magic(c), le64_to_cpu(data->magic));
|
||||
return false;
|
||||
}
|
||||
|
||||
while (written < (ptr_written ?: btree_sectors(c))) {
|
||||
struct btree_node_entry *bne;
|
||||
struct bset *i;
|
||||
bool first = !written;
|
||||
|
||||
/* the first bset lives in the node header, later ones in btree_node_entry records */
if (first) {
|
||||
bne = NULL;
|
||||
i = &data->keys;
|
||||
} else {
|
||||
/* written << 9: sector count to byte offset (512-byte sectors) */
bne = (void *) data + (written << 9);
|
||||
i = &bne->keys;
|
||||
|
||||
/* without a known written size, a seq mismatch marks the end of this node's data */
if (!ptr_written && i->seq != data->keys.seq)
|
||||
break;
|
||||
}
|
||||
|
||||
struct nonce nonce = btree_nonce(i, written << 9);
|
||||
bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
|
||||
|
||||
if (first) {
|
||||
if (good_csum_type) {
|
||||
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data);
|
||||
if (bch2_crc_cmp(data->csum, csum)) {
|
||||
bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
written += vstruct_sectors(data, c->block_bits);
|
||||
} else {
|
||||
if (good_csum_type) {
|
||||
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||
if (bch2_crc_cmp(bne->csum, csum)) {
|
||||
bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
written += vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Work item that finishes a scrub read; queued by btree_node_scrub_endio().
 *
 * Runs btree_node_scrub_check() on the buffer. If that fails, the node is
 * looked up again and, provided its key still carries the seq recorded at
 * submission, the failure is logged and the node is rewritten.
 *
 * Whatever the outcome, everything owned by the scrub is released here: the
 * key copy, the bounce buffer, the device io_ref[READ], the scrub allocation
 * and the BCH_WRITE_REF_btree_node_scrub write ref.
 */
static void btree_node_scrub_work(struct work_struct *work)
|
||||
{
|
||||
struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
|
||||
struct bch_fs *c = scrub->c;
|
||||
struct printbuf err = PRINTBUF;
|
||||
|
||||
/* start the message with the node position; the check appends the failure after it */
__bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
|
||||
bkey_i_to_s_c(scrub->key.k));
|
||||
prt_newline(&err);
|
||||
|
||||
if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
struct btree_iter iter;
|
||||
/*
 * NOTE(review): the iterator is set up at scrub->level - 1, presumably
 * because scrub->level is the level of the key rather than of the node
 * it points to - confirm against the callers of bch2_btree_node_scrub().
 */
bch2_trans_node_iter_init(trans, &iter, scrub->btree,
|
||||
scrub->key.k->k.p, 0, scrub->level - 1, 0);
|
||||
|
||||
struct btree *b;
|
||||
int ret = lockrestart_do(trans,
|
||||
PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter)));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* only act if the node found now is the one that was read (same seq) */
if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
|
||||
bch_err(c, "error validating btree node during scrub on %s at btree %s",
|
||||
scrub->ca->name, err.buf);
|
||||
|
||||
/* NOTE(review): the result stored in ret is not examined afterwards */
ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
|
||||
}
|
||||
/* label name coincides with the printbuf variable; labels have their own namespace in C */
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
bch2_trans_begin(trans);
|
||||
bch2_trans_put(trans);
|
||||
}
|
||||
|
||||
printbuf_exit(&err);
|
||||
bch2_bkey_buf_exit(&scrub->key, c);;
|
||||
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
|
||||
percpu_ref_put(&scrub->ca->io_ref[READ]);
|
||||
kfree(scrub);
|
||||
/* pairs with bch2_write_ref_tryget() in bch2_btree_node_scrub() */
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
|
||||
}
|
||||
|
||||
/*
 * bio completion handler for scrub reads: recovers the enclosing
 * btree_node_scrub from the embedded bio and defers all further processing to
 * btree_node_scrub_work() on c->btree_read_complete_wq.
 *
 * bio->bi_status is not looked at here.
 */
static void btree_node_scrub_endio(struct bio *bio)
|
||||
{
|
||||
struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio);
|
||||
|
||||
queue_work(scrub->c->btree_read_complete_wq, &scrub->work);
|
||||
}
|
||||
|
||||
/*
 * Submit an asynchronous scrub read of one replica of a btree node.
 *
 * @trans: transaction, used here only to reach the filesystem (trans->c)
 * @btree: btree id, recorded for the completion path
 * @level: level, recorded for the completion path
 * @k:     key pointing at the node; anything but KEY_TYPE_btree_ptr_v2 is ignored
 * @dev:   device argument forwarded to bch2_bkey_pick_read_device()
 *
 * Returns 0 when @k is not a btree_ptr_v2 or once the read has been
 * submitted. When bch2_bkey_pick_read_device() returns <= 0 that value is
 * returned as is (so 0 can also mean "nothing was submitted"). Otherwise
 * -BCH_ERR_erofs_no_writes, -BCH_ERR_device_offline or -ENOMEM.
 *
 * After a successful submit the write ref, the device io ref, the bounce
 * buffer and the scrub allocation are all released by btree_node_scrub_work().
 */
int bch2_btree_node_scrub(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bkey_s_c k, unsigned dev)
|
||||
{
|
||||
if (k.k->type != KEY_TYPE_btree_ptr_v2)
|
||||
return 0;
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
/* held until the work item finishes, or dropped at err: below */
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
|
||||
return -BCH_ERR_erofs_no_writes;
|
||||
|
||||
struct extent_ptr_decoded pick;
|
||||
int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
|
||||
if (ret <= 0)
|
||||
goto err;
|
||||
|
||||
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
|
||||
if (!ca) {
|
||||
ret = -BCH_ERR_device_offline;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bool used_mempool = false;
|
||||
/* NOTE(review): the result is used unchecked - presumably btree_bounce_alloc() cannot fail; confirm */
void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool);
|
||||
|
||||
unsigned vecs = buf_pages(buf, c->opts.btree_node_size);
|
||||
|
||||
/* the bio_vec array is allocated inline, right behind the trailing bio member */
struct btree_node_scrub *scrub =
|
||||
kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL);
|
||||
if (!scrub) {
|
||||
ret = -ENOMEM;
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
scrub->c = c;
|
||||
scrub->ca = ca;
|
||||
scrub->buf = buf;
|
||||
scrub->used_mempool = used_mempool;
|
||||
scrub->written = btree_ptr_sectors_written(k);
|
||||
|
||||
scrub->btree = btree;
|
||||
scrub->level = level;
|
||||
/* keep a private copy of the key for the completion path */
bch2_bkey_buf_init(&scrub->key);
|
||||
bch2_bkey_buf_reassemble(&scrub->key, c, k);
|
||||
scrub->seq = bkey_s_c_to_btree_ptr_v2(k).v->seq;
|
||||
|
||||
INIT_WORK(&scrub->work, btree_node_scrub_work);
|
||||
|
||||
bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, vecs, REQ_OP_READ);
|
||||
bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size);
|
||||
scrub->bio.bi_iter.bi_sector = pick.ptr.offset;
|
||||
scrub->bio.bi_end_io = btree_node_scrub_endio;
|
||||
submit_bio(&scrub->bio);
|
||||
return 0;
|
||||
/* unwind in reverse order of acquisition */
err_free:
|
||||
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
|
||||
percpu_ref_put(&ca->io_ref[READ]);
|
||||
err:
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
struct btree_write *w)
|
||||
{
|
||||
@ -1795,7 +2024,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
}
|
||||
|
||||
static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
||||
{
|
||||
struct btree_write *w = btree_prev_write(b);
|
||||
unsigned long old, new;
|
||||
@ -1803,6 +2032,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
|
||||
bch2_btree_complete_write(c, b, w);
|
||||
|
||||
if (start_time)
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);
|
||||
|
||||
old = READ_ONCE(b->flags);
|
||||
do {
|
||||
new = old;
|
||||
@ -1833,7 +2065,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
@ -1841,7 +2073,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
|
||||
/* we don't need transaction context anymore after we got the lock. */
|
||||
bch2_trans_put(trans);
|
||||
__btree_node_write_done(c, b);
|
||||
__btree_node_write_done(c, b, start_time);
|
||||
six_unlock_read(&b->c.lock);
|
||||
}
|
||||
|
||||
@ -1851,6 +2083,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
container_of(work, struct btree_write_bio, work);
|
||||
struct bch_fs *c = wbio->wbio.c;
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
u64 start_time = wbio->start_time;
|
||||
int ret = 0;
|
||||
|
||||
btree_bounce_free(c,
|
||||
@ -1871,7 +2104,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
|
||||
}
|
||||
} else {
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = bch2_trans_do(c,
|
||||
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
|
||||
BCH_WATERMARK_interior_updates|
|
||||
BCH_TRANS_COMMIT_journal_reclaim|
|
||||
@ -1883,12 +2116,18 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
}
|
||||
out:
|
||||
bio_put(&wbio->wbio.bio);
|
||||
btree_node_write_done(c, b);
|
||||
btree_node_write_done(c, b, start_time);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
|
||||
"writing btree node: %s", bch2_err_str(ret));
|
||||
|
||||
if (!bch2_err_matches(ret, EROFS)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
bch2_fs_fatal_error(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1901,23 +2140,33 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct btree *b = wbio->bio.bi_private;
|
||||
struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL;
|
||||
|
||||
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
|
||||
wbio->submit_time, !bio->bi_status);
|
||||
|
||||
if (ca && bio->bi_status) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
buf.atomic++;
|
||||
prt_printf(&buf, "btree write error: %s\n ",
|
||||
bch2_blk_status_to_str(bio->bi_status));
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
bch_err_dev_ratelimited(ca, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
if (bio->bi_status) {
|
||||
unsigned long flags;
|
||||
|
||||
if (wbio->have_ioref)
|
||||
bch2_latency_acct(ca, wbio->submit_time, WRITE);
|
||||
|
||||
if (!ca ||
|
||||
bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
|
||||
"btree write error: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status)) ||
|
||||
bch2_meta_write_fault("btree")) {
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bch2_dev_list_add_dev(&orig->failed, wbio->dev);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: we should be using io_ref[WRITE], but we aren't retrying failed
|
||||
* btree writes yet (due to device removal/ro):
|
||||
*/
|
||||
if (wbio->have_ioref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
percpu_ref_put(&ca->io_ref[READ]);
|
||||
|
||||
if (parent) {
|
||||
bio_put(bio);
|
||||
@ -1928,7 +2177,7 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
clear_btree_node_write_in_flight_inner(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
|
||||
INIT_WORK(&wb->work, btree_node_write_work);
|
||||
queue_work(c->btree_io_complete_wq, &wb->work);
|
||||
queue_work(c->btree_write_complete_wq, &wb->work);
|
||||
}
|
||||
|
||||
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
@ -1937,7 +2186,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
bool saw_error;
|
||||
|
||||
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
|
||||
BKEY_TYPE_btree, WRITE);
|
||||
(struct bkey_validate_context) {
|
||||
.from = BKEY_VALIDATE_btree_node,
|
||||
.level = b->c.level + 1,
|
||||
.btree = b->c.btree_id,
|
||||
.flags = BCH_VALIDATE_write,
|
||||
});
|
||||
if (ret) {
|
||||
bch2_fs_inconsistent(c, "invalid btree node key before write");
|
||||
return ret;
|
||||
@ -1982,6 +2236,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
||||
bool validate_before_checksum = false;
|
||||
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
|
||||
void *data;
|
||||
u64 start_time = local_clock();
|
||||
int ret;
|
||||
|
||||
if (flags & BTREE_WRITE_ALREADY_STARTED)
|
||||
@ -2190,6 +2445,7 @@ do_write:
|
||||
wbio->data = data;
|
||||
wbio->data_bytes = bytes;
|
||||
wbio->sector_offset = b->written;
|
||||
wbio->start_time = start_time;
|
||||
wbio->wbio.c = c;
|
||||
wbio->wbio.used_mempool = used_mempool;
|
||||
wbio->wbio.first_btree_write = !b->written;
|
||||
@ -2217,7 +2473,7 @@ err:
|
||||
b->written += sectors_to_write;
|
||||
nowrite:
|
||||
btree_bounce_free(c, bytes, used_mempool, data);
|
||||
__btree_node_write_done(c, b);
|
||||
__btree_node_write_done(c, b, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2304,6 +2560,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Write out a btree node on behalf of a transaction that already holds a lock
 * on it.
 *
 * @trans:          transaction; supplies the filesystem and is passed to the
 *                  write-unlock helper
 * @b:              node to write
 * @lock_type_held: six lock type the caller holds on @b
 * @flags:          forwarded to __bch2_btree_node_write()
 *
 * With an intent lock, or a read lock that six_lock_tryupgrade() manages to
 * upgrade, the node is written and post-write cleanup is attempted if the
 * write lock can be taken with a trylock. An upgraded read lock is downgraded
 * again before returning. In every other case the node is written and the
 * cleanup only runs when the caller already holds the write lock.
 */
void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b,
|
||||
enum six_lock_type lock_type_held,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
/* the tryupgrade is only attempted when the caller holds a read lock */
if (lock_type_held == SIX_LOCK_intent ||
|
||||
(lock_type_held == SIX_LOCK_read &&
|
||||
six_lock_tryupgrade(&b->c.lock))) {
|
||||
__bch2_btree_node_write(c, b, flags);
|
||||
|
||||
/* don't cycle lock unnecessarily: */
|
||||
if (btree_node_just_written(b) &&
|
||||
six_trylock_write(&b->c.lock)) {
|
||||
bch2_btree_post_write_cleanup(c, b);
|
||||
__bch2_btree_node_unlock_write(trans, b);
|
||||
}
|
||||
|
||||
/* give the caller back the lock type it came in with */
if (lock_type_held == SIX_LOCK_read)
|
||||
six_lock_downgrade(&b->c.lock);
|
||||
} else {
|
||||
__bch2_btree_node_write(c, b, flags);
|
||||
if (lock_type_held == SIX_LOCK_write &&
|
||||
btree_node_just_written(b))
|
||||
bch2_btree_post_write_cleanup(c, b);
|
||||
}
|
||||
}
|
||||
|
||||
static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
|
||||
{
|
||||
struct bucket_table *tbl;
|
||||
|
@ -52,6 +52,7 @@ struct btree_write_bio {
|
||||
void *data;
|
||||
unsigned data_bytes;
|
||||
unsigned sector_offset;
|
||||
u64 start_time;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
@ -132,6 +133,9 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
|
||||
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
const struct bkey_i *, unsigned);
|
||||
|
||||
int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
|
||||
struct bkey_s_c, unsigned);
|
||||
|
||||
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
||||
|
||||
enum btree_write_flags {
|
||||
@ -144,11 +148,13 @@ enum btree_write_flags {
|
||||
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
|
||||
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
enum six_lock_type, unsigned);
|
||||
void bch2_btree_node_write_trans(struct btree_trans *, struct btree *,
|
||||
enum six_lock_type, unsigned);
|
||||
|
||||
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
|
||||
static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b,
|
||||
enum six_lock_type lock_held)
|
||||
{
|
||||
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
|
||||
bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
|
||||
}
|
||||
|
||||
bool bch2_btree_flush_all_reads(struct bch_fs *);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -9,7 +9,6 @@
|
||||
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
|
||||
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
|
||||
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
|
||||
void bch2_dump_trans_updates(struct btree_trans *);
|
||||
void bch2_dump_trans_paths_updates(struct btree_trans *);
|
||||
|
||||
static inline int __bkey_err(const struct bkey *k)
|
||||
@ -23,6 +22,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path
|
||||
{
|
||||
unsigned idx = path - trans->paths;
|
||||
|
||||
EBUG_ON(idx >= trans->nr_paths);
|
||||
EBUG_ON(!test_bit(idx, trans->paths_allocated));
|
||||
if (unlikely(path->ref == U8_MAX)) {
|
||||
bch2_dump_trans_paths_updates(trans);
|
||||
@ -36,6 +36,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path
|
||||
|
||||
static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
|
||||
{
|
||||
EBUG_ON(path - trans->paths >= trans->nr_paths);
|
||||
EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated));
|
||||
EBUG_ON(!path->ref);
|
||||
EBUG_ON(!path->intent_ref && intent);
|
||||
@ -234,12 +235,12 @@ int __must_check bch2_btree_path_traverse_one(struct btree_trans *,
|
||||
btree_path_idx_t,
|
||||
unsigned, unsigned long);
|
||||
|
||||
static inline void bch2_trans_verify_not_unlocked(struct btree_trans *);
|
||||
static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *);
|
||||
|
||||
static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
|
||||
btree_path_idx_t path, unsigned flags)
|
||||
{
|
||||
bch2_trans_verify_not_unlocked(trans);
|
||||
bch2_trans_verify_not_unlocked_or_in_restart(trans);
|
||||
|
||||
if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK)
|
||||
return 0;
|
||||
@ -324,38 +325,52 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
|
||||
bch2_trans_restart_error(trans, restart_count);
|
||||
}
|
||||
|
||||
void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
|
||||
void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *);
|
||||
|
||||
static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
|
||||
static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *trans)
|
||||
{
|
||||
if (trans->restarted)
|
||||
bch2_trans_in_restart_error(trans);
|
||||
}
|
||||
|
||||
void __noreturn bch2_trans_unlocked_error(struct btree_trans *);
|
||||
|
||||
static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans)
|
||||
{
|
||||
if (!trans->locked)
|
||||
bch2_trans_unlocked_error(trans);
|
||||
if (trans->restarted || !trans->locked)
|
||||
bch2_trans_unlocked_or_in_restart_error(trans);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
|
||||
static int btree_trans_restart_foreign_task(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
BUG_ON(err <= 0);
|
||||
BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
|
||||
|
||||
trans->restarted = err;
|
||||
trans->last_restarted_ip = _THIS_IP_;
|
||||
trans->last_restarted_ip = ip;
|
||||
return -err;
|
||||
}
|
||||
|
||||
/*
 * Mark @trans as restarted with error @err, attributing the restart to the
 * caller-supplied instruction pointer @ip.
 *
 * The bookkeeping itself is done by btree_trans_restart_foreign_task(). With
 * CONFIG_BCACHEFS_DEBUG the previously saved backtrace is dropped and a
 * backtrace of the current task is saved in its place (GFP_NOWAIT).
 *
 * Returns -err.
 */
__always_inline
|
||||
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
btree_trans_restart_foreign_task(trans, err, ip);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
darray_exit(&trans->last_restarted_trace);
|
||||
bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT);
|
||||
#endif
|
||||
return -err;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart(struct btree_trans *trans, int err)
|
||||
{
|
||||
btree_trans_restart_nounlock(trans, err);
|
||||
return -err;
|
||||
return btree_trans_restart_ip(trans, err, _THIS_IP_);
|
||||
}
|
||||
|
||||
/*
 * Fault injection hook: occasionally force a transaction restart.
 *
 * Compiled to "return 0" unless CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS is
 * set. When enabled, a restart is injected whenever the low N bits of
 * ktime_get_ns() are all zero, with N = min(63, 10 + restart_count_this_trans),
 * so the mask widens as that counter grows.
 *
 * Returns 0, or the result of btree_trans_restart_ip() with
 * BCH_ERR_transaction_restart_fault_inject when a restart is injected.
 */
static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
/* ~(~0ULL << N) is a mask with the N lowest bits set */
if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
|
||||
trace_and_count(trans->c, trans_restart_injected, trans, ip);
|
||||
return btree_trans_restart_ip(trans,
|
||||
BCH_ERR_transaction_restart_fault_inject, ip);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool bch2_btree_node_upgrade(struct btree_trans *,
|
||||
@ -375,32 +390,40 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
void bch2_trans_downgrade(struct btree_trans *);
|
||||
|
||||
void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *);
|
||||
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
|
||||
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
|
||||
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
|
||||
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
|
||||
int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
|
||||
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
|
||||
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
|
||||
struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *);
|
||||
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *);
|
||||
struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
|
||||
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos);
|
||||
struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
|
||||
return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX);
|
||||
}
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
|
||||
/*
 * Convenience wrapper: bch2_btree_iter_peek_prev_min() with POS_MIN as the
 * bound argument.
 */
static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter)
|
||||
{
|
||||
return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN);
|
||||
}
|
||||
|
||||
bool bch2_btree_iter_advance(struct btree_iter *);
|
||||
bool bch2_btree_iter_rewind(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *);
|
||||
bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
{
|
||||
@ -411,10 +434,9 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo
|
||||
iter->k.size = 0;
|
||||
}
|
||||
|
||||
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
static inline void bch2_btree_iter_set_pos(struct btree_trans *trans,
|
||||
struct btree_iter *iter, struct bpos new_pos)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
|
||||
if (unlikely(iter->update_path))
|
||||
bch2_path_put(trans, iter->update_path,
|
||||
iter->flags & BTREE_ITER_intent);
|
||||
@ -432,21 +454,29 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
|
||||
iter->pos = bkey_start_pos(&iter->k);
|
||||
}
|
||||
|
||||
static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
|
||||
static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans,
|
||||
struct btree_iter *iter, u32 snapshot)
|
||||
{
|
||||
struct bpos pos = iter->pos;
|
||||
|
||||
iter->snapshot = snapshot;
|
||||
pos.snapshot = snapshot;
|
||||
bch2_btree_iter_set_pos(iter, pos);
|
||||
bch2_btree_iter_set_pos(trans, iter, pos);
|
||||
}
|
||||
|
||||
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
unsigned btree_id,
|
||||
unsigned level,
|
||||
unsigned flags)
|
||||
{
|
||||
if (level || !btree_id_cached(trans->c, btree_id)) {
|
||||
flags &= ~BTREE_ITER_cached;
|
||||
flags &= ~BTREE_ITER_with_key_cache;
|
||||
} else if (!(flags & BTREE_ITER_cached))
|
||||
flags |= BTREE_ITER_with_key_cache;
|
||||
|
||||
if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) &&
|
||||
btree_id_is_extents(btree_id))
|
||||
flags |= BTREE_ITER_is_extents;
|
||||
@ -465,19 +495,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
unsigned btree_id,
|
||||
unsigned flags)
|
||||
{
|
||||
if (!btree_id_cached(trans->c, btree_id)) {
|
||||
flags &= ~BTREE_ITER_cached;
|
||||
flags &= ~BTREE_ITER_with_key_cache;
|
||||
} else if (!(flags & BTREE_ITER_cached))
|
||||
flags |= BTREE_ITER_with_key_cache;
|
||||
|
||||
return __bch2_btree_iter_flags(trans, btree_id, flags);
|
||||
}
|
||||
|
||||
static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned btree_id, struct bpos pos,
|
||||
@ -486,7 +503,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
|
||||
unsigned flags,
|
||||
unsigned long ip)
|
||||
{
|
||||
iter->trans = trans;
|
||||
iter->update_path = 0;
|
||||
iter->key_cache_path = 0;
|
||||
iter->btree_id = btree_id;
|
||||
@ -514,7 +530,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
|
||||
if (__builtin_constant_p(btree_id) &&
|
||||
__builtin_constant_p(flags))
|
||||
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
|
||||
bch2_btree_iter_flags(trans, btree_id, flags),
|
||||
bch2_btree_iter_flags(trans, btree_id, 0, flags),
|
||||
_THIS_IP_);
|
||||
else
|
||||
bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);
|
||||
@ -523,22 +539,50 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
|
||||
void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
|
||||
enum btree_id, struct bpos,
|
||||
unsigned, unsigned, unsigned);
|
||||
void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
|
||||
void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *);
|
||||
|
||||
void bch2_set_btree_iter_dontneed(struct btree_iter *);
|
||||
void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
|
||||
darray_trans_kmalloc_trace *);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* bch2_trans_kmalloc - allocate memory for use by the current transaction
|
||||
*
|
||||
* Must be called after bch2_trans_begin, which on second and further calls
|
||||
* frees all memory allocated in this transaction
|
||||
*/
|
||||
static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
|
||||
|
||||
/*
 * Record one transaction allocation (call site @ip, @size bytes) in
 * trans->trans_kmalloc_trace. A no-op unless CONFIG_BCACHEFS_DEBUG is set.
 */
static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
|
||||
unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
darray_push(&trans->trans_kmalloc_trace,
|
||||
((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Allocate @size bytes from the transaction's memory area without zeroing
 * them, recording @ip as the call site.
 *
 * @size is rounded up to a multiple of 8. If the request fits in what is left
 * of trans->mem it is served by bumping trans->mem_top; otherwise it is handed
 * to __bch2_trans_kmalloc().
 */
static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size,
|
||||
unsigned long ip)
|
||||
{
|
||||
size = roundup(size, 8);
|
||||
|
||||
bch2_trans_kmalloc_trace(trans, size, ip);
|
||||
|
||||
/* fast path: enough room left in the current buffer */
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
|
||||
void *p = trans->mem + trans->mem_top;
|
||||
|
||||
trans->mem_top += size;
|
||||
return p;
|
||||
} else {
|
||||
return __bch2_trans_kmalloc(trans, size, ip);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size,
|
||||
unsigned long ip)
|
||||
{
|
||||
size = roundup(size, 8);
|
||||
|
||||
bch2_trans_kmalloc_trace(trans, size, ip);
|
||||
|
||||
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
|
||||
void *p = trans->mem + trans->mem_top;
|
||||
|
||||
@ -546,22 +590,24 @@ static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
memset(p, 0, size);
|
||||
return p;
|
||||
} else {
|
||||
return __bch2_trans_kmalloc(trans, size);
|
||||
return __bch2_trans_kmalloc(trans, size, ip);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
|
||||
/**
|
||||
* bch2_trans_kmalloc - allocate memory for use by the current transaction
|
||||
*
|
||||
* Must be called after bch2_trans_begin, which on second and further calls
|
||||
* frees all memory allocated in this transaction
|
||||
*/
|
||||
static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
{
|
||||
size = round_up(size, 8);
|
||||
|
||||
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
|
||||
void *p = trans->mem + trans->mem_top;
|
||||
|
||||
trans->mem_top += size;
|
||||
return p;
|
||||
} else {
|
||||
return __bch2_trans_kmalloc(trans, size);
|
||||
return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_);
|
||||
}
|
||||
|
||||
static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
|
||||
{
|
||||
return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_);
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
|
||||
@ -572,7 +618,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
k = bch2_btree_iter_peek_slot(trans, iter);
|
||||
|
||||
if (!bkey_err(k) && type && k.k->type != type)
|
||||
k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
|
||||
@ -593,13 +639,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
|
||||
bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
|
||||
_btree_id, _pos, _flags, KEY_TYPE_##_type))
|
||||
|
||||
static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k)
|
||||
{
|
||||
unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k));
|
||||
memcpy(dst_v, src_k.v, b);
|
||||
if (unlikely(b < dst_size))
|
||||
memset(dst_v + b, 0, dst_size - b);
|
||||
}
|
||||
|
||||
#define bkey_val_copy(_dst_v, _src_k) \
|
||||
do { \
|
||||
unsigned b = min_t(unsigned, sizeof(*_dst_v), \
|
||||
bkey_val_bytes(_src_k.k)); \
|
||||
memcpy(_dst_v, _src_k.v, b); \
|
||||
if (b < sizeof(*_dst_v)) \
|
||||
memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \
|
||||
BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \
|
||||
__bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \
|
||||
} while (0)
|
||||
|
||||
static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
|
||||
@ -608,17 +659,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
|
||||
unsigned val_size, void *val)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
|
||||
ret = bkey_err(k);
|
||||
struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
|
||||
int ret = bkey_err(k);
|
||||
if (!ret) {
|
||||
unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size);
|
||||
|
||||
memcpy(val, k.v, b);
|
||||
if (unlikely(b < sizeof(*val)))
|
||||
memset((void *) val + b, 0, sizeof(*val) - b);
|
||||
__bkey_val_copy(val, val_size, k);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
}
|
||||
|
||||
@ -644,14 +688,14 @@ u32 bch2_trans_begin(struct btree_trans *);
|
||||
int _ret3 = 0; \
|
||||
do { \
|
||||
_ret3 = lockrestart_do((_trans), ({ \
|
||||
struct btree *_b = bch2_btree_iter_peek_node(&_iter); \
|
||||
struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\
|
||||
if (!_b) \
|
||||
break; \
|
||||
\
|
||||
PTR_ERR_OR_ZERO(_b) ?: (_do); \
|
||||
})) ?: \
|
||||
lockrestart_do((_trans), \
|
||||
PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \
|
||||
PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\
|
||||
} while (!_ret3); \
|
||||
\
|
||||
bch2_trans_iter_exit((_trans), &(_iter)); \
|
||||
@ -663,31 +707,34 @@ u32 bch2_trans_begin(struct btree_trans *);
|
||||
__for_each_btree_node(_trans, _iter, _btree_id, _start, \
|
||||
0, 0, _flags, _b, _do)
|
||||
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned flags)
|
||||
{
|
||||
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
|
||||
bch2_btree_iter_peek_prev(iter);
|
||||
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
|
||||
bch2_btree_iter_peek_prev(trans, iter);
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned flags)
|
||||
{
|
||||
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
|
||||
bch2_btree_iter_peek(iter);
|
||||
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
|
||||
bch2_btree_iter_peek(trans, iter);
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
|
||||
static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos end,
|
||||
unsigned flags)
|
||||
{
|
||||
if (!(flags & BTREE_ITER_slots))
|
||||
return bch2_btree_iter_peek_upto(iter, end);
|
||||
return bch2_btree_iter_peek_max(trans, iter, end);
|
||||
|
||||
if (bkey_gt(iter->pos, end))
|
||||
return bkey_s_c_null;
|
||||
|
||||
return bch2_btree_iter_peek_slot(iter);
|
||||
return bch2_btree_iter_peek_slot(trans, iter);
|
||||
}
|
||||
|
||||
int __bch2_btree_trans_too_many_iters(struct btree_trans *);
|
||||
@ -743,10 +790,10 @@ transaction_restart: \
|
||||
if (!_ret2) \
|
||||
bch2_trans_verify_not_restarted(_trans, _restart_count);\
|
||||
\
|
||||
_ret2 ?: trans_was_restarted(_trans, _restart_count); \
|
||||
_ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
|
||||
})
|
||||
|
||||
#define for_each_btree_key_upto_continue(_trans, _iter, \
|
||||
#define for_each_btree_key_max_continue(_trans, _iter, \
|
||||
_end, _flags, _k, _do) \
|
||||
({ \
|
||||
struct bkey_s_c _k; \
|
||||
@ -754,23 +801,23 @@ transaction_restart: \
|
||||
\
|
||||
do { \
|
||||
_ret3 = lockrestart_do(_trans, ({ \
|
||||
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), \
|
||||
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), \
|
||||
_end, (_flags)); \
|
||||
if (!(_k).k) \
|
||||
break; \
|
||||
\
|
||||
bkey_err(_k) ?: (_do); \
|
||||
})); \
|
||||
} while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \
|
||||
} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \
|
||||
\
|
||||
bch2_trans_iter_exit((_trans), &(_iter)); \
|
||||
_ret3; \
|
||||
})
|
||||
|
||||
#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \
|
||||
for_each_btree_key_upto_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do)
|
||||
for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do)
|
||||
|
||||
#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
|
||||
#define for_each_btree_key_max(_trans, _iter, _btree_id, \
|
||||
_start, _end, _flags, _k, _do) \
|
||||
({ \
|
||||
bch2_trans_begin(trans); \
|
||||
@ -779,12 +826,12 @@ transaction_restart: \
|
||||
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
\
|
||||
for_each_btree_key_upto_continue(_trans, _iter, _end, _flags, _k, _do);\
|
||||
for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do);\
|
||||
})
|
||||
|
||||
#define for_each_btree_key(_trans, _iter, _btree_id, \
|
||||
_start, _flags, _k, _do) \
|
||||
for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \
|
||||
for_each_btree_key_max(_trans, _iter, _btree_id, _start, \
|
||||
SPOS_MAX, _flags, _k, _do)
|
||||
|
||||
#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
|
||||
@ -799,14 +846,14 @@ transaction_restart: \
|
||||
\
|
||||
do { \
|
||||
_ret3 = lockrestart_do(_trans, ({ \
|
||||
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), \
|
||||
(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), \
|
||||
(_flags)); \
|
||||
if (!(_k).k) \
|
||||
break; \
|
||||
\
|
||||
bkey_err(_k) ?: (_do); \
|
||||
})); \
|
||||
} while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \
|
||||
} while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter))); \
|
||||
\
|
||||
bch2_trans_iter_exit((_trans), &(_iter)); \
|
||||
_ret3; \
|
||||
@ -828,45 +875,46 @@ transaction_restart: \
|
||||
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
|
||||
(_journal_seq), (_commit_flags)))
|
||||
|
||||
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
|
||||
#define for_each_btree_key_max_commit(_trans, _iter, _btree_id, \
|
||||
_start, _end, _iter_flags, _k, \
|
||||
_disk_res, _journal_seq, _commit_flags,\
|
||||
_do) \
|
||||
for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
|
||||
for_each_btree_key_max(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
|
||||
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
|
||||
(_journal_seq), (_commit_flags)))
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
|
||||
struct btree_iter *);
|
||||
|
||||
#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
|
||||
#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \
|
||||
_start, _end, _flags, _k, _ret) \
|
||||
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
|
||||
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\
|
||||
!((_ret) = bkey_err(_k)) && (_k).k; \
|
||||
bch2_btree_iter_advance(&(_iter)))
|
||||
bch2_btree_iter_advance(_trans, &(_iter)))
|
||||
|
||||
#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\
|
||||
#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\
|
||||
for (; \
|
||||
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \
|
||||
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags), \
|
||||
!((_ret) = bkey_err(_k)) && (_k).k; \
|
||||
bch2_btree_iter_advance(&(_iter)))
|
||||
bch2_btree_iter_advance(_trans, &(_iter)))
|
||||
|
||||
#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
|
||||
_start, _flags, _k, _ret) \
|
||||
for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\
|
||||
for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\
|
||||
SPOS_MAX, _flags, _k, _ret)
|
||||
|
||||
#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \
|
||||
_start, _flags, _k, _ret) \
|
||||
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags), \
|
||||
(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags), \
|
||||
!((_ret) = bkey_err(_k)) && (_k).k; \
|
||||
bch2_btree_iter_rewind(&(_iter)))
|
||||
bch2_btree_iter_rewind(_trans, &(_iter)))
|
||||
|
||||
#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
|
||||
for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
|
||||
#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret) \
|
||||
for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret)
|
||||
|
||||
/*
|
||||
* This should not be used in a fastpath, without first trying _do in
|
||||
@ -912,6 +960,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))
|
||||
|
||||
struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
|
||||
void bch2_trans_put(struct btree_trans *);
|
||||
|
||||
|
@ -16,6 +16,17 @@
|
||||
* operations for the regular btree iter code to use:
|
||||
*/
|
||||
|
||||
static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos)
|
||||
{
|
||||
size_t gap_size = keys->size - keys->nr;
|
||||
|
||||
BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size);
|
||||
|
||||
if (pos >= keys->gap)
|
||||
pos -= gap_size;
|
||||
return pos;
|
||||
}
|
||||
|
||||
static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
|
||||
{
|
||||
size_t gap_size = keys->size - keys->nr;
|
||||
@ -61,7 +72,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
|
||||
}
|
||||
|
||||
/* Returns first non-overwritten key >= search key: */
|
||||
struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
|
||||
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
|
||||
unsigned level, struct bpos pos,
|
||||
struct bpos end_pos, size_t *idx)
|
||||
{
|
||||
@ -84,18 +95,54 @@ search:
|
||||
}
|
||||
}
|
||||
|
||||
struct bkey_i *ret = NULL;
|
||||
rcu_read_lock(); /* for overwritten_ranges */
|
||||
|
||||
while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
|
||||
if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
if (k->overwritten) {
|
||||
(*idx)++;
|
||||
if (k->overwritten_range)
|
||||
*idx = rcu_dereference(k->overwritten_range)->end;
|
||||
else
|
||||
*idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (__journal_key_cmp(btree_id, level, pos, k) <= 0)
|
||||
return k->k;
|
||||
if (__journal_key_cmp(btree_id, level, pos, k) <= 0) {
|
||||
ret = k->k;
|
||||
break;
|
||||
}
|
||||
|
||||
(*idx)++;
|
||||
iters++;
|
||||
if (iters == 10) {
|
||||
*idx = 0;
|
||||
rcu_read_unlock();
|
||||
goto search;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
|
||||
unsigned level, struct bpos pos,
|
||||
struct bpos end_pos, size_t *idx)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
unsigned iters = 0;
|
||||
struct journal_key *k;
|
||||
|
||||
BUG_ON(*idx > keys->nr);
|
||||
search:
|
||||
if (!*idx)
|
||||
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
|
||||
|
||||
while (*idx &&
|
||||
__journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
|
||||
(*idx)++;
|
||||
iters++;
|
||||
if (iters == 10) {
|
||||
@ -104,7 +151,36 @@ search:
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
struct bkey_i *ret = NULL;
|
||||
rcu_read_lock(); /* for overwritten_ranges */
|
||||
|
||||
while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
|
||||
if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
|
||||
break;
|
||||
|
||||
if (k->overwritten) {
|
||||
if (k->overwritten_range)
|
||||
*idx = rcu_dereference(k->overwritten_range)->start - 1;
|
||||
else
|
||||
*idx -= 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (__journal_key_cmp(btree_id, level, pos, k) >= 0) {
|
||||
ret = k->k;
|
||||
break;
|
||||
}
|
||||
|
||||
--(*idx);
|
||||
iters++;
|
||||
if (iters == 10) {
|
||||
*idx = 0;
|
||||
goto search;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
|
||||
@ -112,11 +188,12 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
|
||||
{
|
||||
size_t idx = 0;
|
||||
|
||||
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
|
||||
return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx);
|
||||
}
|
||||
|
||||
static void journal_iter_verify(struct journal_iter *iter)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
struct journal_keys *keys = iter->keys;
|
||||
size_t gap_size = keys->size - keys->nr;
|
||||
|
||||
@ -126,10 +203,10 @@ static void journal_iter_verify(struct journal_iter *iter)
|
||||
if (iter->idx < keys->size) {
|
||||
struct journal_key *k = keys->data + iter->idx;
|
||||
|
||||
int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
|
||||
cmp_int(k->level, iter->level);
|
||||
BUG_ON(cmp < 0);
|
||||
int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
|
||||
BUG_ON(cmp > 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void journal_iters_fix(struct bch_fs *c)
|
||||
@ -182,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
||||
* Ensure these keys are done last by journal replay, to unblock
|
||||
* journal reclaim:
|
||||
*/
|
||||
.journal_seq = U32_MAX,
|
||||
.journal_seq = U64_MAX,
|
||||
};
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
|
||||
@ -290,6 +367,68 @@ bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
|
||||
bkey_deleted(&keys->data[idx].k->k));
|
||||
}
|
||||
|
||||
static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos)
|
||||
{
|
||||
struct journal_key *k = keys->data + pos;
|
||||
size_t idx = pos_to_idx(keys, pos);
|
||||
|
||||
k->overwritten = true;
|
||||
|
||||
struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL;
|
||||
struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL;
|
||||
|
||||
bool prev_overwritten = prev && prev->overwritten;
|
||||
bool next_overwritten = next && next->overwritten;
|
||||
|
||||
struct journal_key_range_overwritten *prev_range =
|
||||
prev_overwritten ? prev->overwritten_range : NULL;
|
||||
struct journal_key_range_overwritten *next_range =
|
||||
next_overwritten ? next->overwritten_range : NULL;
|
||||
|
||||
BUG_ON(prev_range && prev_range->end != idx);
|
||||
BUG_ON(next_range && next_range->start != idx + 1);
|
||||
|
||||
if (prev_range && next_range) {
|
||||
prev_range->end = next_range->end;
|
||||
|
||||
keys->data[pos].overwritten_range = prev_range;
|
||||
for (size_t i = next_range->start; i < next_range->end; i++) {
|
||||
struct journal_key *ip = keys->data + idx_to_pos(keys, i);
|
||||
BUG_ON(ip->overwritten_range != next_range);
|
||||
ip->overwritten_range = prev_range;
|
||||
}
|
||||
|
||||
kfree_rcu_mightsleep(next_range);
|
||||
} else if (prev_range) {
|
||||
prev_range->end++;
|
||||
k->overwritten_range = prev_range;
|
||||
if (next_overwritten) {
|
||||
prev_range->end++;
|
||||
next->overwritten_range = prev_range;
|
||||
}
|
||||
} else if (next_range) {
|
||||
next_range->start--;
|
||||
k->overwritten_range = next_range;
|
||||
if (prev_overwritten) {
|
||||
next_range->start--;
|
||||
prev->overwritten_range = next_range;
|
||||
}
|
||||
} else if (prev_overwritten || next_overwritten) {
|
||||
struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
|
||||
if (!r)
|
||||
return;
|
||||
|
||||
r->start = idx - (size_t) prev_overwritten;
|
||||
r->end = idx + 1 + (size_t) next_overwritten;
|
||||
|
||||
rcu_assign_pointer(k->overwritten_range, r);
|
||||
if (prev_overwritten)
|
||||
prev->overwritten_range = r;
|
||||
if (next_overwritten)
|
||||
next->overwritten_range = r;
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
||||
unsigned level, struct bpos pos)
|
||||
{
|
||||
@ -299,8 +438,12 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
||||
if (idx < keys->size &&
|
||||
keys->data[idx].btree_id == btree &&
|
||||
keys->data[idx].level == level &&
|
||||
bpos_eq(keys->data[idx].k->k.p, pos))
|
||||
keys->data[idx].overwritten = true;
|
||||
bpos_eq(keys->data[idx].k->k.p, pos) &&
|
||||
!keys->data[idx].overwritten) {
|
||||
mutex_lock(&keys->overwrite_lock);
|
||||
__bch2_journal_key_overwritten(keys, idx);
|
||||
mutex_unlock(&keys->overwrite_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_journal_iter_advance(struct journal_iter *iter)
|
||||
@ -314,24 +457,32 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
|
||||
|
||||
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
|
||||
{
|
||||
struct bkey_s_c ret = bkey_s_c_null;
|
||||
|
||||
journal_iter_verify(iter);
|
||||
|
||||
rcu_read_lock();
|
||||
while (iter->idx < iter->keys->size) {
|
||||
struct journal_key *k = iter->keys->data + iter->idx;
|
||||
|
||||
int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
|
||||
cmp_int(k->level, iter->level);
|
||||
if (cmp > 0)
|
||||
int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
|
||||
if (cmp < 0)
|
||||
break;
|
||||
BUG_ON(cmp);
|
||||
|
||||
if (!k->overwritten)
|
||||
return bkey_i_to_s_c(k->k);
|
||||
|
||||
bch2_journal_iter_advance(iter);
|
||||
if (!k->overwritten) {
|
||||
ret = bkey_i_to_s_c(k->k);
|
||||
break;
|
||||
}
|
||||
|
||||
return bkey_s_c_null;
|
||||
if (k->overwritten_range)
|
||||
iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
|
||||
else
|
||||
bch2_journal_iter_advance(iter);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_journal_iter_exit(struct journal_iter *iter)
|
||||
@ -382,6 +533,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
|
||||
: (level > 1 ? 1 : 16);
|
||||
|
||||
iter.prefetch = false;
|
||||
iter.fail_if_too_many_whiteouts = true;
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
while (nr--) {
|
||||
@ -400,6 +552,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
|
||||
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
|
||||
{
|
||||
struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
|
||||
size_t iters = 0;
|
||||
|
||||
if (iter->prefetch && iter->journal.level)
|
||||
btree_and_journal_iter_prefetch(iter);
|
||||
@ -407,6 +560,11 @@ again:
|
||||
if (iter->at_end)
|
||||
return bkey_s_c_null;
|
||||
|
||||
iters++;
|
||||
|
||||
if (iters > 20 && iter->fail_if_too_many_whiteouts)
|
||||
return bkey_s_c_null;
|
||||
|
||||
while ((btree_k = bch2_journal_iter_peek_btree(iter)).k &&
|
||||
bpos_lt(btree_k.k->p, iter->pos))
|
||||
bch2_journal_iter_advance_btree(iter);
|
||||
@ -481,21 +639,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
|
||||
|
||||
/* sort and dedup all keys in the journal: */
|
||||
|
||||
void bch2_journal_entries_free(struct bch_fs *c)
|
||||
{
|
||||
struct journal_replay **i;
|
||||
struct genradix_iter iter;
|
||||
|
||||
genradix_for_each(&c->journal_entries, iter, i)
|
||||
kvfree(*i);
|
||||
genradix_free(&c->journal_entries);
|
||||
}
|
||||
|
||||
/*
|
||||
* When keys compare equal, oldest compares first:
|
||||
*/
|
||||
static int journal_sort_key_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
cond_resched();
|
||||
|
||||
const struct journal_key *l = _l;
|
||||
const struct journal_key *r = _r;
|
||||
|
||||
@ -515,15 +665,26 @@ void bch2_journal_keys_put(struct bch_fs *c)
|
||||
|
||||
move_gap(keys, keys->nr);
|
||||
|
||||
darray_for_each(*keys, i)
|
||||
darray_for_each(*keys, i) {
|
||||
if (i->overwritten_range &&
|
||||
(i == &darray_last(*keys) ||
|
||||
i->overwritten_range != i[1].overwritten_range))
|
||||
kfree(i->overwritten_range);
|
||||
|
||||
if (i->allocated)
|
||||
kfree(i->k);
|
||||
}
|
||||
|
||||
kvfree(keys->data);
|
||||
keys->data = NULL;
|
||||
keys->nr = keys->gap = keys->size = 0;
|
||||
|
||||
bch2_journal_entries_free(c);
|
||||
struct journal_replay **i;
|
||||
struct genradix_iter iter;
|
||||
|
||||
genradix_for_each(&c->journal_entries, iter, i)
|
||||
kvfree(*i);
|
||||
genradix_free(&c->journal_entries);
|
||||
}
|
||||
|
||||
static void __journal_keys_sort(struct journal_keys *keys)
|
||||
@ -628,8 +789,20 @@ void bch2_journal_keys_dump(struct bch_fs *c)
|
||||
|
||||
darray_for_each(*keys, i) {
|
||||
printbuf_reset(&buf);
|
||||
prt_printf(&buf, "btree=");
|
||||
bch2_btree_id_to_text(&buf, i->btree_id);
|
||||
prt_printf(&buf, " l=%u ", i->level);
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
|
||||
pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf);
|
||||
pr_err("%s", buf.buf);
|
||||
}
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
void bch2_fs_journal_keys_init(struct bch_fs *c)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
|
||||
atomic_set(&keys->ref, 1);
|
||||
keys->initial_ref_held = true;
|
||||
mutex_init(&keys->overwrite_lock);
|
||||
}
|
||||
|
@ -26,16 +26,24 @@ struct btree_and_journal_iter {
|
||||
struct bpos pos;
|
||||
bool at_end;
|
||||
bool prefetch;
|
||||
bool fail_if_too_many_whiteouts;
|
||||
};
|
||||
|
||||
static inline int __journal_key_btree_cmp(enum btree_id l_btree_id,
|
||||
unsigned l_level,
|
||||
const struct journal_key *r)
|
||||
{
|
||||
return -cmp_int(l_level, r->level) ?:
|
||||
cmp_int(l_btree_id, r->btree_id);
|
||||
}
|
||||
|
||||
static inline int __journal_key_cmp(enum btree_id l_btree_id,
|
||||
unsigned l_level,
|
||||
struct bpos l_pos,
|
||||
const struct journal_key *r)
|
||||
{
|
||||
return (cmp_int(l_btree_id, r->btree_id) ?:
|
||||
cmp_int(l_level, r->level) ?:
|
||||
bpos_cmp(l_pos, r->k->k.p));
|
||||
return __journal_key_btree_cmp(l_btree_id, l_level, r) ?:
|
||||
bpos_cmp(l_pos, r->k->k.p);
|
||||
}
|
||||
|
||||
static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
|
||||
@ -43,7 +51,9 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour
|
||||
return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
|
||||
}
|
||||
|
||||
struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
|
||||
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id,
|
||||
unsigned, struct bpos, struct bpos, size_t *);
|
||||
struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id,
|
||||
unsigned, struct bpos, struct bpos, size_t *);
|
||||
struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
|
||||
unsigned, struct bpos);
|
||||
@ -79,8 +89,6 @@ static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
|
||||
c->journal_keys.initial_ref_held = false;
|
||||
}
|
||||
|
||||
void bch2_journal_entries_free(struct bch_fs *);
|
||||
|
||||
int bch2_journal_keys_sort(struct bch_fs *);
|
||||
|
||||
void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
|
||||
@ -89,4 +97,6 @@ void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
|
||||
|
||||
void bch2_journal_keys_dump(struct bch_fs *);
|
||||
|
||||
void bch2_fs_journal_keys_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
|
||||
|
36
libbcachefs/btree_journal_iter_types.h
Normal file
36
libbcachefs/btree_journal_iter_types.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H
|
||||
#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H
|
||||
|
||||
struct journal_key_range_overwritten {
|
||||
size_t start, end;
|
||||
};
|
||||
|
||||
struct journal_key {
|
||||
u64 journal_seq;
|
||||
u32 journal_offset;
|
||||
enum btree_id btree_id:8;
|
||||
unsigned level:8;
|
||||
bool allocated;
|
||||
bool overwritten;
|
||||
struct journal_key_range_overwritten __rcu *
|
||||
overwritten_range;
|
||||
struct bkey_i *k;
|
||||
};
|
||||
|
||||
struct journal_keys {
|
||||
/* must match layout in darray_types.h */
|
||||
size_t nr, size;
|
||||
struct journal_key *data;
|
||||
/*
|
||||
* Gap buffer: instead of all the empty space in the array being at the
|
||||
* end of the buffer - from @nr to @size - the empty space is at @gap.
|
||||
* This means that sequential insertions are O(n) instead of O(n^2).
|
||||
*/
|
||||
size_t gap;
|
||||
atomic_t ref;
|
||||
bool initial_ref_held;
|
||||
struct mutex overwrite_lock;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */
|
@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
|
||||
}
|
||||
|
||||
if (ck) {
|
||||
bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
|
||||
bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
|
||||
ck->c.cached = true;
|
||||
goto lock;
|
||||
}
|
||||
@ -197,7 +197,9 @@ out:
|
||||
return ck;
|
||||
}
|
||||
|
||||
static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
|
||||
static int btree_key_cache_create(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_path *ck_path,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
key_u64s = min(256U, (key_u64s * 3) / 2);
|
||||
key_u64s = roundup_pow_of_two(key_u64s);
|
||||
|
||||
struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
|
||||
struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s);
|
||||
int ret = PTR_ERR_OR_ZERO(ck);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
ck = bkey_cached_reuse(bc);
|
||||
if (unlikely(!ck)) {
|
||||
bch_err(c, "error allocating memory for key cache item, btree %s",
|
||||
bch2_btree_id_str(path->btree_id));
|
||||
bch2_btree_id_str(ck_path->btree_id));
|
||||
return -BCH_ERR_ENOMEM_btree_key_cache_create;
|
||||
}
|
||||
}
|
||||
|
||||
ck->c.level = 0;
|
||||
ck->c.btree_id = path->btree_id;
|
||||
ck->key.btree_id = path->btree_id;
|
||||
ck->key.pos = path->pos;
|
||||
ck->c.btree_id = ck_path->btree_id;
|
||||
ck->key.btree_id = ck_path->btree_id;
|
||||
ck->key.pos = ck_path->pos;
|
||||
ck->flags = 1U << BKEY_CACHED_ACCESSED;
|
||||
|
||||
if (unlikely(key_u64s > ck->u64s)) {
|
||||
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
||||
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
|
||||
|
||||
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
|
||||
kmalloc(key_u64s * sizeof(u64), _gfp));
|
||||
@ -258,32 +260,52 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
|
||||
bkey_reassemble(ck->k, k);
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
|
||||
|
||||
bch2_btree_node_unlock_write(trans, path, path_l(path)->b);
|
||||
|
||||
if (unlikely(ret)) /* raced with another fill? */
|
||||
goto err;
|
||||
|
||||
atomic_long_inc(&bc->nr_keys);
|
||||
six_unlock_write(&ck->c.lock);
|
||||
|
||||
enum six_lock_type lock_want = __btree_lock_want(path, 0);
|
||||
enum six_lock_type lock_want = __btree_lock_want(ck_path, 0);
|
||||
if (lock_want == SIX_LOCK_read)
|
||||
six_lock_downgrade(&ck->c.lock);
|
||||
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want);
|
||||
ck_path->uptodate = BTREE_ITER_UPTODATE;
|
||||
return 0;
|
||||
err:
|
||||
bkey_cached_free(bc, ck);
|
||||
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
||||
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans,
|
||||
struct btree_path *ck_path,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bpos_to_text(&buf, ck_path->pos);
|
||||
prt_char(&buf, ' ');
|
||||
bch2_bkey_val_to_text(&buf, trans->c, k);
|
||||
trace_key_cache_fill(trans, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
static noinline int btree_key_cache_fill(struct btree_trans *trans,
|
||||
struct btree_path *ck_path,
|
||||
unsigned flags)
|
||||
{
|
||||
if (flags & BTREE_ITER_cached_nofill) {
|
||||
ck_path->uptodate = BTREE_ITER_UPTODATE;
|
||||
ck_path->l[0].b = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -293,10 +315,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
|
||||
BTREE_ITER_intent|
|
||||
BTREE_ITER_key_cache_fill|
|
||||
BTREE_ITER_cached_nofill);
|
||||
iter.flags &= ~BTREE_ITER_with_journal;
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
k = bch2_btree_iter_peek_slot(trans, &iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -306,12 +329,15 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
|
||||
if (unlikely(ret))
|
||||
goto out;
|
||||
|
||||
ret = btree_key_cache_create(trans, ck_path, k);
|
||||
ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (trace_key_cache_fill_enabled())
|
||||
do_trace_key_cache_fill(trans, ck_path, k);
|
||||
out:
|
||||
/* We're not likely to need this iterator again: */
|
||||
bch2_set_btree_iter_dontneed(&iter);
|
||||
bch2_set_btree_iter_dontneed(trans, &iter);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
@ -392,7 +418,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
BTREE_ITER_intent);
|
||||
b_iter.flags &= ~BTREE_ITER_with_key_cache;
|
||||
|
||||
ret = bch2_btree_iter_traverse(&c_iter);
|
||||
ret = bch2_btree_iter_traverse(trans, &c_iter);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -424,8 +450,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
!test_bit(JOURNAL_space_low, &c->journal.flags))
|
||||
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
|
||||
|
||||
ret = bch2_btree_iter_traverse(&b_iter) ?:
|
||||
bch2_trans_update(trans, &b_iter, ck->k,
|
||||
struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter);
|
||||
ret = bkey_err(btree_k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* * Check that we're not violating cache coherency rules: */
|
||||
BUG_ON(bkey_deleted(btree_k.k));
|
||||
|
||||
ret = bch2_trans_update(trans, &b_iter, ck->k,
|
||||
BTREE_UPDATE_key_cache_reclaim|
|
||||
BTREE_UPDATE_internal_snapshot_node|
|
||||
BTREE_TRIGGER_norun) ?:
|
||||
@ -433,7 +466,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
commit_flags);
|
||||
|
||||
err:
|
||||
bch2_fs_fatal_err_on(ret &&
|
||||
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
|
||||
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
|
||||
@ -586,8 +619,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
|
||||
bkey_cached_free(bc, ck);
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
path->should_be_locked = false;
|
||||
|
||||
struct btree_path *path2;
|
||||
unsigned i;
|
||||
trans_for_each_path(trans, path2, i)
|
||||
if (path2->l[0].b == (void *) ck) {
|
||||
__bch2_btree_path_unlock(trans, path2);
|
||||
path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
|
||||
path2->should_be_locked = false;
|
||||
btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE);
|
||||
}
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
}
|
||||
|
||||
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
@ -711,7 +754,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
rcu_read_unlock();
|
||||
mutex_lock(&bc->table.mutex);
|
||||
mutex_unlock(&bc->table.mutex);
|
||||
rcu_read_lock();
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < tbl->size; i++)
|
||||
|
@ -7,9 +7,10 @@
|
||||
static struct lock_class_key bch2_btree_node_lock_key;
|
||||
|
||||
void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
|
||||
enum six_lock_init_flags flags)
|
||||
enum six_lock_init_flags flags,
|
||||
gfp_t gfp)
|
||||
{
|
||||
__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
|
||||
__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp);
|
||||
lockdep_set_notrack_class(&b->lock);
|
||||
}
|
||||
|
||||
@ -90,7 +91,7 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
for (i = g->g; i != g->g + g->nr; i++) {
|
||||
struct task_struct *task = i->trans->locking_wait.task;
|
||||
struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
|
||||
if (i != g->g)
|
||||
prt_str(out, "<- ");
|
||||
prt_printf(out, "%u ", task ? task->pid : 0);
|
||||
@ -109,6 +110,12 @@ static noinline void lock_graph_pop_all(struct lock_graph *g)
|
||||
lock_graph_up(g);
|
||||
}
|
||||
|
||||
static noinline void lock_graph_pop_from(struct lock_graph *g, struct trans_waiting_for_lock *i)
|
||||
{
|
||||
while (g->g + g->nr > i)
|
||||
lock_graph_up(g);
|
||||
}
|
||||
|
||||
static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
|
||||
{
|
||||
g->g[g->nr++] = (struct trans_waiting_for_lock) {
|
||||
@ -124,15 +131,20 @@ static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
|
||||
__lock_graph_down(g, trans);
|
||||
}
|
||||
|
||||
static bool lock_graph_remove_non_waiters(struct lock_graph *g)
|
||||
static bool lock_graph_remove_non_waiters(struct lock_graph *g,
|
||||
struct trans_waiting_for_lock *from)
|
||||
{
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
for (i = g->g + 1; i < g->g + g->nr; i++)
|
||||
if (from->trans->locking != from->node_want) {
|
||||
lock_graph_pop_from(g, from);
|
||||
return true;
|
||||
}
|
||||
|
||||
for (i = from + 1; i < g->g + g->nr; i++)
|
||||
if (i->trans->locking != i->node_want ||
|
||||
i->trans->locking_wait.start_time != i[-1].lock_start_time) {
|
||||
while (g->g + g->nr > i)
|
||||
lock_graph_up(g);
|
||||
lock_graph_pop_from(g, i);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -160,7 +172,9 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
|
||||
{
|
||||
if (i == g->g) {
|
||||
trace_would_deadlock(g, i->trans);
|
||||
return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
return btree_trans_restart_foreign_task(i->trans,
|
||||
BCH_ERR_transaction_restart_would_deadlock,
|
||||
_THIS_IP_);
|
||||
} else {
|
||||
i->trans->lock_must_abort = true;
|
||||
wake_up_process(i->trans->locking_wait.task);
|
||||
@ -179,13 +193,14 @@ static int btree_trans_abort_preference(struct btree_trans *trans)
|
||||
return 3;
|
||||
}
|
||||
|
||||
static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
|
||||
static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
|
||||
struct trans_waiting_for_lock *from)
|
||||
{
|
||||
struct trans_waiting_for_lock *i, *abort = NULL;
|
||||
unsigned best = 0, pref;
|
||||
int ret;
|
||||
|
||||
if (lock_graph_remove_non_waiters(g))
|
||||
if (lock_graph_remove_non_waiters(g, from))
|
||||
return 0;
|
||||
|
||||
/* Only checking, for debugfs: */
|
||||
@ -195,7 +210,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
for (i = from; i < g->g + g->nr; i++) {
|
||||
pref = btree_trans_abort_preference(i->trans);
|
||||
if (pref > best) {
|
||||
abort = i;
|
||||
@ -229,8 +244,9 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
|
||||
ret = abort_lock(g, abort);
|
||||
out:
|
||||
if (ret)
|
||||
while (g->nr)
|
||||
lock_graph_up(g);
|
||||
lock_graph_pop_all(g);
|
||||
else
|
||||
lock_graph_pop_from(g, abort);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -243,7 +259,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
|
||||
for (i = g->g; i < g->g + g->nr; i++)
|
||||
if (i->trans == trans) {
|
||||
closure_put(&trans->ref);
|
||||
return break_cycle(g, cycle);
|
||||
return break_cycle(g, cycle, i);
|
||||
}
|
||||
|
||||
if (g->nr == ARRAY_SIZE(g->g)) {
|
||||
@ -252,8 +268,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
|
||||
if (orig_trans->lock_may_not_fail)
|
||||
return 0;
|
||||
|
||||
while (g->nr)
|
||||
lock_graph_up(g);
|
||||
lock_graph_pop_all(g);
|
||||
|
||||
if (cycle)
|
||||
return 0;
|
||||
@ -281,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
|
||||
|
||||
g.nr = 0;
|
||||
|
||||
if (trans->lock_must_abort) {
|
||||
if (trans->lock_must_abort && !trans->lock_may_not_fail) {
|
||||
if (cycle)
|
||||
return -1;
|
||||
|
||||
@ -336,7 +351,7 @@ next:
|
||||
* structures - which means it can't be blocked
|
||||
* waiting on a lock:
|
||||
*/
|
||||
if (!lock_graph_remove_non_waiters(&g)) {
|
||||
if (!lock_graph_remove_non_waiters(&g, g.g)) {
|
||||
/*
|
||||
* If lock_graph_remove_non_waiters()
|
||||
* didn't do anything, it must be
|
||||
@ -512,7 +527,6 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = path->l[level].b;
|
||||
struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
|
||||
|
||||
if (!is_btree_node(path, level))
|
||||
return false;
|
||||
@ -536,24 +550,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
if (race_fault())
|
||||
return false;
|
||||
|
||||
if (btree_node_locked(path, level)) {
|
||||
bool ret;
|
||||
|
||||
six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
|
||||
ret = six_lock_tryupgrade(&b->c.lock);
|
||||
six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);
|
||||
|
||||
if (ret)
|
||||
if (btree_node_locked(path, level)
|
||||
? six_lock_tryupgrade(&b->c.lock)
|
||||
: six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
} else {
|
||||
if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do we already have an intent lock via another path? If so, just bump
|
||||
* lock count:
|
||||
*/
|
||||
if (btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
@ -782,7 +783,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
|
||||
return bch2_trans_relock_fail(trans, path, &f, trace);
|
||||
}
|
||||
|
||||
trans_set_locked(trans);
|
||||
trans_set_locked(trans, true);
|
||||
out:
|
||||
bch2_trans_verify_locks(trans);
|
||||
return 0;
|
||||
@ -818,6 +819,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans)
|
||||
bch2_trans_srcu_unlock(trans);
|
||||
}
|
||||
|
||||
void bch2_trans_unlock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i;
|
||||
|
||||
trans_for_each_path(trans, path, i)
|
||||
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_write_locked(path, l))
|
||||
bch2_btree_node_unlock_write(trans, path, path->l[l].b);
|
||||
}
|
||||
|
||||
int __bch2_trans_mutex_lock(struct btree_trans *trans,
|
||||
struct mutex *lock)
|
||||
{
|
||||
@ -856,6 +868,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path)
|
||||
(want == BTREE_NODE_UNLOCKED ||
|
||||
have != BTREE_NODE_WRITE_LOCKED) &&
|
||||
want != have);
|
||||
|
||||
BUG_ON(btree_node_locked(path, l) &&
|
||||
path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock));
|
||||
}
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user