Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-04-01 00:00:03 +03:00

Compare commits: 99 commits
Commits (SHA1):

33483738c6  ea5bdadabd  66c943be6c  3d968816a4  76b3ec9d66  3e2d5b2b9a
7c47145f6c  396545c2ea  3bfe433d22  19d3a1778c  e54f288992  7c66e9fac5
2160e9a7bd  941dfd7a29  18b4914587  8af5c93e48  6657ce2de3  5504533986
a55c655158  b8435c5693  80d0a21ed3  a1080f2f6a  3bf9a19571  00dd889d33
1a8e684ae0  3d972489a8  197437be12  0a23a5ab1a  017c3cc66e  8b7810403b
ce0c2241a4  846f3398a2  e6aecdd3b8  62ea232b09  64ce740ac6  c0836924b1
f42ee45c6e  6cbadc946d  86cbeaf1c2  dd1a882d17  3e15e96cb9  d7f02b4ed5
1606364799  553e605886  55538d928b  7d30f895c6  8cc5bdede9  67c9b378c7
55afc8ad37  3fcc955d00  c4e25c6c3c  8361ae1a42  895fc9a101  56b2449ded
117e5fd7e9  35a13784a2  16e23eaec1  9cc5c9a79b  6f1429b4eb  7210d5c911
1055935ffe  634c812a1e  601deb6e5d  ef25411bdc  8b31dfb350  2aeeac7785
83cc57700f  626b1945eb  077677e61c  a17e2ca95c  ac124c8885  c560ff06f4
51ebefdd6e  77d72319a4  c2af793c8c  de51418b60  6829fb2010  d4932bf0e3
9beb9fabb2  511a714ed3  b7831af74d  64767d6443  8d5f44d3b1  5350fd2b7b
cb8dc1b2ba  9ed0f3fd72  2541755981  553d6f107a  4f9293b045  282331defa
b1e6d1ced2  3666da87f2  6153c37e38  4e25d26a81  1dba682527  571ca8b8ce
01413354a5  45468a9a6f  ad41c2fb76
Files changed:

.bcachefs_revision
.github/workflows/nix-flake.yml
Cargo.lock, Cargo.toml, INSTALL.md, Makefile, Makefile.compiler
arch/etc
bcachefs.8
bch_bindgen
c_src: bcachefs.c, cmd_data.c, cmd_device.c, cmd_format.c, cmd_fs.c,
    cmd_fsck.c, cmd_fusemount.c, cmd_key.c, cmd_kill_btree_node.c,
    cmd_list_journal.c, cmd_migrate.c, cmd_option.c, cmd_top.c, cmds.h,
    crypto.c, crypto.h, libbcachefs.c, libbcachefs.h, posix_to_bcachefs.c,
    posix_to_bcachefs.h, tools-util.c, tools-util.h
flake.lock, flake.nix
include: crypto, linux
libbcachefs: acl.c, alloc_background.c, alloc_background.h,
    alloc_background_format.h, alloc_foreground.c, alloc_foreground.h,
    alloc_types.h, backpointers.c, backpointers.h, bbpos.h, bcachefs.h,
    bcachefs_format.h, bcachefs_ioctl.h, bkey.c, bkey.h, bkey_methods.c,
    bkey_methods.h, bkey_types.h, bset.c, btree_cache.c, btree_cache.h,
    btree_gc.c, btree_gc.h, btree_io.c, btree_io.h, btree_iter.c,
    btree_iter.h, btree_journal_iter.c, btree_journal_iter.h,
    btree_journal_iter_types.h, btree_key_cache.c, btree_locking.c,
    btree_locking.h, btree_node_scan.c, btree_node_scan_types.h,
    btree_trans_commit.c, btree_types.h, btree_update.c
.bcachefs_revision

@@ -1 +1 @@
-3b80552e70573764bbf38b89c58749aef9dd8753
+7fdc3fa3cb5fb561f5945b4de418d48d1a726a8d

.github/workflows/nix-flake.yml (vendored, 41 lines changed)
@@ -1,22 +1,31 @@
-name: "Nix-Tests"
+name: Nix Flake actions

 on:
   pull_request:
   push:

 jobs:
-  nix-flake-check:
+  nix-matrix:
     runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
       - uses: actions/checkout@v4
-      - uses: cachix/install-nix-action@v27
-        with:
-          extra_nix_config: |
-            experimental-features = nix-command flakes
-            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
-      - uses: cachix/cachix-action@v15
-        with:
-          name: bcachefs-tools
-          # If you chose API tokens for write access OR if you have a private cache
-          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-      - run: nix flake show
-      - run: nix flake check --print-build-logs
-      - run: nix build --print-build-logs
+      - uses: cachix/install-nix-action@v30
+      - id: set-matrix
+        name: Generate Nix Matrix
+        run: |
+          set -Eeu
+          matrix="$(nix eval --json '.#githubActions.matrix')"
+          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  nix-build:
+    name: ${{ matrix.name }} (${{ matrix.system }})
+    needs: nix-matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{fromJSON(needs.nix-matrix.outputs.matrix)}}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: cachix/install-nix-action@v30
+      - run: nix build -L '.#${{ matrix.attr }}'
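The reworked workflow derives its job matrix from the flake itself instead of hardcoding a single check job. A minimal sketch of inspecting that matrix locally, assuming the flake exposes a `githubActions` attribute (e.g. via nix-github-actions) and that `jq` is installed:

```shell
# Evaluate the same attribute the nix-matrix job reads; each entry
# becomes one nix-build job with its own `attr`, `os` and `system`.
nix eval --json '.#githubActions.matrix' | jq .
```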
Cargo.lock (generated, 2 lines changed)

@@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"

 [[package]]
 name = "bcachefs-tools"
-version = "1.12.0"
+version = "1.25.0"
 dependencies = [
  "anyhow",
  "bch_bindgen",
Cargo.toml

@@ -1,9 +1,13 @@
+[workspace]
+resolver = "2"
+default-members = [".", "bch_bindgen"]
+
 [package]
 name = "bcachefs-tools"
-version = "1.12.0"
+version = "1.25.0"
 authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ]
 edition = "2021"
-rust-version = "1.77"
+rust-version = "1.77.0"

 [[bin]]
 name = "bcachefs"
INSTALL.md (18 lines changed)

@@ -39,12 +39,19 @@ Starting from Debian Trixie and Ubuntu 23.10, you will additionally need:
 apt install -y systemd-dev
 ```

-Fedora: install the "Development tools" group along with:
+Fedora: install build dependencies either with `dnf builddep bcachefs-tools` or with:
 ```shell
-dnf install -y libaio-devel libsodium-devel \
+dnf install -y @c-development libaio-devel libsodium-devel \
     libblkid-devel libzstd-devel zlib-devel userspace-rcu-devel \
     lz4-devel libuuid-devel valgrind-devel keyutils-libs-devel \
-    findutils udev systemd-devel llvm-devel
+    findutils systemd-devel clang-devel llvm-devel rust cargo
 ```

+openSUSE: install build dependencies with:
+```shell
+zypper in -y libaio-devel libsodium-devel libblkid-devel liburcu-devel \
+    libzstd-devel zlib-devel liblz4-devel libuuid-devel valgrind-devel \
+    keyutils-devel findutils udev systemd-devel llvm-devel
+```
+
 Arch: install bcachefs-tools-git from the AUR.

@@ -80,6 +87,11 @@ Arch:
 pacman -S fuse3
 ```

+openSUSE:
+```shell
+zypper in -y fuse3-devel
+```
+
 Then, make using the `BCACHEFS_FUSE` environment variable (make clean first if
 previously built without fuse support):
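A minimal sketch of the FUSE-enabled build the instructions above describe, run from the source tree:

```shell
# Rebuild from scratch with FUSE support; without the clean, stale
# objects built without -DBCACHEFS_FUSE would be reused.
make clean
BCACHEFS_FUSE=1 make
```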
Makefile (14 lines changed)

@@ -1,4 +1,4 @@
-VERSION=1.12.0
+VERSION=1.25.0

 PREFIX?=/usr/local
 LIBEXECDIR?=$(PREFIX)/libexec

@@ -22,6 +22,13 @@ else
 CARGO_CLEAN_ARGS = --quiet
 endif

+# when cross compiling, cargo places the built binary in a different location
+ifdef CARGO_BUILD_TARGET
+	BUILT_BIN = target/$(CARGO_BUILD_TARGET)/release/bcachefs
+else
+	BUILT_BIN = target/release/bcachefs
+endif
+
 # Prevent recursive expansions of $(CFLAGS) to avoid repeatedly performing
 # compile tests
 CFLAGS:=$(CFLAGS)

@@ -73,12 +80,13 @@ CFLAGS+=$(call cc-disable-warning, zero-length-array)
 CFLAGS+=$(call cc-disable-warning, shift-overflow)
 CFLAGS+=$(call cc-disable-warning, enum-conversion)
 CFLAGS+=$(call cc-disable-warning, gnu-variable-sized-type-not-at-end)
+export RUSTFLAGS=-C default-linker-libraries

 PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib liblz4 libzstd libudev libkeyutils"
 ifdef BCACHEFS_FUSE
 	PKGCONFIG_LIBS+="fuse3 >= 3.7"
 	CFLAGS+=-DBCACHEFS_FUSE
-	export RUSTFLAGS=--cfg fuse
+	RUSTFLAGS+=--cfg feature="fuse"
 endif

 PKGCONFIG_CFLAGS:=$(shell $(PKG_CONFIG) --cflags $(PKGCONFIG_LIBS))

@@ -194,7 +202,7 @@ cmd_version.o : .version
 install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
 install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
 install: bcachefs $(optional_install)
-	$(INSTALL) -m0755 -D target/release/bcachefs -t $(DESTDIR)$(ROOT_SBINDIR)
+	$(INSTALL) -m0755 -D $(BUILT_BIN) -t $(DESTDIR)$(ROOT_SBINDIR)
	$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
	$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
	$(INSTALL) -m0755 -D initramfs/hook $(DESTDIR)$(INITRAMFS_HOOK)
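The new `BUILT_BIN` variable matters because cargo nests release binaries under the target triple when `CARGO_BUILD_TARGET` is set, so `make install` must look in a different path. A sketch of a cross build (the aarch64 triple is only an example; any installed Rust target works):

```shell
# Cross compile: the binary lands in
# target/aarch64-unknown-linux-gnu/release/bcachefs, which is where
# BUILT_BIN now points the install rule.
make CARGO_BUILD_TARGET=aarch64-unknown-linux-gnu
```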
Makefile.compiler

@@ -13,7 +13,7 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \
	$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))

 # output directory for tests below
-TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
+TMPOUT = .tmp_$$$$

 # try-run
 # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)

@@ -53,13 +53,11 @@ cc-option = $(call __cc-option, $(CC),\

 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
-cc-option-yn = $(call try-run,\
-	$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+cc-option-yn = $(if $(call cc-option,$1),y,n)

 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
-cc-disable-warning = $(call try-run,\
-	$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+cc-disable-warning = $(if $(call cc-option,-W$(strip $1)),-Wno-$(strip $1))

 # gcc-min-version
 # Usage: cflags-$(call gcc-min-version, 70100) += -foo

@@ -75,8 +73,11 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))

 # __rustc-option
 # Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage)
+# TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4
 __rustc-option = $(call try-run,\
-	$(1) $(2) $(3) --crate-type=rlib /dev/null --out-dir=$$TMPOUT -o "$$TMP",$(3),$(4))
+	echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\
+	$(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\
+	--crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4))

 # rustc-option
 # Usage: rustflags-y += $(call rustc-option,-Cinstrument-coverage,-Zinstrument-coverage)

@@ -85,5 +86,4 @@ rustc-option = $(call __rustc-option, $(RUSTC),\

 # rustc-option-yn
 # Usage: flag := $(call rustc-option-yn,-Cinstrument-coverage)
-rustc-option-yn = $(call try-run,\
-	$(RUSTC) $(KBUILD_RUSTFLAGS) $(1) --crate-type=rlib /dev/null --out-dir=$$TMPOUT -o "$$TMP",y,n)
+rustc-option-yn = $(if $(call rustc-option,$1),y,n)
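The new `__rustc-option` probes a flag by compiling an empty `#![no_core]` crate from stdin and discarding the emitted object, so nothing is written to the source tree. Roughly the command the recipe runs, expanded by hand as a standalone sketch (the `-Cinstrument-coverage` flag is only an example of a flag being probed):

```shell
# Exit status 0 means rustc accepts the flag; RUSTC_BOOTSTRAP=1 allows
# the no_core feature gate on a stable compiler.
echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | \
	RUSTC_BOOTSTRAP=1 rustc --sysroot=/dev/null -Cinstrument-coverage \
	--crate-type=rlib --emit=obj=- - >/dev/null && echo y || echo n
```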
arch/etc/initcpio/hooks/bcachefs (new file, 15 lines)

@@ -0,0 +1,15 @@
+#!/usr/bin/ash
+
+run_hook() {
+    local rootdev
+    if rootdev="$(resolve_device "$root")" && bcachefs unlock -c "$rootdev" >/dev/null 2>&1
+    then
+        echo "Unlocking $rootdev:"
+        while true
+        do
+            bcachefs unlock "$rootdev" && break
+        done
+    fi
+}
+
+# vim: set ft=sh ts=4 sw=4 et:
arch/etc/initcpio/install/bcachefs (new file, 16 lines)

@@ -0,0 +1,16 @@
+#!/bin/bash
+
+build() {
+    add_module bcachefs
+    add_binary bcachefs
+
+    add_runscript
+}
+
+help() {
+    cat <<HELPEOF
+This hook is for getting the bcachefs unlock prompt at boot
+HELPEOF
+}
+
+# vim set ft=sh ts=4 sw=4 et:
arch/etc/mkinitcpio.conf (new file, 66 lines)

@@ -0,0 +1,66 @@
+# vim:set ft=sh
+# MODULES
+# The following modules are loaded before any boot hooks are
+# run.  Advanced users may wish to specify all system modules
+# in this array.  For instance:
+#     MODULES=(piix ide_disk reiserfs)
+MODULES=(bcachefs)
+
+# BINARIES
+# This setting includes any additional binaries a given user may
+# wish into the CPIO image.  This is run last, so it may be used to
+# override the actual binaries included by a given hook
+# BINARIES are dependency parsed, so you may safely ignore libraries
+BINARIES=(bcachefs)
+
+# FILES
+# This setting is similar to BINARIES above, however, files are added
+# as-is and are not parsed in any way.  This is useful for config files.
+FILES=()
+
+# HOOKS
+# This is the most important setting in this file.  The HOOKS control the
+# modules and scripts added to the image, and what happens at boot time.
+# Order is important, and it is recommended that you do not change the
+# order in which HOOKS are added.  Run 'mkinitcpio -H <hook name>' for
+# help on a given hook.
+# 'base' is _required_ unless you know precisely what you are doing.
+# 'udev' is _required_ in order to automatically load modules
+# 'filesystems' is _required_ unless you specify your fs modules in MODULES
+# Examples:
+##   This setup specifies all modules in the MODULES setting above.
+##   No raid, lvm2, or encrypted root is needed.
+#    HOOKS="base"
+#
+##   This setup will autodetect all modules for your system and should
+##   work as a sane default
+#    HOOKS="base udev autodetect block filesystems"
+#
+##   This setup will generate a 'full' image which supports most systems.
+##   No autodetection is done.
+#    HOOKS="base udev block filesystems"
+#
+##   This setup assembles a pata mdadm array with an encrypted root FS.
+##   Note: See 'mkinitcpio -H mdadm' for more information on raid devices.
+#    HOOKS="base udev block mdadm encrypt filesystems"
+#
+##   This setup loads an lvm2 volume group on a usb device.
+#    HOOKS="base udev block lvm2 filesystems"
+#
+##   NOTE: If you have /usr on a separate partition, you MUST include the
+#    usr, fsck and shutdown hooks.
+HOOKS=(base udev autodetect modconf block filesystems bcachefs keyboard fsck)
+
+# COMPRESSION
+# Use this to compress the initramfs image.  By default, gzip compression
+# is used.  Use 'cat' to create an uncompressed image.
+#COMPRESSION="gzip"
+#COMPRESSION="bzip2"
+#COMPRESSION="lzma"
+#COMPRESSION="xz"
+#COMPRESSION="lzop"
+#COMPRESSION="lz4"
+
+# COMPRESSION_OPTIONS
+# Additional options for the compressor
+#COMPRESSION_OPTIONS=""
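With the hook, install script, and config above in place, the initramfs must be regenerated before the boot-time unlock prompt appears. A minimal sketch, assuming Arch's standard mkinitcpio workflow:

```shell
# Confirm the bcachefs hook is listed (the shipped mkinitcpio.conf
# already includes it), then rebuild images for all kernel presets.
grep -q 'HOOKS=.*bcachefs' /etc/mkinitcpio.conf && mkinitcpio -P
```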
bcachefs.8

@@ -91,7 +91,7 @@ Add default superblock, after bcachefs migrate
 .El
 .Ss Commands for operating on files in a bcachefs filesystem
 .Bl -tag -width 18n -compact
-.It Ic setattr
+.It Ic set-file-option
 Set various per file attributes
 .El
 .Ss Commands for debugging

@@ -574,7 +574,7 @@ Offset of existing superblock
 .El
 .Sh Commands for operating on files in a bcachefs filesystem
 .Bl -tag -width Ds
-.It Nm Ic setattr Oo Ar options Oc Ar devices\ ...
+.It Nm Ic set-file-option Oo Ar options Oc Ar devices\ ...
 .Bl -tag -width Ds
 .It Fl -data_replicas Ns = Ns Ar number
 Number of data replicas
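The rename means per-file attributes are now set with `set-file-option` rather than `setattr`. A usage sketch built from the `--data_replicas` option documented above (the path and value are examples):

```shell
# Require two data replicas for one file on a mounted filesystem;
# the old spelling of this command was `bcachefs setattr`.
bcachefs set-file-option --data_replicas=2 /mnt/important.db
```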
bch_bindgen/build.rs

@@ -63,7 +63,6 @@ fn main() {
         .allowlist_function("printbuf.*")
         .blocklist_type("rhash_lock_head")
         .blocklist_type("srcu_struct")
-        .blocklist_type("bch_ioctl_data.*")
         .allowlist_var("BCH_.*")
         .allowlist_var("KEY_SPEC_.*")
         .allowlist_var("Fix753_.*")
bch_bindgen/src/bkey.rs

@@ -50,6 +50,7 @@ pub enum BkeyValC<'a> {
     logged_op_truncate(&'a c::bch_logged_op_truncate),
     logged_op_finsert(&'a c::bch_logged_op_finsert),
     accounting(&'a c::bch_accounting),
+    inode_alloc_cursor(&'a c::bch_inode_alloc_cursor),
 }

 impl<'a, 'b> BkeySC<'a> {

@@ -64,7 +65,7 @@ impl<'a, 'b> BkeySC<'a> {
         BkeySCToText { k: self, fs }
     }

-    pub fn v(&'a self) -> BkeyValC {
+    pub fn v(&'a self) -> BkeyValC<'a> {
         unsafe {
             let ty: c::bch_bkey_type = transmute(self.k.type_ as u32);

@@ -106,6 +107,7 @@ impl<'a, 'b> BkeySC<'a> {
             KEY_TYPE_logged_op_truncate => logged_op_truncate(transmute(self.v)),
             KEY_TYPE_logged_op_finsert => logged_op_finsert(transmute(self.v)),
             KEY_TYPE_accounting => accounting(transmute(self.v)),
+            KEY_TYPE_inode_alloc_cursor => inode_alloc_cursor(transmute(self.v)),
             KEY_TYPE_MAX => unreachable!(),
         }
     }
bch_bindgen/src/btree.rs

@@ -15,7 +15,7 @@ pub struct BtreeTrans<'f> {
 }

 impl<'f> BtreeTrans<'f> {
-    pub fn new(fs: &'f Fs) -> BtreeTrans {
+    pub fn new(fs: &'f Fs) -> BtreeTrans<'f> {
         unsafe {
             BtreeTrans {
                 raw: &mut *c::__bch2_trans_get(fs.raw, 0),

@@ -81,9 +81,9 @@ impl<'t> BtreeIter<'t> {
         }
     }

-    pub fn peek_upto<'i>(&'i mut self, end: c::bpos) -> Result<Option<BkeySC>, bch_errcode> {
+    pub fn peek_max<'i>(&'i mut self, end: c::bpos) -> Result<Option<BkeySC<'i>>, bch_errcode> {
         unsafe {
-            let k = c::bch2_btree_iter_peek_upto(&mut self.raw, end);
+            let k = c::bch2_btree_iter_peek_max(&mut self.raw, end);
             errptr_to_result_c(k.k).map(|_| {
                 if !k.k.is_null() {
                     Some(BkeySC {

@@ -99,7 +99,7 @@ impl<'t> BtreeIter<'t> {
     }

     pub fn peek(&mut self) -> Result<Option<BkeySC>, bch_errcode> {
-        self.peek_upto(SPOS_MAX)
+        self.peek_max(SPOS_MAX)
     }

     pub fn peek_and_restart(&mut self) -> Result<Option<BkeySC>, bch_errcode> {

@@ -146,7 +146,7 @@ impl<'t> BtreeNodeIter<'t> {
         locks_want: u32,
         depth: u32,
         flags: BtreeIterFlags,
-    ) -> BtreeNodeIter {
+    ) -> BtreeNodeIter<'t> {
         unsafe {
             let mut iter: MaybeUninit<c::btree_iter> = MaybeUninit::uninit();
             c::bch2_trans_node_iter_init(
bch_bindgen/src/opts.rs

@@ -1,3 +1,7 @@
+use crate::c;
+use crate::fs::Fs;
+use std::ffi::{CString, c_char};
+
 #[macro_export]
 macro_rules! opt_set {
     ($opts:ident, $n:ident, $v:expr) => {

@@ -33,3 +37,29 @@ macro_rules! opt_get {
         }
     };
 }
+
+pub fn parse_mount_opts(fs: Option<&mut Fs>, optstr: Option<&str>, ignore_unknown: bool)
+    -> Result<c::bch_opts, c::bch_errcode> {
+    let mut opts: c::bch_opts = Default::default();
+
+    if let Some(optstr) = optstr {
+        let optstr = CString::new(optstr).unwrap();
+        let optstr_ptr = optstr.as_ptr();
+
+        let ret = unsafe {
+            c::bch2_parse_mount_opts(fs.map_or(std::ptr::null_mut(), |f| f.raw),
+                                     &mut opts as *mut c::bch_opts,
+                                     std::ptr::null_mut(),
+                                     optstr_ptr as *mut c_char,
+                                     ignore_unknown)
+        };
+
+        drop(optstr);
+
+        if ret != 0 {
+            let err: c::bch_errcode = unsafe { std::mem::transmute(-ret) };
+            return Err(err);
+        }
+    }
+    Ok(opts)
+}
c_src/bcachefs.c

@@ -33,6 +33,7 @@ void bcachefs_usage(void)
	     "Superblock commands:\n"
	     "  format                   Format a new filesystem\n"
	     "  show-super               Dump superblock information to stdout\n"
	     "  recover-super            Attempt to recover overwritten superblock from backups\n"
+	     "  set-fs-option            Set a filesystem option\n"
	     "  reset-counters           Reset all counters on an unmounted device\n"
	     "\n"

@@ -52,6 +53,7 @@ void bcachefs_usage(void)
 #endif
	     "Commands for managing a running filesystem:\n"
	     "  fs usage                 Show disk usage\n"
+	     "  fs top                   Show runtime performance information\n"
	     "\n"
	     "Commands for managing devices within a running filesystem:\n"
	     "  device add               Add a new device to an existing filesystem\n"

@@ -70,6 +72,7 @@ void bcachefs_usage(void)
	     "\n"
	     "Commands for managing filesystem data:\n"
	     "  data rereplicate         Rereplicate degraded data\n"
+	     "  data scrub               Verify checksums and correct errors, if possible\n"
	     "  data job                 Kick off low level data jobs\n"
	     "\n"
	     "Encryption:\n"

@@ -90,9 +93,11 @@ void bcachefs_usage(void)
	     "  list                     List filesystem metadata in textual form\n"
	     "  list_journal             List contents of journal\n"
	     "\n"
+#ifdef BCACHEFS_FUSE
	     "FUSE:\n"
	     "  fusemount                Mount a filesystem via FUSE\n"
	     "\n"
+#endif
	     "Miscellaneous:\n"
	     "  completions              Generate shell completions\n"
	     "  version                  Display the version of the invoked bcachefs tool\n");

@@ -113,14 +118,15 @@ int fs_cmds(int argc, char *argv[])
 {
	char *cmd = pop_cmd(&argc, argv);

-	if (argc < 1) {
-		bcachefs_usage();
-		exit(EXIT_FAILURE);
-	}
+	if (argc < 1)
+		return fs_usage();
	if (!strcmp(cmd, "usage"))
		return cmd_fs_usage(argc, argv);
+	if (!strcmp(cmd, "top"))
+		return cmd_fs_top(argc, argv);

-	return 0;
+	fs_usage();
+	return -EINVAL;
 }

 int device_cmds(int argc, char *argv[])

@@ -146,7 +152,8 @@ int device_cmds(int argc, char *argv[])
	if (!strcmp(cmd, "resize-journal"))
		return cmd_device_resize_journal(argc, argv);

-	return 0;
+	device_usage();
+	return -EINVAL;
 }

 int data_cmds(int argc, char *argv[])

@@ -157,8 +164,11 @@ int data_cmds(int argc, char *argv[])
		return data_usage();
	if (!strcmp(cmd, "rereplicate"))
		return cmd_data_rereplicate(argc, argv);
+	if (!strcmp(cmd, "scrub"))
+		return cmd_data_scrub(argc, argv);
	if (!strcmp(cmd, "job"))
		return cmd_data_job(argc, argv);

-	return 0;
+	data_usage();
+	return -EINVAL;
 }
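Besides the stricter error returns (unknown subcommands now print usage and return -EINVAL instead of silently succeeding), the usage text gains two new subcommands; quick sketches of invoking them (the mount point is an example):

```shell
bcachefs fs top /mnt       # runtime performance information
bcachefs data scrub /mnt   # verify checksums, correct errors if possible
```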
c_src/cmd_data.c (203 lines changed)

@@ -1,5 +1,5 @@
 #include <getopt.h>
 #include <stdio.h>
 #include <sys/ioctl.h>

@@ -64,6 +64,207 @@ int cmd_data_rereplicate(int argc, char *argv[])
	});
 }

+static void data_scrub_usage(void)
+{
+	puts("bcachefs data scrub\n"
+	     "Usage: bcachefs data scrub [filesystem|device]\n"
+	     "\n"
+	     "Check data for errors, fix from another replica if possible\n"
+	     "\n"
+	     "Options:\n"
+	     "  -m, --metadata              check metadata only\n"
+	     "  -h, --help                  display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+int cmd_data_scrub(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "metadata",		no_argument,	NULL, 'm' },
+		{ "help",		no_argument,	NULL, 'h' },
+		{ NULL }
+	};
+	struct bch_ioctl_data cmd = {
+		.op		= BCH_DATA_OP_scrub,
+		.scrub.data_types = ~0,
+	};
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "hm", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'm':
+			cmd.scrub.data_types = BIT(BCH_DATA_btree);
+			break;
+		case 'h':
+			data_scrub_usage();
+			break;
+		}
+	args_shift(optind);
+
+	char *path = arg_pop();
+	if (!path)
+		die("Please supply a filesystem");
+
+	if (argc)
+		die("too many arguments");
+
+	printf("Starting scrub on");
+
+	struct bchfs_handle fs = bcache_fs_open(path);
+	dev_names dev_names = bchu_fs_get_devices(fs);
+
+	struct scrub_device {
+		const char	*name;
+		int		progress_fd;
+		u64		done, corrected, uncorrected, total;
+		enum bch_ioctl_data_event_ret ret;
+	};
+	DARRAY(struct scrub_device) scrub_devs = {};
+
+	if (fs.dev_idx >= 0) {
+		cmd.scrub.dev = fs.dev_idx;
+		struct scrub_device d = {
+			.name		= dev_idx_to_name(&dev_names, fs.dev_idx)->dev,
+			.progress_fd	= xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
+		};
+		darray_push(&scrub_devs, d);
+	} else {
+		/* Scrubbing every device */
+		darray_for_each(dev_names, dev) {
+			cmd.scrub.dev = dev->idx;
+			struct scrub_device d = {
+				.name		= dev->dev,
+				.progress_fd	= xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd),
+			};
+			darray_push(&scrub_devs, d);
+		}
+	}
+
+	printf(" %zu devices: ", scrub_devs.nr);
+	darray_for_each(scrub_devs, dev)
+		printf(" %s", dev->name);
+	printf("\n");
+
+	struct timespec now, last;
+	bool first = true;
+
+	struct printbuf buf = PRINTBUF;
+	printbuf_tabstop_push(&buf, 16);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 12);
+	printbuf_tabstop_push(&buf, 6);
+
+	prt_printf(&buf, "device\t");
+	prt_printf(&buf, "checked\r");
+	prt_printf(&buf, "corrected\r");
+	prt_printf(&buf, "uncorrected\r");
+	prt_printf(&buf, "total\r");
+	puts(buf.buf);
+
+	while (1) {
+		bool done = true;
+
+		printbuf_reset_keep_tabstops(&buf);
+
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		u64 ns_since_last = 0;
+		if (!first)
+			ns_since_last = (now.tv_sec - last.tv_sec) * NSEC_PER_SEC +
+				now.tv_nsec - last.tv_nsec;
+
+		darray_for_each(scrub_devs, dev) {
+			struct bch_ioctl_data_event e;
+
+			if (dev->progress_fd >= 0 &&
+			    read(dev->progress_fd, &e, sizeof(e)) != sizeof(e)) {
+				close(dev->progress_fd);
+				dev->progress_fd = -1;
+			}
+
+			u64 rate = 0;
+
+			if (dev->progress_fd >= 0) {
+				if (ns_since_last)
+					rate = ((e.p.sectors_done - dev->done) << 9)
+						* NSEC_PER_SEC
+						/ ns_since_last;
+
+				dev->done	= e.p.sectors_done;
+				dev->corrected	= e.p.sectors_error_corrected;
+				dev->uncorrected= e.p.sectors_error_uncorrected;
+				dev->total	= e.p.sectors_total;
+			}
+
+			if (dev->progress_fd >= 0 && e.ret) {
+				close(dev->progress_fd);
+				dev->progress_fd = -1;
+				dev->ret = e.ret;
+			}
+
+			if (dev->progress_fd >= 0)
+				done = false;
+
+			prt_printf(&buf, "%s\t", dev->name ?: "(offline)");
+
+			prt_human_readable_u64(&buf, dev->done << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->corrected << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->uncorrected << 9);
+			prt_tab_rjust(&buf);
+
+			prt_human_readable_u64(&buf, dev->total << 9);
+			prt_tab_rjust(&buf);
+
+			prt_printf(&buf, "%llu%%",
+				   dev->total
+				   ? dev->done * 100 / dev->total
+				   : 0);
+			prt_tab_rjust(&buf);
+
+			prt_str(&buf, " ");
+
+			if (dev->progress_fd >= 0) {
+				prt_human_readable_u64(&buf, rate);
+				prt_str(&buf, "/sec");
+			} else if (dev->ret == BCH_IOCTL_DATA_EVENT_RET_device_offline) {
+				prt_str(&buf, "offline");
+			} else {
+				prt_str(&buf, "complete");
+			}
+
+			if (dev != &darray_last(scrub_devs))
+				prt_newline(&buf);
+		}
+
+		fputs(buf.buf, stdout);
+		fflush(stdout);
+
+		if (done)
+			break;
+
+		last = now;
+		first = false;
+		sleep(1);
+
+		for (unsigned i = 0; i < scrub_devs.nr; i++) {
+			if (i)
+				printf("\033[1A");
+			printf("\33[2K\r");
+		}
+	}
+
+	fputs("\n", stdout);
+	printbuf_exit(&buf);
+
+	return 0;
+}
+
 static void data_job_usage(void)
 {
	puts("bcachefs data job\n"
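`cmd_data_scrub` opens one progress file descriptor per device (or one, if a single member device is named), then redraws a per-device table once a second; the throughput column converts sector deltas to bytes (`<< 9`) per elapsed nanosecond interval. A usage sketch (the mount point is an example):

```shell
# Metadata-only scrub: -m restricts scrub.data_types to the btree bit.
# Checked/corrected/uncorrected/total counts print per device.
bcachefs data scrub -m /mnt
```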
c_src/cmd_device.c

@@ -47,11 +47,11 @@ static void device_add_usage(void)
	puts("bcachefs device add - add a device to an existing filesystem\n"
	     "Usage: bcachefs device add [OPTION]... filesystem device\n"
	     "\n"
-	     "Options:\n"
-	     "  -S, --fs_size=size          Size of filesystem on device\n"
-	     "  -B, --bucket=size           Bucket size\n"
-	     "  -D, --discard               Enable discards\n"
-	     "  -l, --label=label           Disk label\n"
+	     "Options:\n");
+
+	bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
+
+	puts("  -l, --label=label           Disk label\n"
	     "  -f, --force                 Use device even if it appears to already be formatted\n"
	     "  -h, --help                  Display this help and exit\n"
	     "\n"

@@ -61,9 +61,6 @@ static void device_add_usage(void)
 int cmd_device_add(int argc, char *argv[])
 {
	static const struct option longopts[] = {
-		{ "fs_size",		required_argument,	NULL, 'S' },
-		{ "bucket",		required_argument,	NULL, 'B' },
-		{ "discard",		no_argument,		NULL, 'D' },
		{ "label",		required_argument,	NULL, 'l' },
		{ "force",		no_argument,		NULL, 'f' },
		{ "help",		no_argument,		NULL, 'h' },

@@ -72,22 +69,31 @@ int cmd_device_add(int argc, char *argv[])
	struct format_opts format_opts	= format_opts_default();
	struct dev_opts dev_opts	= dev_opts_default();
	bool force = false;
-	int opt;

-	while ((opt = getopt_long(argc, argv, "S:B:Dl:fh",
-				  longopts, NULL)) != -1)
-		switch (opt) {
-		case 'S':
-			if (bch2_strtoull_h(optarg, &dev_opts.size))
-				die("invalid filesystem size");
-			break;
-		case 'B':
-			if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-				die("bad bucket_size %s", optarg);
-			break;
-		case 'D':
-			dev_opts.discard = true;
-			break;
+	while (true) {
+		const struct bch_option *opt =
+			bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_DEVICE);
+		if (opt) {
+			unsigned id = opt - bch2_opt_table;
+			u64 v;
+			struct printbuf err = PRINTBUF;
+			int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+			if (ret)
+				die("invalid %s: %s", opt->attr.name, err.buf);
+
+			if (opt->flags & OPT_DEVICE)
+				bch2_opt_set_by_id(&dev_opts.opts, id, v);
+			else
+				die("got bch_opt of wrong type %s", opt->attr.name);
+
+			continue;
+		}
+
+		int optid = getopt_long(argc, argv, "S:B:Dl:fh", longopts, NULL);
+		if (optid == -1)
+			break;
+
+		switch (optid) {
		case 'l':
			dev_opts.label = strdup(optarg);
			break;

@@ -97,7 +103,11 @@ int cmd_device_add(int argc, char *argv[])
		case 'h':
			device_add_usage();
			exit(EXIT_SUCCESS);
+		case '?':
+			exit(EXIT_FAILURE);
+			break;
		}
+	}
	args_shift(optind);

	char *fs_path = arg_pop();

@@ -127,10 +137,11 @@ int cmd_device_add(int argc, char *argv[])
	opt_set(fs_opts, btree_node_size,
		read_file_u64(fs.sysfs_fd, "options/btree_node_size"));

-	struct bch_sb *sb = bch2_format(fs_opt_strs,
-					fs_opts,
-					format_opts,
-					&dev_opts, 1);
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts);
+
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	darray_exit(&devs);
	free(sb);
	bchu_disk_add(fs, dev_opts.path);
	return 0;
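With -S/-B/-D gone, sizing, bucket size and discard for `device add` now come from the shared option table printed by `bch2_opts_usage(OPT_FORMAT|OPT_DEVICE)`, so the command stays in sync with the filesystem's real option set. A sketch using the two flags the help text still spells out (paths and label are examples):

```shell
# Add /dev/sdc to the filesystem mounted at /mnt, forcing reuse of a
# previously formatted device and tagging it with a disk label.
bcachefs device add --label=hdd.hdd2 -f /mnt /dev/sdc
```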
c_src/cmd_format.c

@@ -39,11 +39,7 @@ x('L',	fs_label,		required_argument)	\
 x('U',	uuid,			required_argument)	\
 x(0,	fs_size,		required_argument)	\
 x(0,	superblock_size,	required_argument)	\
-x(0,	bucket_size,		required_argument)	\
 x('l',	label,			required_argument)	\
-x(0,	discard,		no_argument)		\
-x(0,	data_allowed,		required_argument)	\
-x(0,	durability,		required_argument)	\
 x(0,	version,		required_argument)	\
 x(0,	no_initialize,		no_argument)		\
 x(0,	source,			required_argument)	\

@@ -52,17 +48,16 @@ x('q',	quiet,			no_argument)		\
 x('v',	verbose,		no_argument)		\
 x('h',	help,			no_argument)

-static void usage(void)
+static void format_usage(void)
 {
	puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
	     "Usage: bcachefs format [OPTION]... <devices>\n"
	     "\n"
	     "Options:");

-	bch2_opts_usage(OPT_FORMAT);
+	bch2_opts_usage(OPT_FORMAT|OPT_FS);

-	puts(
-	     "      --replicas=#            Sets both data and metadata replicas\n"
+	puts("      --replicas=#            Sets both data and metadata replicas\n"
	     "      --encrypted             Enable whole filesystem encryption (chacha20/poly1305)\n"
	     "      --no_passphrase         Don't encrypt master encryption key\n"
	     "  -L, --fs_label=label\n"

@@ -72,9 +67,10 @@ static void usage(void)
	     "\n"
	     "Device specific options:");

-	bch2_opts_usage(OPT_DEVICE);
+	bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);

-	puts("  -l, --label=label           Disk label\n"
+	puts("      --fs_size=size          Size of filesystem on device\n"
+	     "  -l, --label=label           Disk label\n"
	     "\n"
	     "  -f, --force\n"
	     "  -q, --quiet                 Only print errors\n"

@@ -125,32 +121,60 @@ void build_fs(struct bch_fs *c, const char *src_path)
	if (!S_ISDIR(stat.st_mode))
		die("%s is not a directory", src_path);

-	copy_fs(c, src_fd, src_path, &s);
+	copy_fs(c, src_fd, src_path, &s, 0);
 }

 int cmd_format(int argc, char *argv[])
 {
-	DARRAY(struct dev_opts) devices = { 0 };
-	DARRAY(char *) device_paths = { 0 };
+	dev_opts_list devices = {};
+	darray_str device_paths = {};
	struct format_opts opts	= format_opts_default();
	struct dev_opts dev_opts = dev_opts_default();
	bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
	bool unconsumed_dev_option = false;
	unsigned v;
-	int opt;

-	struct bch_opt_strs fs_opt_strs =
-		bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
-	struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+	struct bch_opt_strs fs_opt_strs = {};
+	struct bch_opts fs_opts = bch2_opts_empty();

	if (getenv("BCACHEFS_KERNEL_ONLY"))
		initialize = false;

-	while ((opt = getopt_long(argc, argv,
-				  "-L:U:g:fqhv",
-				  format_opts,
-				  NULL)) != -1)
-		switch (opt) {
+	while (true) {
+		const struct bch_option *opt =
+			bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_FS|OPT_DEVICE);
+		if (opt) {
+			unsigned id = opt - bch2_opt_table;
+			u64 v;
+			struct printbuf err = PRINTBUF;
+			int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+			if (ret == -BCH_ERR_option_needs_open_fs) {
+				fs_opt_strs.by_id[id] = strdup(optarg);
+				continue;
+			}
+			if (ret)
+				die("invalid option: %s", err.buf);
+
+			if (opt->flags & OPT_DEVICE) {
+				bch2_opt_set_by_id(&dev_opts.opts, id, v);
+				unconsumed_dev_option = true;
+			} else if (opt->flags & OPT_FS) {
+				bch2_opt_set_by_id(&fs_opts, id, v);
+			} else {
+				die("got bch_opt of wrong type %s", opt->attr.name);
+			}
+
+			continue;
+		}
+
+		int optid = getopt_long(argc, argv,
+					"-L:l:U:g:fqhv",
+					format_opts,
+					NULL);
+		if (optid == -1)
+			break;
+
+		switch (optid) {
		case O_replicas:
			if (kstrtouint(optarg, 10, &v) ||
			    !v ||

@@ -183,7 +207,7 @@ int cmd_format(int argc, char *argv[])
			force = true;
			break;
		case O_fs_size:
-			if (bch2_strtoull_h(optarg, &dev_opts.size))
+			if (bch2_strtoull_h(optarg, &dev_opts.fs_size))
				die("invalid filesystem size");
			unconsumed_dev_option = true;
			break;

@@ -193,35 +217,13 @@ int cmd_format(int argc, char *argv[])

			opts.superblock_size >>= 9;
			break;
-		case O_bucket_size:
-			if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-				die("bad bucket_size %s", optarg);
-			unconsumed_dev_option = true;
-			break;
		case O_label:
+		case 'l':
			dev_opts.label = optarg;
			unconsumed_dev_option = true;
			break;
-		case O_discard:
-			dev_opts.discard = true;
-			unconsumed_dev_option = true;
-			break;
-		case O_data_allowed:
-			dev_opts.data_allowed =
-				read_flag_list_or_die(optarg,
-						__bch2_data_types, "data type");
-			unconsumed_dev_option = true;
-			break;
-		case O_durability:
-			if (kstrtouint(optarg, 10, &dev_opts.durability) ||
-			    dev_opts.durability > BCH_REPLICAS_MAX)
-				die("invalid durability");
-			unconsumed_dev_option = true;
-			break;
		case O_version:
-			if (kstrtouint(optarg, 10, &opts.version))
-				die("invalid version");
+			opts.version = version_parse(optarg);
			break;
		case O_no_initialize:
			initialize = false;

@@ -230,7 +232,7 @@ int cmd_format(int argc, char *argv[])
			darray_push(&device_paths, optarg);
			dev_opts.path = optarg;
			darray_push(&devices, dev_opts);
-			dev_opts.size = 0;
+			dev_opts.fs_size = 0;
			unconsumed_dev_option = false;
			break;
		case O_quiet:

@@ -239,15 +241,19 @@ int cmd_format(int argc, char *argv[])
			break;
		case 'v':
			verbose = true;
			break;
+		case O_help:
		case 'h':
-			usage();
+			format_usage();
			exit(EXIT_SUCCESS);
			break;
+		case '?':
+			exit(EXIT_FAILURE);
+			break;
		default:
			die("getopt ret %i %c", optid, optid);
		}
+	}

	if (unconsumed_dev_option)
		die("Options for devices apply to subsequent devices; got a device option with no device");

@@ -269,11 +275,7 @@ int cmd_format(int argc, char *argv[])
		die("Error opening %s: %s", dev_opts.path, strerror(-ret));
	}

-	struct bch_sb *sb =
-		bch2_format(fs_opt_strs,
-			    fs_opts,
-			    opts,
-			    devices.data, devices.nr);
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, opts, devices);
	bch2_opt_strs_free(&fs_opt_strs);

	if (!quiet) {
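The `die()` message above states the CLI contract the rewritten parser enforces: device options apply to the devices named after them. A sketch (device paths, labels, and replica count are examples):

```shell
# Two replicas across two devices; each --label attaches to the
# device that follows it, per "options for devices apply to
# subsequent devices".
bcachefs format --replicas=2 \
	--label=ssd.ssd1 /dev/nvme0n1 \
	--label=hdd.hdd1 /dev/sda
```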
@@ -433,3 +435,204 @@ int cmd_show_super(int argc, char *argv[])
	printbuf_exit(&buf);
	return 0;
 }
+
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/sb-members.h"
+
+typedef DARRAY(struct bch_sb *) probed_sb_list;
+
+static void probe_one_super(int dev_fd, unsigned sb_size, u64 offset,
+			    probed_sb_list *sbs, bool verbose)
+{
+	darray_char sb_buf = {};
+	darray_resize(&sb_buf, sb_size);
+
+	xpread(dev_fd, sb_buf.data, sb_buf.size, offset);
+
+	struct printbuf err = PRINTBUF;
+	int ret = bch2_sb_validate((void *) sb_buf.data, offset >> 9, 0, &err);
+	printbuf_exit(&err);
+
+	if (!ret) {
+		if (verbose) {
+			struct printbuf buf = PRINTBUF;
+			prt_human_readable_u64(&buf, offset);
+			printf("found superblock at %s\n", buf.buf);
+			printbuf_exit(&buf);
+		}
+
+		darray_push(sbs, (void *) sb_buf.data);
+		sb_buf.data = NULL;
+	}
+
+	darray_exit(&sb_buf);
+}
+
+static void probe_sb_range(int dev_fd, u64 start_offset, u64 end_offset,
+			   probed_sb_list *sbs, bool verbose)
+{
+	start_offset	&= ~((u64) 511);
+	end_offset	&= ~((u64) 511);
+
+	size_t buflen = end_offset - start_offset;
+	void *buf = malloc(buflen);
+	xpread(dev_fd, buf, buflen, start_offset);
+
+	for (u64 offset = 0; offset < buflen; offset += 512) {
+		struct bch_sb *sb = buf + offset;
+
+		if (!uuid_equal(&sb->magic, &BCACHE_MAGIC) &&
+		    !uuid_equal(&sb->magic, &BCHFS_MAGIC))
+			continue;
+
+		size_t bytes = vstruct_bytes(sb);
+		if (offset + bytes > buflen) {
+			fprintf(stderr, "found sb %llu size %zu that overran buffer\n",
+				start_offset + offset, bytes);
+			continue;
+		}
+		struct printbuf err = PRINTBUF;
+		int ret = bch2_sb_validate(sb, (start_offset + offset) >> 9, 0, &err);
+		if (ret)
+			fprintf(stderr, "found sb %llu that failed to validate: %s\n",
+				start_offset + offset, err.buf);
+		printbuf_exit(&err);
+
+		if (ret)
+			continue;
+
+		if (verbose) {
+			struct printbuf buf = PRINTBUF;
+			prt_human_readable_u64(&buf, start_offset + offset);
+			printf("found superblock at %s\n", buf.buf);
+			printbuf_exit(&buf);
+		}
+
+		void *sb_copy = malloc(bytes);
+		memcpy(sb_copy, sb, bytes);
+		darray_push(sbs, sb_copy);
+	}
+
+	free(buf);
+}
+
+static u64 bch2_sb_last_mount_time(struct bch_sb *sb)
+{
+	u64 ret = 0;
+	for (unsigned i = 0; i < sb->nr_devices; i++)
+		ret = max(ret, le64_to_cpu(bch2_sb_member_get(sb, i).last_mount));
+	return ret;
+}
+
+static int bch2_sb_time_cmp(struct bch_sb *l, struct bch_sb *r)
+{
+	return cmp_int(bch2_sb_last_mount_time(l),
+		       bch2_sb_last_mount_time(r));
+}
+
+static void recover_super_usage(void)
+{
+	puts("bcachefs recover-super \n"
+	     "Usage: bcachefs recover-super [OPTION].. device\n"
+	     "\n"
+	     "Attempt to recover a filesystem on a device that has had the main superblock\n"
+	     "and superblock layout overwritten.\n"
+	     "All options will be guessed if not provided\n"
+	     "\n"
+	     "Options:\n"
+	     "  -d, --dev_size              size of filessytem on device, in bytes \n"
+	     "  -o, --offset                offset to probe, in bytes\n"
+	     "  -y, --yes                   Recover without prompting\n"
+	     "  -v, --verbose               Increase logging level\n"
+	     "  -h, --help                  display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+int cmd_recover_super(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "dev_size",		1, NULL, 'd' },
+		{ "offset",		1, NULL, 'o' },
+		{ "yes",		0, NULL, 'y' },
+		{ "verbose",		0, NULL, 'v' },
+		{ "help",		0, NULL, 'h' },
+		{ NULL }
+	};
+	u64 dev_size = 0, offset = 0;
+	bool yes = false, verbose = false;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "d:o:yvh", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'd':
+			if (bch2_strtoull_h(optarg, &dev_size))
+				die("invalid offset");
+			break;
+		case 'o':
+			if (bch2_strtoull_h(optarg, &offset))
+				die("invalid offset");
+
+			if (offset & 511)
+				die("offset must be a multiple of 512");
+			break;
+		case 'y':
+			yes = true;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		case 'h':
+			recover_super_usage();
+			break;
+		}
+	args_shift(optind);
+
+	char *dev_path = arg_pop();
+	if (!dev_path)
+		die("please supply a device");
+	if (argc)
+		die("too many arguments");
+
+	int dev_fd = xopen(dev_path, O_RDWR);
+
+	if (!dev_size)
+		dev_size = get_size(dev_fd);
+
+	probed_sb_list sbs = {};
+
+	if (offset) {
+		probe_one_super(dev_fd, SUPERBLOCK_SIZE_DEFAULT, offset, &sbs, verbose);
+	} else {
+		unsigned scan_len = 16 << 20; /* 16MB, start and end of device */
+
+		probe_sb_range(dev_fd, 4096, scan_len, &sbs, verbose);
+		probe_sb_range(dev_fd, dev_size - scan_len, dev_size, &sbs, verbose);
+	}
+
+	if (!sbs.nr) {
+		printf("Found no bcachefs superblocks\n");
+		exit(EXIT_FAILURE);
+	}
+
+	struct bch_sb *best = NULL;
+	darray_for_each(sbs, sb)
+		if (!best || bch2_sb_time_cmp(best, *sb) < 0)
+			best = *sb;
+
+	struct printbuf buf = PRINTBUF;
+	bch2_sb_to_text(&buf, best, true, BIT_ULL(BCH_SB_FIELD_members_v2));
+
+	printf("Found superblock:\n%s", buf.buf);
+	printf("Recover?");
+
+	if (yes || ask_yn())
+		bch2_super_write(dev_fd, best);
+
+	printbuf_exit(&buf);
+	darray_for_each(sbs, sb)
+		kfree(*sb);
+	darray_exit(&sbs);
+
+	return 0;
+}
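Based on the usage text above, a sketch of a recovery run (the device path is an example):

```shell
# With no -o, scans 16MB at each end of the device for backup
# superblocks, picks the newest by last-mount time, and prompts
# before rewriting; -y skips the prompt, -o probes one known offset.
bcachefs recover-super /dev/sdb
```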
c_src/cmd_fs.c

@@ -122,14 +122,6 @@ static int dev_by_label_cmp(const void *_l, const void *_r)
		cmp_int(l->idx, r->idx);
 }

-static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
-{
-	darray_for_each(*dev_names, dev)
-		if (dev->idx == idx)
-			return dev;
-	return NULL;
-}
-
 static void devs_usage_to_text(struct printbuf *out,
			       struct bchfs_handle fs,
			       dev_names dev_names)

@@ -230,8 +222,6 @@ static int accounting_p_cmp(const void *_l, const void *_r)

	struct bpos lp = (*l)->k.p, rp = (*r)->k.p;

-	bch2_bpos_swab(&lp);
-	bch2_bpos_swab(&rp);
	return bpos_cmp(lp, rp);
 }

@@ -247,6 +237,18 @@ static void accounting_sort(darray_accounting_p *sorted,
	sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL);
 }

+static void accounting_swab_if_old(struct bch_ioctl_query_accounting *in)
+{
+	unsigned kernel_version = bcachefs_kernel_version();
+
+	if (kernel_version &&
+	    kernel_version < bcachefs_metadata_version_disk_accounting_big_endian)
+		for (struct bkey_i_accounting *a = in->accounting;
+		     a < (struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s);
+		     a = bkey_i_to_accounting(bkey_next(&a->k_i)))
+			bch2_bpos_swab(&a->k.p);
+}
+
 static int fs_usage_v1_to_text(struct printbuf *out,
			       struct bchfs_handle fs,
			       dev_names dev_names)

@@ -261,6 +263,8 @@ static int fs_usage_v1_to_text(struct printbuf *out,
	if (!a)
		return -1;

+	accounting_swab_if_old(a);
+
	darray_accounting_p a_sorted = {};

	accounting_sort(&a_sorted, a);

@@ -487,6 +491,19 @@ devs:
	bcache_fs_close(fs);
 }

+int fs_usage(void)
+{
+	puts("bcachefs fs - manage a running filesystem\n"
+	     "Usage: bcachefs fs <CMD> [OPTIONS]\n"
+	     "\n"
+	     "Commands:\n"
+	     "  usage                    Display detailed filesystem usage\n"
+	     "  top                      Show runtime performance information\n"
+	     "\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	return 0;
+}
+
 static void fs_usage_usage(void)
 {
	puts("bcachefs fs usage - display detailed filesystem usage\n"
c_src/cmd_fsck.c

@@ -21,7 +21,6 @@ static void fsck_usage(void)
	     "  -y                      Assume \"yes\" to all questions\n"
	     "  -f                      Force checking even if filesystem is marked clean\n"
	     "  -r, --ratelimit_errors  Don't display more than 10 errors of a given type\n"
-	     "  -R, --reconstruct_alloc Reconstruct the alloc btree\n"
	     "  -k, --kernel            Use the in-kernel fsck implementation\n"
	     "  -v                      Be verbose\n"
	     "  -h, --help              Display this help and exit\n"

@@ -90,12 +89,14 @@ static int splice_fd_to_stdinout(int fd)
	return close(fd);
 }

-static int fsck_online(const char *dev_path)
+static int fsck_online(const char *dev_path, const char *opt_str)
 {
	int dev_idx;
	struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);

-	struct bch_ioctl_fsck_online fsck = { 0 };
+	struct bch_ioctl_fsck_online fsck = {
+		.opts = (unsigned long) opt_str
+	};

	int fsck_fd = ioctl(fs.ioctl_fd, BCH_IOCTL_FSCK_ONLINE, &fsck);
	if (fsck_fd < 0)

@@ -115,9 +116,7 @@ static bool should_use_kernel_fsck(darray_str devs)
 {
	system("modprobe bcachefs");

-	unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK)
-	    ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
-	    : 0;
+	unsigned kernel_version = bcachefs_kernel_version();

	if (!kernel_version)
		return false;

@@ -160,11 +159,49 @@ static bool should_use_kernel_fsck(darray_str devs)
	return ret;
 }

+static bool is_blockdev(const char *path)
+{
+	struct stat s;
+	if (stat(path, &s))
+		return true;
+	return S_ISBLK(s.st_mode);
+}
+
+static void loopdev_free(const char *path)
+{
+	char *cmd = mprintf("losetup -d %s", path);
+	system(cmd);
+	free(cmd);
+}
+
+static char *loopdev_alloc(const char *path)
+{
+	char *cmd = mprintf("losetup --show -f %s", path);
+	FILE *f = popen(cmd, "r");
+	free(cmd);
+	if (!f) {
+		fprintf(stderr, "error executing losetup: %m\n");
+		return NULL;
+	}
+
+	char *line = NULL;
+	size_t n = 0;
+	getline(&line, &n, f);
+	int ret = pclose(f);
+	if (ret) {
+		fprintf(stderr, "error executing losetup: %i\n", ret);
+		free(line);
+		return NULL;
+	}
+
+	strim(line);
+	return line;
+}
+
 int cmd_fsck(int argc, char *argv[])
 {
	static const struct option longopts[] = {
		{ "ratelimit_errors",	no_argument,		NULL, 'r' },
-		{ "reconstruct_alloc",	no_argument,		NULL, 'R' },
		{ "kernel",		no_argument,		NULL, 'k' },
+		{ "no-kernel",		no_argument,		NULL, 'K' },
		{ "help",		no_argument,		NULL, 'h' },

@@ -183,10 +220,13 @@ int cmd_fsck(int argc, char *argv[])
	append_opt(&opts_str, "read_only");

	while ((opt = getopt_long(argc, argv,
-				  "apynfo:rRkvh",
+				  "apynfo:rkKvh",
				  longopts, NULL)) != -1)
		switch (opt) {
-		case 'a': /* outdated alias for -p */
+		case 'a':
+			/* "automatic" run, called by the system, for us to do checks as needed.
+			 * we don't need checks here: */
+			exit(EXIT_SUCCESS);
		case 'p':
		case 'y':
			append_opt(&opts_str, "fix_errors=yes");

@@ -204,9 +244,6 @@ int cmd_fsck(int argc, char *argv[])
		case 'r':
			append_opt(&opts_str, "ratelimit_errors");
			break;
-		case 'R':
-			append_opt(&opts_str, "reconstruct_alloc");
-			break;
		case 'k':
			kernel = true;
			break;

@@ -232,7 +269,7 @@ int cmd_fsck(int argc, char *argv[])
	darray_for_each(devs, i)
		if (dev_mounted(*i)) {
			printf("Running fsck online\n");
-			return fsck_online(*i);
+			return fsck_online(*i, opts_str.buf);
		}

	int kernel_probed = kernel;

@@ -243,19 +280,35 @@ int cmd_fsck(int argc, char *argv[])
	struct printbuf parse_later = PRINTBUF;

	if (kernel_probed) {
+		darray_str loopdevs = {};
+		int fsck_fd = -1;
+
		printf("Running in-kernel offline fsck\n");
-		struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) +
-							     sizeof(u64) * devs.nr, 1);
+		struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) + sizeof(u64) * devs.nr, 1);

		fsck->opts = (unsigned long)opts_str.buf;
-		darray_for_each(devs, i)
-			fsck->devs[i - devs.data] = (unsigned long) *i;
+		darray_for_each(devs, i) {
+			if (is_blockdev(*i)) {
+				fsck->devs[i - devs.data] = (unsigned long) *i;
+			} else {
+				char *l = loopdev_alloc(*i);
+				if (!l)
+					goto kernel_fsck_err;
+				darray_push(&loopdevs, l);
+				fsck->devs[i - devs.data] = (unsigned long) l;
+			}
+		}
		fsck->nr_devs = devs.nr;

		int ctl_fd = bcachectl_open();
-		int fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
+		fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
+kernel_fsck_err:
		free(fsck);

+		darray_for_each(loopdevs, i)
+			loopdev_free(*i);
+		darray_exit(&loopdevs);
+
		if (fsck_fd < 0 && kernel < 0)
			goto userland_fsck;

@@ -266,7 +319,7 @@ int cmd_fsck(int argc, char *argv[])
	} else {
 userland_fsck:
		printf("Running userspace offline fsck\n");
-		ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
+		ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf, false);
		if (ret)
			return ret;
|
||||
#include "libbcachefs/dirent.h"
|
||||
#include "libbcachefs/errcode.h"
|
||||
#include "libbcachefs/error.h"
|
||||
#include "libbcachefs/fs-common.h"
|
||||
#include "libbcachefs/namei.h"
|
||||
#include "libbcachefs/inode.h"
|
||||
#include "libbcachefs/io_read.h"
|
||||
#include "libbcachefs/io_write.h"
|
||||
@ -31,9 +31,6 @@
|
||||
|
||||
#include <linux/dcache.h>
|
||||
|
||||
/* XXX cut and pasted from fsck.c */
|
||||
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
|
||||
|
||||
/* used by write_aligned function for waiting on bch2_write closure */
|
||||
struct write_aligned_op_t {
|
||||
struct closure cl;
|
||||
@ -242,7 +239,7 @@ static int do_create(struct bch_fs *c, subvol_inum dir,
|
||||
|
||||
bch2_inode_init_early(c, new_inode);
|
||||
|
||||
return bch2_trans_do(c, NULL, NULL, 0,
|
||||
return bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_create_trans(trans,
|
||||
dir, &dir_u,
|
||||
new_inode, &qstr,
|
||||
@ -295,7 +292,7 @@ static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino,
|
||||
|
||||
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name);
|
||||
|
||||
int ret = bch2_trans_do(c, NULL, NULL,
|
||||
int ret = bch2_trans_commit_do(c, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_unlink_trans(trans, dir, &dir_u,
|
||||
&inode_u, &qstr, false));
|
||||
@ -330,7 +327,7 @@ static void bcachefs_fuse_rename(fuse_req_t req,
|
||||
src_dir.inum, srcname, dst_dir.inum, dstname, flags);
|
||||
|
||||
/* XXX handle overwrites */
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_rename_trans(trans,
|
||||
src_dir, &src_dir_u,
|
||||
dst_dir, &dst_dir_u,
|
||||
@ -354,7 +351,7 @@ static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino,
|
||||
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
|
||||
inum.inum, newparent.inum, newname);
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
|
||||
bch2_link_trans(trans, newparent, &dir_u,
|
||||
inum, &inode_u, &qstr));
|
||||
|
||||
@ -478,10 +475,9 @@ static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
|
||||
closure_init_stack(&cl);
|
||||
|
||||
closure_get(&cl);
|
||||
rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
|
||||
rbio.bio.bi_private = &cl;
|
||||
rbio.bio.bi_private = &cl;
|
||||
|
||||
bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
|
||||
bch2_read(c, rbio_init(&rbio.bio, c, io_opts, bcachefs_fuse_read_endio), inum);
|
||||
|
||||
closure_sync(&cl);
|
||||
|
||||
@ -1222,9 +1218,9 @@ int cmd_fusemount(int argc, char *argv[])
|
||||
goto out;
|
||||
}
|
||||
if (fuse_opts.show_version) {
|
||||
/* TODO: Show bcachefs version. */
|
||||
printf("FUSE library version %s\n", fuse_pkgversion());
|
||||
fuse_lowlevel_version();
|
||||
printf("bcachefs version: %s\n", VERSION_STRING);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
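A usage sketch; the exact mount syntax is an assumption here, since this compare only shows the version output and read-path plumbing changing (the device and mount point are examples):

```shell
# Mount via FUSE (the tools must have been built with BCACHEFS_FUSE=1);
# --version now also reports the bcachefs tools version.
bcachefs fusemount /dev/sdb /mnt
```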
c_src/cmd_key.c

@@ -104,24 +104,19 @@ int cmd_set_passphrase(int argc, char *argv[])
	if (IS_ERR(c))
		die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));

-	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+	struct bch_sb *sb = c->disk_sb.sb;
+	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt);
	if (!crypt)
		die("Filesystem does not have encryption enabled");

-	struct bch_encrypted_key new_key;
-	new_key.magic = BCH_KEY_MAGIC;
-
-	int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+	struct bch_key key;
+	int ret = bch2_decrypt_sb_key(c, crypt, &key);
	if (ret)
		die("Error getting current key");

	char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
-	struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);

-	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
-				    &new_key, sizeof(new_key)))
-		die("error encrypting key");
-	crypt->key = new_key;
+	bch_crypt_update_passphrase(sb, crypt, &key, new_passphrase);

	bch2_revoke_key(c->disk_sb.sb);
	bch2_write_super(c);

@@ -142,18 +137,17 @@ int cmd_remove_passphrase(int argc, char *argv[])
	if (IS_ERR(c))
		die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));

-	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+	struct bch_sb *sb = c->disk_sb.sb;
+	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt);
	if (!crypt)
		die("Filesystem does not have encryption enabled");

-	struct bch_encrypted_key new_key;
-	new_key.magic = BCH_KEY_MAGIC;
-
-	int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+	struct bch_key key;
+	int ret = bch2_decrypt_sb_key(c, crypt, &key);
	if (ret)
		die("Error getting current key");

-	crypt->key = new_key;
+	bch_crypt_update_passphrase(sb, crypt, &key, NULL);

	bch2_write_super(c);
	bch2_fs_stop(c);
@@ -27,30 +27,44 @@ static void kill_btree_node_usage(void)
 	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
 }

+struct kill_node {
+	unsigned	btree;
+	unsigned	level;
+	u64		idx;
+};
+
 int cmd_kill_btree_node(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	enum btree_id btree_id = 0;
-	unsigned level = 0;
-	u64 node_index = 0;
+	DARRAY(struct kill_node) kill_nodes = {};
 	int opt;

 	opt_set(opts, read_only, true);

-	while ((opt = getopt(argc, argv, "b:l:i:h")) != -1)
+	while ((opt = getopt(argc, argv, "n:h")) != -1)
 		switch (opt) {
-		case 'b':
-			btree_id = read_string_list_or_die(optarg,
-						__bch2_btree_ids, "btree id");
-			break;
-		case 'l':
-			if (kstrtouint(optarg, 10, &level) || level >= BTREE_MAX_DEPTH)
+		case 'n': {
+			char *p = optarg;
+			const char *str_btree	= strsep(&p, ":");
+			const char *str_level	= strsep(&p, ":");
+			const char *str_idx	= strsep(&p, ":");
+
+			struct kill_node n = {
+				.btree = read_string_list_or_die(str_btree,
+						__bch2_btree_ids, "btree id"),
+			};
+
+			if (str_level &&
+			    (kstrtouint(str_level, 10, &n.level) || n.level >= BTREE_MAX_DEPTH))
 				die("invalid level");
-			break;
-		case 'i':
-			if (kstrtoull(optarg, 10, &node_index))
-				die("invalid index %s", optarg);
+
+			if (str_idx &&
+			    kstrtoull(str_idx, 10, &n.idx))
+				die("invalid index %s", str_idx);
+
+			darray_push(&kill_nodes, n);
 			break;
+		}
 		case 'h':
 			kill_btree_node_usage();
 			exit(EXIT_SUCCESS);
@@ -71,16 +85,19 @@ int cmd_kill_btree_node(int argc, char *argv[])
 	if (ret)
 		die("error %s from posix_memalign", bch2_err_str(ret));

-	ret = bch2_trans_run(c,
-		__for_each_btree_node(trans, iter, btree_id, POS_MIN, 0, level, 0, b, ({
-			if (b->c.level != level)
+	struct btree_trans *trans = bch2_trans_get(c);
+
+	darray_for_each(kill_nodes, i) {
+		ret = __for_each_btree_node(trans, iter, i->btree, POS_MIN, 0, i->level, 0, b, ({
+			if (b->c.level != i->level)
 				continue;

 			int ret2 = 0;
-			if (!node_index) {
+			if (!i->idx) {
 				struct printbuf buf = PRINTBUF;
 				bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-				bch_info(c, "killing btree node %s", buf.buf);
+				bch_info(c, "killing btree node %s l=%u %s",
+					 bch2_btree_id_str(i->btree), i->level, buf.buf);
 				printbuf_exit(&buf);

 				ret2 = 1;
@@ -102,16 +119,22 @@ int cmd_kill_btree_node(int argc, char *argv[])
 				}
 			}

-			node_index--;
+			i->idx--;
 			ret2;
-		})));
-	if (ret < 0)
-		bch_err(c, "error %i walking btree nodes", ret);
-	else if (!ret) {
-		bch_err(c, "node at specified index not found");
-		ret = EXIT_FAILURE;
+		}));
+
+		if (ret < 0) {
+			bch_err(c, "error %i walking btree nodes", ret);
+			break;
+		} else if (!ret) {
+			bch_err(c, "node at specified index not found");
+			ret = EXIT_FAILURE;
+			break;
+		}
 	}

+	bch2_trans_put(trans);
 	bch2_fs_stop(c);
+	darray_exit(&kill_nodes);
 	return ret < 0 ? ret : 0;
 }
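The new -n option above packs btree, level, and index into one colon-separated argument. A standalone illustration of the strsep() behavior it relies on — each call consumes one field and returns NULL once the string runs out, which is what makes the level and index fields optional:

/* Standalone example of the btree:level:idx parsing, not the bcachefs code. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
	char arg[] = "extents:1";              /* level given, index omitted */
	char *p = arg;
	const char *str_btree = strsep(&p, ":");
	const char *str_level = strsep(&p, ":");
	const char *str_idx   = strsep(&p, ":");

	printf("btree=%s level=%s idx=%s\n",
	       str_btree,
	       str_level ? str_level : "0 (default)",
	       str_idx   ? str_idx   : "0 (default)");
	return 0;
}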
@@ -51,6 +51,11 @@ static inline bool entry_is_transaction_start(struct jset_entry *entry)
 	return entry->type == BCH_JSET_ENTRY_log && !entry->level;
 }

+static inline bool entry_is_log_msg(struct jset_entry *entry)
+{
+	return entry->type == BCH_JSET_ENTRY_log && entry->level;
+}
+
 typedef DARRAY(struct bbpos_range) d_bbpos_range;
 typedef DARRAY(enum btree_id) d_btree_id;

@@ -60,9 +65,21 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 		struct bbpos k_start	= BBPOS(entry->btree_id, bkey_start_pos(&k->k));
 		struct bbpos k_end	= BBPOS(entry->btree_id, k->k.p);

-		if (bbpos_cmp(k_start, i->end) < 0 &&
-		    bbpos_cmp(k_end, i->start) > 0)
-			return true;
+		if (!i->start.pos.snapshot &&
+		    !i->end.pos.snapshot) {
+			k_start.pos.snapshot	= 0;
+			k_end.pos.snapshot	= 0;
+		}
+
+		if (!k->k.size) {
+			if (bbpos_cmp(k_start, i->start) >= 0 &&
+			    bbpos_cmp(k_end, i->end) <= 0)
+				return true;
+		} else {
+			if (bbpos_cmp(i->start, k_end) <= 0 &&
+			    bbpos_cmp(i->end, k_start) >= 0)
+				return true;
+		}
 	}
 	return false;
 }
@@ -70,9 +87,9 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 static bool entry_matches_transaction_filter(struct jset_entry *entry,
 					     d_bbpos_range filter)
 {
-	if (entry->type == BCH_JSET_ENTRY_btree_root ||
-	    entry->type == BCH_JSET_ENTRY_btree_keys ||
-	    entry->type == BCH_JSET_ENTRY_overwrite)
+	if (!entry->level &&
+	    (entry->type == BCH_JSET_ENTRY_btree_keys ||
+	     entry->type == BCH_JSET_ENTRY_overwrite))
 		jset_entry_for_each_key(entry, k)
 			if (bkey_matches_filter(filter, entry, k))
 				return true;
@@ -80,17 +97,36 @@ static bool entry_matches_transaction_filter(struct jset_entry *entry,
 }

 static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
-				     d_bbpos_range filter)
+				     darray_str msg_filter,
+				     d_bbpos_range key_filter)
 {
-	if (!filter.nr)
+	struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
+	unsigned b = jset_entry_log_msg_bytes(l);
+	bool have_log_messages = false;
+	bool have_non_log_messages = false;
+
+	darray_for_each(msg_filter, i)
+		if (!strncmp(*i, l->d, b))
+			return false;
+
+	if (!key_filter.nr)
 		return true;

 	for (entry = vstruct_next(entry);
 	     entry != end && !entry_is_transaction_start(entry);
-	     entry = vstruct_next(entry))
-		if (entry_matches_transaction_filter(entry, filter))
+	     entry = vstruct_next(entry)) {
+		if (entry_matches_transaction_filter(entry, key_filter))
 			return true;

+		if (entry_is_log_msg(entry))
+			have_log_messages = true;
+		else
+			have_non_log_messages = true;
+	}
+
+	if (have_log_messages && !have_non_log_messages)
+		return true;
+
 	return false;
 }

@@ -112,8 +148,41 @@ static bool should_print_entry(struct jset_entry *entry, d_btree_id filter)
 	return false;
 }

+static void journal_entry_header_to_text(struct printbuf *out,
+					 struct bch_fs *c,
+					 struct journal_replay *p, bool blacklisted)
+{
+	if (blacklisted)
+		prt_str(out, "blacklisted ");
+
+	prt_printf(out,
+		   "\n"
+		   "journal entry %llu\n"
+		   " version %u\n"
+		   " last seq %llu\n"
+		   " flush %u\n"
+		   " written at ",
+		   le64_to_cpu(p->j.seq),
+		   le32_to_cpu(p->j.version),
+		   le64_to_cpu(p->j.last_seq),
+		   !JSET_NO_FLUSH(&p->j));
+	bch2_journal_ptrs_to_text(out, c, p);
+
+	if (blacklisted)
+		star_start_of_lines(out->buf);
+}
+
+static void journal_entry_header_print(struct bch_fs *c, struct journal_replay *p, bool blacklisted)
+{
+	struct printbuf buf = PRINTBUF;
+	journal_entry_header_to_text(&buf, c, p, blacklisted);
+	printf("%s\n", buf.buf);
+	printbuf_exit(&buf);
+}
+
 static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
-				  d_bbpos_range transaction_filter,
+				  darray_str transaction_msg_filter,
+				  d_bbpos_range transaction_key_filter,
 				  d_btree_id key_filter)
 {
 	struct journal_replay *p, **_p;
@@ -121,6 +190,8 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
 	struct printbuf buf = PRINTBUF;

 	genradix_for_each(&c->journal_entries, iter, _p) {
+		bool printed_header = false;
+
 		p = *_p;
 		if (!p)
 			continue;
@@ -132,28 +203,10 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
 			bch2_journal_seq_is_blacklisted(c,
 					le64_to_cpu(p->j.seq), false);

-		if (!transaction_filter.nr) {
-			if (blacklisted)
-				printf("blacklisted ");
-
-			printf("journal entry %llu\n", le64_to_cpu(p->j.seq));
-
-			printbuf_reset(&buf);
-
-			prt_printf(&buf,
-				   " version %u\n"
-				   " last seq %llu\n"
-				   " flush %u\n"
-				   " written at ",
-				   le32_to_cpu(p->j.version),
-				   le64_to_cpu(p->j.last_seq),
-				   !JSET_NO_FLUSH(&p->j));
-			bch2_journal_ptrs_to_text(&buf, c, p);
-
-			if (blacklisted)
-				star_start_of_lines(buf.buf);
-			printf("%s\n", buf.buf);
-			printbuf_reset(&buf);
+		if (!transaction_msg_filter.nr &&
+		    !transaction_key_filter.nr) {
+			journal_entry_header_print(c, p, blacklisted);
+			printed_header = true;
 		}

 		struct jset_entry *entry = p->j.start;
@@ -165,7 +218,9 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
 			 * commit:
 			 */
 			if (entry_is_transaction_start(entry)) {
-				if (!should_print_transaction(entry, end, transaction_filter)) {
+				if (!should_print_transaction(entry, end,
+							      transaction_msg_filter,
+							      transaction_key_filter)) {
 					do {
 						entry = vstruct_next(entry);
 					} while (entry != end && !entry_is_transaction_start(entry));
@@ -179,7 +234,11 @@ static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
 			if (!should_print_entry(entry, key_filter))
 				goto next;

-			bool highlight = entry_matches_transaction_filter(entry, transaction_filter);
+			if (!printed_header)
+				journal_entry_header_print(c, p, blacklisted);
+			printed_header = true;
+
+			bool highlight = entry_matches_transaction_filter(entry, transaction_key_filter);
 			if (highlight)
 				fputs(RED, stdout);

@@ -213,8 +272,9 @@ int cmd_list_journal(int argc, char *argv[])
 	};
 	struct bch_opts opts = bch2_opts_empty();
 	u32 nr_entries = U32_MAX;
-	d_bbpos_range transaction_filter = { 0 };
-	d_btree_id key_filter = { 0 };
+	darray_str transaction_msg_filter = {};
+	d_bbpos_range transaction_key_filter = {};
+	d_btree_id key_filter = {};
 	int opt;

 	opt_set(opts, noexcl, true);
@@ -228,7 +288,7 @@ int cmd_list_journal(int argc, char *argv[])
 	opt_set(opts, retain_recovery_info ,true);
 	opt_set(opts, read_journal_only,true);

-	while ((opt = getopt_long(argc, argv, "an:t:k:vh",
+	while ((opt = getopt_long(argc, argv, "an:m:t:k:vh",
 				  longopts, NULL)) != -1)
 		switch (opt) {
 		case 'a':
@@ -239,8 +299,11 @@ int cmd_list_journal(int argc, char *argv[])
 				die("error parsing nr_entries");
 			opt_set(opts, read_entire_journal, true);
 			break;
+		case 'm':
+			darray_push(&transaction_msg_filter, strdup(optarg));
+			break;
 		case 't':
-			darray_push(&transaction_filter, bbpos_range_parse(optarg));
+			darray_push(&transaction_key_filter, bbpos_range_parse(optarg));
 			break;
 		case 'k':
 			darray_push(&key_filter, read_string_list_or_die(optarg, __bch2_btree_ids, "btree id"));
@@ -263,7 +326,10 @@ int cmd_list_journal(int argc, char *argv[])
 	if (IS_ERR(c))
 		die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));

-	journal_entries_print(c, nr_entries, transaction_filter, key_filter);
+	journal_entries_print(c, nr_entries,
+			      transaction_msg_filter,
+			      transaction_key_filter,
+			      key_filter);
 	bch2_fs_stop(c);
 	return 0;
 }
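The filter rework above distinguishes zero-size keys (which must lie entirely inside the filter range) from extents (which only need to overlap it), and ignores the snapshot field when the filter doesn't specify one. A standalone sketch of the two predicates with plain integers in place of struct bbpos (illustrative only):

/* Simplified containment-vs-overlap checks mirroring bkey_matches_filter(). */
#include <stdbool.h>
#include <stdio.h>

static bool point_matches(int k, int start, int end)
{
	return k >= start && k <= end;            /* zero-size key: containment */
}

static bool extent_matches(int k_start, int k_end, int start, int end)
{
	return start <= k_end && end >= k_start;  /* extent: overlap is enough */
}

int main(void)
{
	/* a point inside [0,10] matches; an extent [8,20] overlaps [0,10] */
	printf("%d %d\n",
	       point_matches(5, 0, 10),
	       extent_matches(8, 20, 0, 10));
	return 0;
}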
@@ -31,9 +31,6 @@
 #include "libbcachefs/replicas.h"
 #include "libbcachefs/super.h"

-/* XXX cut and pasted from fsck.c */
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
 static char *dev_t_to_path(dev_t dev)
 {
 	char link[PATH_MAX], *p;
@@ -162,9 +159,9 @@ static void find_superblock_space(ranges extents,
 {
 	darray_for_each(extents, i) {
 		u64 start = round_up(max(256ULL << 10, i->start),
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);
 		u64 end = round_down(i->end,
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);

 		/* Need space for two superblocks: */
 		if (start + (opts.superblock_size << 9) * 2 <= end) {
@@ -212,38 +209,43 @@ static int migrate_fs(const char *fs_path,
 	if (!S_ISDIR(stat.st_mode))
 		die("%s is not a directory", fs_path);

-	struct dev_opts dev = dev_opts_default();
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts_default());

-	dev.path = dev_t_to_path(stat.st_dev);
-	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+	struct dev_opts *dev = &devs.data[0];

-	int ret = PTR_ERR_OR_ZERO(dev.file);
+	dev->path = dev_t_to_path(stat.st_dev);
+	dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev->file);
 	if (ret < 0)
-		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
-	dev.bdev = file_bdev(dev.file);
+		die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+	dev->bdev = file_bdev(dev->file);

-	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+	opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));

 	char *file_path = mprintf("%s/bcachefs", fs_path);
 	printf("Creating new filesystem on %s in space reserved at %s\n",
-	       dev.path, file_path);
+	       dev->path, file_path);

-	dev.size	= get_size(dev.bdev->bd_fd);
-	dev.bucket_size	= bch2_pick_bucket_size(fs_opts, &dev);
-	dev.nbuckets	= dev.size / dev.bucket_size;
+	dev->fs_size	= get_size(dev->bdev->bd_fd);
+	opt_set(dev->opts, bucket_size, bch2_pick_bucket_size(fs_opts, devs));

-	bch2_check_bucket_size(fs_opts, &dev);
+	dev->nbuckets	= dev->fs_size / dev->opts.bucket_size;
+
+	bch2_check_bucket_size(fs_opts, dev);

 	u64 bcachefs_inum;
 	ranges extents = reserve_new_fs_space(file_path,
 				fs_opts.block_size >> 9,
-				get_size(dev.bdev->bd_fd) / 5,
+				get_size(dev->bdev->bd_fd) / 5,
 				&bcachefs_inum, stat.st_dev, force);

-	find_superblock_space(extents, format_opts, &dev);
+	find_superblock_space(extents, format_opts, dev);

-	struct bch_sb *sb = bch2_format(fs_opt_strs,
-					fs_opts, format_opts, &dev, 1);
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	darray_exit(&devs);
+
 	u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);

 	if (format_opts.passphrase)
@@ -251,16 +253,14 @@ static int migrate_fs(const char *fs_path,

 	free(sb);

-	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	char *path[1] = { dev.path };
+	char *path[1] = { dev->path };

+	struct bch_opts opts = bch2_opts_empty();
 	opt_set(opts, sb,	sb_offset);
-	opt_set(opts, nostart,	true);
 	opt_set(opts, noexcl,	true);
+	opt_set(opts, nostart,	true);

-	c = bch2_fs_open(path, 1, opts);
+	struct bch_fs *c = bch2_fs_open(path, 1, opts);
 	if (IS_ERR(c))
 		die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));

@@ -268,10 +268,6 @@ static int migrate_fs(const char *fs_path,
 	if (ret)
 		die("Error allocating buckets_nouse: %s", bch2_err_str(ret));

-	ret = bch2_fs_start(c);
-	if (IS_ERR(c))
-		die("Error starting new filesystem: %s", bch2_err_str(ret));
-
 	mark_unreserved_space(c, extents);

 	ret = bch2_fs_start(c);
@@ -285,7 +281,10 @@ static int migrate_fs(const char *fs_path,
 		.type		= BCH_MIGRATE_migrate,
 	};

-	copy_fs(c, fs_fd, fs_path, &s);
+	u64 reserve_start = round_up((format_opts.superblock_size * 2 + 8) << 9,
+				     dev->opts.bucket_size);
+
+	copy_fs(c, fs_fd, fs_path, &s, reserve_start);

 	bch2_fs_stop(c);

@@ -313,7 +312,7 @@ static int migrate_fs(const char *fs_path,
 	       "filesystem. That file can be deleted once the old filesystem is\n"
 	       "no longer needed (and should be deleted prior to running\n"
 	       "bcachefs migrate-superblock)\n",
-	       sb_offset, dev.path, dev.path, sb_offset);
+	       sb_offset, dev->path, dev->path, sb_offset);
 	return 0;
 }

@@ -377,7 +376,7 @@ static void migrate_superblock_usage(void)
 int cmd_migrate_superblock(int argc, char *argv[])
 {
 	char		*dev = NULL;
-	u64		offset = 0;
+	u64		sb_offset = 0;
 	int		opt, ret;

 	while ((opt = getopt(argc, argv, "d:o:h")) != -1)
@@ -386,7 +385,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
 			dev = optarg;
 			break;
 		case 'o':
-			ret = kstrtou64(optarg, 10, &offset);
+			ret = kstrtou64(optarg, 10, &sb_offset);
 			if (ret)
 				die("Invalid offset");
 			break;
@@ -398,29 +397,72 @@ int cmd_migrate_superblock(int argc, char *argv[])
 	if (!dev)
 		die("Please specify a device");

-	if (!offset)
+	if (!sb_offset)
 		die("Please specify offset of existing superblock");

 	int fd = xopen(dev, O_RDWR);
-	struct bch_sb *sb = __bch2_super_read(fd, offset);
+	struct bch_sb *sb = __bch2_super_read(fd, sb_offset);
+	unsigned sb_size = 1U << sb->layout.sb_max_size_bits;

 	if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
 		die("Can't add superblock: no space left in superblock layout");

-	unsigned i;
-	for (i = 0; i < sb->layout.nr_superblocks; i++)
-		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
-			die("Superblock layout already has default superblock");
+	for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
+		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR ||
+		    le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size)
+			die("Superblock layout already has default superblocks");

-	memmove(&sb->layout.sb_offset[1],
+	memmove(&sb->layout.sb_offset[2],
 		&sb->layout.sb_offset[0],
 		sb->layout.nr_superblocks * sizeof(u64));
-	sb->layout.nr_superblocks++;

+	sb->layout.nr_superblocks += 2;
 	sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+	sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size);
+
+	/* also write first 0-3.5k bytes with zeroes, ensure we blow away old
+	 * superblock */
+	static const char zeroes[BCH_SB_SECTOR << 9];
+	xpwrite(fd, zeroes, BCH_SB_SECTOR << 9, 0, "zeroing start of disk");

 	bch2_super_write(fd, sb);
 	close(fd);
+
+	/* mark new superblocks */
+
+	struct bch_opts opts = bch2_opts_empty();
+	opt_set(opts, nostart,	true);
+	opt_set(opts, sb,	sb_offset);
+
+	struct bch_fs *c = bch2_fs_open(&dev, 1, opts);
+	ret =   PTR_ERR_OR_ZERO(c) ?:
+		bch2_buckets_nouse_alloc(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+
+	struct bch_dev *ca = c->devs[0];
+	for (u64 b = 0; bucket_to_sector(ca, b) < BCH_SB_SECTOR + sb_size * 2; b++)
+		set_bit(b, ca->buckets_nouse);
+
+	ret = bch2_fs_start(c);
+	if (ret)
+		die("Error starting filesystem: %s", bch2_err_str(ret));
+
+	bch2_fs_stop(c);
+
+	opts = bch2_opts_empty();
+	opt_set(opts, fsck,	true);
+	opt_set(opts, fix_errors, true);
+
+	/*
+	 * Hack: the free space counters are coming out wrong after marking the
+	 * new superblock, but it's just the device counters so it's
+	 * inconsequential:
+	 */
+
+	c = bch2_fs_open(&dev, 1, opts);
+	ret = PTR_ERR_OR_ZERO(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+	bch2_fs_stop(c);
 	return 0;
 }
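A worked example of the reserve_start computation above, assuming superblock_size is counted in 512-byte sectors (e.g. a 2048-sector superblock) and a 1 MB bucket: two superblocks plus 8 sectors of slack, shifted to bytes, rounded up to a whole bucket:

/* Standalone arithmetic check; the 2048-sector superblock size and 1 MB
 * bucket are assumed example values. */
#include <stdio.h>

static unsigned long long round_up_to(unsigned long long v, unsigned long long m)
{
	return (v + m - 1) / m * m;
}

int main(void)
{
	unsigned long long superblock_size = 2048;   /* sectors, assumed */
	unsigned long long bucket_size = 1ULL << 20; /* 1 MB, assumed */

	/* (2*2048 + 8) sectors = 2101248 bytes, rounded up to 3 MB */
	unsigned long long reserve_start =
		round_up_to((superblock_size * 2 + 8) << 9, bucket_size);

	printf("reserve_start = %llu bytes\n", reserve_start); /* 3145728 */
	return 0;
}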
@@ -30,8 +30,9 @@ static void set_option_usage(void)
 	     "Usage: bcachefs set-fs-option [OPTION].. device\n"
 	     "\n"
 	     "Options:\n");
-	bch2_opts_usage(OPT_MOUNT);
-	puts("  -h, --help     display this help and exit\n"
+	bch2_opts_usage(OPT_MOUNT|OPT_RUNTIME);
+	puts("  -d, --dev-idx  index for device specific options\n"
+	     "  -h, --help     display this help and exit\n"
 	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
 	exit(EXIT_SUCCESS);
 }
@@ -55,11 +56,24 @@ int cmd_set_option(int argc, char *argv[])
 {
 	struct bch_opt_strs new_opt_strs = bch2_cmdline_opts_get(&argc, argv, OPT_MOUNT|OPT_DEVICE);
 	struct bch_opts new_opts = bch2_parse_opts(new_opt_strs);
-	unsigned i;
+	DARRAY(unsigned) dev_idxs = {};
 	int opt, ret = 0;

-	while ((opt = getopt(argc, argv, "h")) != -1)
+	static const struct option longopts[] = {
+		{ "dev-idx",	required_argument,	NULL, 'd' },
+		{ "help",	no_argument,		NULL, 'h' },
+		{ NULL }
+	};
+
+	while ((opt = getopt_long(argc, argv, "d:h", longopts, NULL)) != -1)
 		switch (opt) {
+		case 'd': {
+			unsigned dev_idx;
+			if (kstrtoint(optarg, 10, &dev_idx))
+				die("error parsing %s", optarg);
+			darray_push(&dev_idxs, dev_idx);
+			break;
+		}
 		case 'h':
 			set_option_usage();
 			break;
@@ -72,6 +86,7 @@ int cmd_set_option(int argc, char *argv[])
 	}

 	bool online = false;
+	unsigned i;
 	for (i = 0; i < argc; i++)
 		if (dev_mounted(argv[i])) {
 			online = true;
@@ -96,28 +111,47 @@ int cmd_set_option(int argc, char *argv[])
 		if (!bch2_opt_defined_by_id(&new_opts, i))
 			continue;

-		ret = bch2_opt_check_may_set(c, i, v);
-		if (ret < 0) {
-			fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
-			continue;
-		}
-
 		if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
 			fprintf(stderr, "Can't set option %s\n", opt->attr.name);

 		if (opt->flags & OPT_FS) {
+			ret = bch2_opt_check_may_set(c, NULL, i, v);
+			if (ret < 0) {
+				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+				continue;
+			}
+
 			bch2_opt_set_sb(c, NULL, opt, v);
 		}

 		if (opt->flags & OPT_DEVICE) {
-			for (unsigned dev = 0; dev < argc; dev++) {
-				int dev_idx = name_to_dev_idx(c, argv[dev]);
-				if (dev_idx < 0) {
-					fprintf(stderr, "Couldn't look up device %s\n", argv[i]);
-					continue;
-				}
+			if (dev_idxs.nr) {
+				darray_for_each(dev_idxs, dev) {
+					struct bch_dev *ca = bch2_dev_tryget_noerror(c, *dev);
+					if (!ca) {
+						fprintf(stderr, "Couldn't look up device %u\n", *dev);
+						continue;
+					}

-				bch2_opt_set_sb(c, c->devs[dev_idx], opt, v);
+					ret = bch2_opt_check_may_set(c, ca, i, v);
+					if (ret < 0) {
+						fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+						continue;
+					}
+
+					bch2_opt_set_sb(c, ca, opt, v);
+					bch2_dev_put(ca);
+				}
+			} else {
+				for (unsigned dev = 0; dev < argc; dev++) {
+					int dev_idx = name_to_dev_idx(c, argv[dev]);
+					if (dev_idx < 0) {
+						fprintf(stderr, "Couldn't look up device %s\n", argv[i]);
+						continue;
+					}
+
+					bch2_opt_set_sb(c, c->devs[dev_idx], opt, v);
+				}
+			}
 		}
 	}
c_src/cmd_top.c (new file, 98 lines)
@@ -0,0 +1,98 @@
+#include <dirent.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/sb-counters.h"
+
+static const u8 counters_to_stable_map[] = {
+#define x(n, id, ...)	[BCH_COUNTER_##n] = BCH_COUNTER_STABLE_##n,
+	BCH_PERSISTENT_COUNTERS()
+#undef x
+};
+
+static struct bch_ioctl_query_counters *read_counters(struct bchfs_handle fs)
+{
+	struct bch_ioctl_query_counters *ret =
+		kzalloc(sizeof(*ret) + sizeof(ret->d[0]) * BCH_COUNTER_NR, GFP_KERNEL);
+
+	ret->nr = BCH_COUNTER_NR;
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_QUERY_COUNTERS, ret);
+	return ret;
+}
+
+static void fs_top(const char *path, bool human_readable)
+{
+	struct bchfs_handle fs = bcache_fs_open(path);
+
+	struct bch_ioctl_query_counters *curr, *prev = NULL;
+
+	curr = read_counters(fs);
+
+	while (true) {
+		sleep(1);
+		kfree(prev);
+		prev = curr;
+		curr = read_counters(fs);
+
+		printf("\033[2J");
+		printf("\033[H");
+
+		for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
+			unsigned stable = counters_to_stable_map[i];
+			u64 v = stable < curr->nr
+				? curr->d[stable] - prev->d[stable]
+				: 0;
+			printf("%-48s %llu\n",
+			       bch2_counter_names[i],
+			       v);
+		}
+	}
+
+	bcache_fs_close(fs);
+}
+
+static void fs_top_usage(void)
+{
+	puts("bcachefs fs top - display runtime perfomance info\n"
+	     "Usage: bcachefs fs top [OPTION]... <mountpoint>\n"
+	     "\n"
+	     "Options:\n"
+	     "  -h, --human-readable    Human readable units\n"
+	     "  -H, --help              Display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_fs_top(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "help",		no_argument,	NULL, 'H' },
+		{ "human-readable",	no_argument,	NULL, 'h' },
+		{ NULL }
+	};
+	bool human_readable = false;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "Hh",
+				  longopts, NULL)) != -1)
+		switch (opt) {
+		case 'h':
+			human_readable = true;
+			break;
+		case 'H':
+			fs_top_usage();
+			exit(EXIT_SUCCESS);
+		default:
+			fs_top_usage();
+			exit(EXIT_FAILURE);
+		}
+	args_shift(optind);
+
+	fs_top(arg_pop() ?: ".", human_readable) ;
+	return 0;
+}
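The new fs top command above samples the persistent counters once per second and prints per-second deltas. A hedged standalone sketch of that sampling loop, with a plain array standing in for the BCH_IOCTL_QUERY_COUNTERS ioctl:

/* Illustrative only: swap the previous and current snapshots each tick and
 * print the difference, exactly as fs_top() does with real counters. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NR 3

static void read_counters(unsigned long long d[NR])
{
	static unsigned long long fake;
	for (int i = 0; i < NR; i++)
		d[i] = fake += i + 1;   /* stand-in for the ioctl */
}

int main(void)
{
	unsigned long long prev[NR], curr[NR];
	read_counters(prev);

	for (int t = 0; t < 3; t++) {
		sleep(1);
		read_counters(curr);
		for (int i = 0; i < NR; i++)
			printf("counter %d: %llu/s\n", i, curr[i] - prev[i]);
		memcpy(prev, curr, sizeof(prev));
	}
	return 0;
}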
@@ -11,10 +13,13 @@

 int cmd_format(int argc, char *argv[]);
 int cmd_show_super(int argc, char *argv[]);
+int cmd_recover_super(int argc, char *argv[]);
 int cmd_reset_counters(int argc, char *argv[]);
 int cmd_set_option(int argc, char *argv[]);

 int fs_usage(void);
 int cmd_fs_usage(int argc, char *argv[]);
+int cmd_fs_top(int argc, char *argv[]);

 int device_usage(void);
 int cmd_device_add(int argc, char *argv[]);
@@ -28,6 +31,7 @@ int cmd_device_resize_journal(int argc, char *argv[]);

 int data_usage(void);
 int cmd_data_rereplicate(int argc, char *argv[]);
+int cmd_data_scrub(int argc, char *argv[]);
 int cmd_data_job(int argc, char *argv[]);

 int cmd_unlock(int argc, char *argv[]);
@@ -176,26 +176,47 @@ void bch_sb_crypt_init(struct bch_sb *sb,
 			struct bch_sb_field_crypt *crypt,
 			const char *passphrase)
 {
+	struct bch_key key;
+	get_random_bytes(&key, sizeof(key));
+
 	crypt->key.magic = BCH_KEY_MAGIC;
-	get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
+	crypt->key.key = key;

-	if (passphrase) {
+	bch_crypt_update_passphrase(sb, crypt, &key, passphrase);
+}
+
+void bch_crypt_update_passphrase(
+	struct bch_sb *sb,
+	struct bch_sb_field_crypt *crypt,
+	struct bch_key *key,
+	const char *new_passphrase)
+{
+	struct bch_encrypted_key new_key;
+	new_key.magic = BCH_KEY_MAGIC;
+	new_key.key = *key;
+
+	if(!new_passphrase) {
+		crypt->key = new_key;
+		return;
+	}
+
+	// If crypt already has an encrypted key reuse it's encryption params
+	if (!bch2_key_is_encrypted(&crypt->key)) {
 		SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
 		SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384));
 		SET_BCH_KDF_SCRYPT_R(crypt, ilog2(8));
 		SET_BCH_KDF_SCRYPT_P(crypt, ilog2(16));
+	}

-		struct bch_key passphrase_key = derive_passphrase(crypt, passphrase);
-
-		assert(!bch2_key_is_encrypted(&crypt->key));
-
-		if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
-					    &crypt->key, sizeof(crypt->key)))
-			die("error encrypting key");
-
-		assert(bch2_key_is_encrypted(&crypt->key));
-
-		memzero_explicit(&passphrase_key, sizeof(passphrase_key));
-	}
+	struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
+
+	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
+				    &new_key, sizeof(new_key)))
+		die("error encrypting key");
+
+	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+
+	crypt->key = new_key;
+	assert(bch2_key_is_encrypted(&crypt->key));
 }

@@ -19,4 +19,7 @@ void bch2_add_key(struct bch_sb *, const char *, const char *, const char *);
 void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
 		       const char *);

+void bch_crypt_update_passphrase(struct bch_sb *sb, struct bch_sb_field_crypt *crypt,
+				 struct bch_key *key, const char *new_passphrase);
+
 #endif /* _CRYPTO_H */
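The refactor above moves the encrypt-and-store logic shared by cmd_set_passphrase and cmd_remove_passphrase (see the cmd_key.c hunks earlier) into bch_crypt_update_passphrase(), where a NULL passphrase stores the key in the clear. A hedged standalone sketch of that shared flow, with stub functions standing in for the real bcachefs crypto calls:

/* Illustrative stubs only: decrypt the current key once, then one helper
 * handles both "set new passphrase" and "remove passphrase" (NULL). */
#include <stdio.h>
#include <string.h>

struct key { unsigned char bytes[32]; int encrypted; };

static int decrypt_current_key(struct key *k)
{
	memset(k->bytes, 0xaa, sizeof(k->bytes)); /* pretend-decrypt */
	k->encrypted = 0;
	return 0;
}

static void update_passphrase(struct key *k, const char *passphrase)
{
	/* NULL passphrase means "remove": store the key unencrypted */
	k->encrypted = passphrase != NULL;
}

int main(void)
{
	struct key k;
	if (decrypt_current_key(&k))
		return 1;

	update_passphrase(&k, "hunter2");  /* cmd_set_passphrase path */
	update_passphrase(&k, NULL);       /* cmd_remove_passphrase path */
	printf("encrypted=%d\n", k.encrypted);
	return 0;
}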
@@ -16,6 +16,8 @@

 #include <uuid/uuid.h>

+#include <linux/mm.h>
+
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/bcachefs_format.h"
@@ -31,10 +33,10 @@

 #define NSEC_PER_SEC 1000000000L

-static void init_layout(struct bch_sb_layout *l,
-			unsigned block_size,
-			unsigned sb_size,
-			u64 sb_start, u64 sb_end)
+void bch2_sb_layout_init(struct bch_sb_layout *l,
+			 unsigned block_size,
+			 unsigned sb_size,
+			 u64 sb_start, u64 sb_end)
 {
 	u64 sb_pos = sb_start;
 	unsigned i;
@@ -60,83 +62,86 @@ static void init_layout(struct bch_sb_layout *l,
 		sb_start, sb_pos, sb_end, sb_size);
 }

-/* minimum size filesystem we can create, given a bucket size: */
-static u64 min_size(unsigned bucket_size)
+static u64 dev_max_bucket_size(u64 dev_size)
 {
-	return BCH_MIN_NR_NBUCKETS * bucket_size;
+	return rounddown_pow_of_two(dev_size / (BCH_MIN_NR_NBUCKETS * 4));
 }

-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
 {
-	u64 bucket_size;
-
-	if (dev->size < min_size(opts.block_size))
-		die("cannot format %s, too small (%llu bytes, min %llu)",
-		    dev->path, dev->size, min_size(opts.block_size));
-
 	/* Bucket size must be >= block size: */
-	bucket_size = opts.block_size;
+	u64 bucket_size = opts.block_size;

 	/* Bucket size must be >= btree node size: */
 	if (opt_defined(opts, btree_node_size))
-		bucket_size = max_t(unsigned, bucket_size,
-				    opts.btree_node_size);
+		bucket_size = max_t(u64, bucket_size, opts.btree_node_size);

-	/* Want a bucket size of at least 128k, if possible: */
-	bucket_size = max(bucket_size, 128ULL << 10);
+	u64 min_dev_size = BCH_MIN_NR_NBUCKETS * bucket_size;
+	darray_for_each(devs, i)
+		if (i->fs_size < min_dev_size)
+			die("cannot format %s, too small (%llu bytes, min %llu)",
+			    i->path, i->fs_size, min_dev_size);

-	if (dev->size >= min_size(bucket_size)) {
-		unsigned scale = max(1,
-				     ilog2(dev->size / min_size(bucket_size)) / 4);
+	u64 total_fs_size = 0;
+	darray_for_each(devs, i)
+		total_fs_size += i->fs_size;

-		scale = rounddown_pow_of_two(scale);
+	struct sysinfo info;
+	si_meminfo(&info);

-		/* max bucket size 1 mb */
-		bucket_size = min(bucket_size * scale, 1ULL << 20);
-	} else {
-		do {
-			bucket_size /= 2;
-		} while (dev->size < min_size(bucket_size));
-	}
+	/*
+	 * Large fudge factor to allow for other fsck processes and devices
+	 * being added after creation
+	 */
+	u64 mem_available_for_fsck = info.totalram / 8;
+	u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
+	u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
+
+	/*
+	 * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+	 * extents too much as they're moved:
+	 */
+	bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+	/* Lower bound to ensure we can fsck: */
+	bucket_size = max(bucket_size, mem_lower_bound);
+
+	u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+	/* We also prefer larger buckets for performance, up to 2MB at 2T */
+	bucket_size = max(bucket_size, perf_lower_bound);

 	return bucket_size;
 }

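A worked example of the memory and performance lower bounds computed above, under stated assumptions: 16 GiB of RAM, a 10 TiB filesystem, and sizeof(struct bucket) taken to be 40 bytes (illustrative; the real size depends on the build):

/* Standalone arithmetic check of the two lower bounds. */
#include <stdio.h>

static unsigned long long roundup_pow_of_two(unsigned long long v)
{
	unsigned long long r = 1;
	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long long totalram      = 16ULL << 30;  /* 16 GiB, assumed */
	unsigned long long total_fs_size = 10ULL << 40;  /* 10 TiB, assumed */

	unsigned long long mem_available_for_fsck = totalram / 8;   /* 2 GiB */
	unsigned long long buckets_can_fsck =
		mem_available_for_fsck / (unsigned long long)(40 * 1.5);
	unsigned long long mem_lower_bound =
		roundup_pow_of_two(total_fs_size / buckets_can_fsck);

	/* performance bound: 1 byte of bucket per MiB of fs, capped at 2 MB */
	unsigned long long perf_lower_bound = total_fs_size / (1ULL << 20);
	if (perf_lower_bound > 2ULL << 20)
		perf_lower_bound = 2ULL << 20;

	/* here the perf bound (2 MB) dominates the fsck bound (512 KiB) */
	printf("fsck bound: %llu, perf bound: %llu\n",
	       mem_lower_bound, perf_lower_bound);
	return 0;
}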
 void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
 {
-	if (dev->bucket_size < opts.block_size)
-		die("Bucket size (%llu) cannot be smaller than block size (%u)",
-		    dev->bucket_size, opts.block_size);
+	if (dev->opts.bucket_size < opts.block_size)
+		die("Bucket size (%u) cannot be smaller than block size (%u)",
+		    dev->opts.bucket_size, opts.block_size);

 	if (opt_defined(opts, btree_node_size) &&
-	    dev->bucket_size < opts.btree_node_size)
-		die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
-		    dev->bucket_size, opts.btree_node_size);
+	    dev->opts.bucket_size < opts.btree_node_size)
+		die("Bucket size (%u) cannot be smaller than btree node size (%u)",
+		    dev->opts.bucket_size, opts.btree_node_size);

 	if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
-		die("Not enough buckets: %llu, need %u (bucket size %llu)",
-		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
-
-	if (dev->bucket_size > (u32) U16_MAX << 9)
-		die("Bucket size (%llu) too big (max %u)",
-		    dev->bucket_size, (u32) U16_MAX << 9);
+		die("Not enough buckets: %llu, need %u (bucket size %u)",
+		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->opts.bucket_size);
 }

 static unsigned parse_target(struct bch_sb_handle *sb,
-			     struct dev_opts *devs, size_t nr_devs,
+			     dev_opts_list devs,
 			     const char *s)
 {
-	struct dev_opts *i;
-	int idx;
-
 	if (!s)
 		return 0;

-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
 		if (!strcmp(s, i->path))
-			return dev_to_target(i - devs);
+			return dev_to_target(i - devs.data);

-	idx = bch2_disk_path_find(sb, s);
+	int idx = bch2_disk_path_find(sb, s);
 	if (idx >= 0)
 		return group_to_target(idx);

@@ -144,56 +149,61 @@ static unsigned parse_target(struct bch_sb_handle *sb,
 	return 0;
 }

+static void bch2_opt_set_sb_all(struct bch_sb *sb, int dev_idx, struct bch_opts *opts)
+{
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
+		u64 v = bch2_opt_defined_by_id(opts, id)
+			? bch2_opt_get_by_id(opts, id)
+			: bch2_opt_get_by_id(&bch2_opts_default, id);
+
+		__bch2_opt_set_sb(sb, dev_idx, &bch2_opt_table[id], v);
+	}
+}
+
 struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
 			   struct bch_opts	fs_opts,
 			   struct format_opts	opts,
-			   struct dev_opts	*devs,
-			   size_t		nr_devs)
+			   dev_opts_list	devs)
 {
 	struct bch_sb_handle sb = { NULL };
-	struct dev_opts *i;
 	unsigned max_dev_block_size = 0;
-	unsigned opt_id;
-	u64 min_bucket_size = U64_MAX;

-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
 		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));

 	/* calculate block size: */
-	if (!opt_defined(fs_opts, block_size)) {
+	if (!opt_defined(fs_opts, block_size))
 		opt_set(fs_opts, block_size, max_dev_block_size);
-	} else if (fs_opts.block_size < max_dev_block_size)
+
+	if (fs_opts.block_size < max_dev_block_size)
 		die("blocksize too small: %u, must be greater than device blocksize %u",
 		    fs_opts.block_size, max_dev_block_size);

 	/* get device size, if it wasn't specified: */
-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->size)
-			i->size = get_size(i->bdev->bd_fd);
+	darray_for_each(devs, i)
+		if (!i->fs_size)
+			i->fs_size = get_size(i->bdev->bd_fd);

 	/* calculate bucket sizes: */
-	for (i = devs; i < devs + nr_devs; i++)
-		min_bucket_size = min(min_bucket_size,
-			i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+	u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);

-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->bucket_size)
-			i->bucket_size = min_bucket_size;
+	darray_for_each(devs, i)
+		if (!opt_defined(i->opts, bucket_size))
+			opt_set(i->opts, bucket_size,
+				min(fs_bucket_size, dev_max_bucket_size(i->fs_size)));

-	for (i = devs; i < devs + nr_devs; i++) {
-		i->nbuckets = i->size / i->bucket_size;
+	darray_for_each(devs, i) {
+		i->nbuckets = i->fs_size / i->opts.bucket_size;
 		bch2_check_bucket_size(fs_opts, i);
 	}

 	/* calculate btree node size: */
 	if (!opt_defined(fs_opts, btree_node_size)) {
-		/* 256k default btree node size */
-		opt_set(fs_opts, btree_node_size, 256 << 10);
+		unsigned s = bch2_opts_default.btree_node_size;

-		for (i = devs; i < devs + nr_devs; i++)
-			fs_opts.btree_node_size =
-				min_t(unsigned, fs_opts.btree_node_size,
-				      i->bucket_size);
+		darray_for_each(devs, i)
+			s = min(s, i->opts.bucket_size);
+		opt_set(fs_opts, btree_node_size, s);
 	}

 	if (uuid_is_null(opts.uuid.b))
@@ -206,7 +216,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 	sb.sb->version_min	= le16_to_cpu(opts.version);
 	sb.sb->magic		= BCHFS_MAGIC;
 	sb.sb->user_uuid	= opts.uuid;
-	sb.sb->nr_devices	= nr_devs;
+	sb.sb->nr_devices	= devs.nr;
+	SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb.sb, opts.version);

 	if (opts.version == bcachefs_metadata_version_current)
 		sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
@@ -218,17 +229,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 	       opts.label,
 	       min(strlen(opts.label), sizeof(sb.sb->label)));

-	for (opt_id = 0;
-	     opt_id < bch2_opts_nr;
-	     opt_id++) {
-		u64 v;
-
-		v = bch2_opt_defined_by_id(&fs_opts, opt_id)
-			? bch2_opt_get_by_id(&fs_opts, opt_id)
-			: bch2_opt_get_by_id(&bch2_opts_default, opt_id);
-
-		__bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v);
-	}
+	bch2_opt_set_sb_all(sb.sb, -1, &fs_opts);

 	struct timespec now;
 	if (clock_gettime(CLOCK_REALTIME, &now))
@@ -240,31 +241,25 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 	/* Member info: */
 	struct bch_sb_field_members_v2 *mi =
 		bch2_sb_field_resize(&sb, members_v2,
-				     (sizeof(*mi) + sizeof(struct bch_member) *
-				      nr_devs) / sizeof(u64));
+			(sizeof(*mi) + sizeof(struct bch_member) * devs.nr) / sizeof(u64));

 	mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+	darray_for_each(devs, i) {
+		unsigned idx = i - devs.data;
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, idx);

 		uuid_generate(m->uuid.b);
 		m->nbuckets	= cpu_to_le64(i->nbuckets);
 		m->first_bucket	= 0;
-		m->bucket_size	= cpu_to_le16(i->bucket_size >> 9);
-
-		SET_BCH_MEMBER_DISCARD(m,	i->discard);
-		SET_BCH_MEMBER_DATA_ALLOWED(m,	i->data_allowed);
-		SET_BCH_MEMBER_DURABILITY(m,	i->durability + 1);
+		bch2_opt_set_sb_all(sb.sb, idx, &i->opts);
 	}

 	/* Disk labels*/
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m;
-		int idx;
-
+	darray_for_each(devs, i) {
 		if (!i->label)
 			continue;

-		idx = bch2_disk_path_find_or_create(&sb, i->label);
+		int idx = bch2_disk_path_find_or_create(&sb, i->label);
 		if (idx < 0)
 			die("error creating disk path: %s", strerror(-idx));

@@ -272,18 +267,18 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 		 * Recompute mi and m after each sb modification: its location
 		 * in memory may have changed due to reallocation.
 		 */
-		m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs.data));
 		SET_BCH_MEMBER_GROUP(m,	idx + 1);
 	}

 	SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+		parse_target(&sb, devs, fs_opt_strs.foreground_target));
 	SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+		parse_target(&sb, devs, fs_opt_strs.background_target));
 	SET_BCH_SB_PROMOTE_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+		parse_target(&sb, devs, fs_opt_strs.promote_target));
 	SET_BCH_SB_METADATA_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
+		parse_target(&sb, devs, fs_opt_strs.metadata_target));

 	/* Crypt: */
 	if (opts.encrypted) {
@@ -296,19 +291,19 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,

 	bch2_sb_members_cpy_v2_v1(&sb);

-	for (i = devs; i < devs + nr_devs; i++) {
-		u64 size_sectors = i->size >> 9;
+	darray_for_each(devs, i) {
+		u64 size_sectors = i->fs_size >> 9;

-		sb.sb->dev_idx = i - devs;
+		sb.sb->dev_idx = i - devs.data;

 		if (!i->sb_offset) {
 			i->sb_offset	= BCH_SB_SECTOR;
 			i->sb_end	= size_sectors;
 		}

-		init_layout(&sb.sb->layout, fs_opts.block_size,
-			    opts.superblock_size,
-			    i->sb_offset, i->sb_end);
+		bch2_sb_layout_init(&sb.sb->layout, fs_opts.block_size,
+				    opts.superblock_size,
+				    i->sb_offset, i->sb_end);

 		/*
 		 * Also create a backup superblock at the end of the disk:
@@ -321,7 +316,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 			struct bch_sb_layout *l = &sb.sb->layout;
 			u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);

-			backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
+			backup_sb = rounddown(backup_sb, i->opts.bucket_size >> 9);
 			l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
 		}

@@ -352,9 +347,9 @@ void bch2_super_write(int fd, struct bch_sb *sb)
 		if (sb->offset == BCH_SB_SECTOR) {
 			/* Write backup layout */

-			BUG_ON(bs > 4096);
+			unsigned buflen = max(bs, 4096);

-			char *buf = aligned_alloc(bs, bs);
+			char *buf = aligned_alloc(buflen, buflen);
 			xpread(fd, buf, bs, 4096 - bs);
 			memcpy(buf + bs - sizeof(sb->layout),
 			       &sb->layout,
@@ -411,43 +406,107 @@ void bcache_fs_close(struct bchfs_handle fs)
 	close(fs.sysfs_fd);
 }

-struct bchfs_handle bcache_fs_open(const char *path)
+static int bcache_fs_open_by_uuid(const char *uuid_str, struct bchfs_handle *fs)
 {
-	struct bchfs_handle ret;
-
-	if (!uuid_parse(path, ret.uuid.b)) {
-		/* It's a UUID, look it up in sysfs: */
-		char *sysfs = mprintf(SYSFS_BASE "%s", path);
-		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
-
-		char *minor = read_file_str(ret.sysfs_fd, "minor");
-		char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
-		ret.ioctl_fd = xopen(ctl, O_RDWR);
-
-		free(sysfs);
-		free(minor);
-		free(ctl);
-	} else {
-		/* It's a path: */
-		ret.ioctl_fd = open(path, O_RDONLY);
-		if (ret.ioctl_fd < 0)
-			die("Error opening filesystem at %s: %m", path);
-
-		struct bch_ioctl_query_uuid uuid;
-		if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0)
-			die("error opening %s: not a bcachefs filesystem", path);
-
-		ret.uuid = uuid.uuid;
-
-		char uuid_str[40];
-		uuid_unparse(uuid.uuid.b, uuid_str);
-
-		char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
-		ret.sysfs_fd = xopen(sysfs, O_RDONLY);
-		free(sysfs);
-	}
-
-	return ret;
+	if (uuid_parse(uuid_str, fs->uuid.b))
+		return -1;
+
+	char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+	fs->sysfs_fd = open(sysfs, O_RDONLY);
+	free(sysfs);
+
+	if (fs->sysfs_fd < 0)
+		return -errno;
+
+	char *minor = read_file_str(fs->sysfs_fd, "minor");
+	char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
+	fs->ioctl_fd = open(ctl, O_RDWR);
+	free(minor);
+	free(ctl);
+
+	return fs->ioctl_fd < 0 ? -errno : 0;
+}
+
+int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
+{
+	memset(fs, 0, sizeof(*fs));
+	fs->dev_idx = -1;
+
+	if (!uuid_parse(path, fs->uuid.b))
+		return bcache_fs_open_by_uuid(path, fs);
+
+	/* It's a path: */
+	int path_fd = open(path, O_RDONLY);
+	if (path_fd < 0)
+		return -errno;
+
+	struct bch_ioctl_query_uuid uuid;
+	if (!ioctl(path_fd, BCH_IOCTL_QUERY_UUID, &uuid)) {
+		/* It's a path to the mounted filesystem: */
+		fs->ioctl_fd = path_fd;
+
+		fs->uuid = uuid.uuid;
+
+		char uuid_str[40];
+		uuid_unparse(uuid.uuid.b, uuid_str);
+
+		char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+		fs->sysfs_fd = xopen(sysfs, O_RDONLY);
+		free(sysfs);
+		return 0;
+	}
+
+	struct bch_opts opts = bch2_opts_empty();
+	char buf[1024], *uuid_str;
+
+	struct stat stat = xstat(path);
+	close(path_fd);
+
+	if (S_ISBLK(stat.st_mode)) {
+		char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
+				      major(stat.st_rdev),
+				      minor(stat.st_rdev));
+
+		ssize_t len = readlink(sysfs, buf, sizeof(buf));
+		free(sysfs);
+
+		if (len <= 0)
+			goto read_super;
+
+		char *p = strrchr(buf, '/');
+		if (!p || sscanf(p + 1, "dev-%u", &fs->dev_idx) != 1)
+			die("error parsing sysfs");
+
+		*p = '\0';
+		p = strrchr(buf, '/');
+		uuid_str = p + 1;
+	} else {
+read_super:
+		opt_set(opts, noexcl,		true);
+		opt_set(opts, nochanges,	true);
+
+		struct bch_sb_handle sb;
+		int ret = bch2_read_super(path, &opts, &sb);
+		if (ret)
+			die("Error opening %s: %s", path, strerror(-ret));
+
+		fs->dev_idx = sb.sb->dev_idx;
+		uuid_str = buf;
+		uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+
+		bch2_free_super(&sb);
+	}
+
+	return bcache_fs_open_by_uuid(uuid_str, fs);
+}
+
+struct bchfs_handle bcache_fs_open(const char *path)
+{
+	struct bchfs_handle fs;
+	int ret = bcache_fs_open_fallible(path, &fs);
+	if (ret)
+		die("Error opening filesystem at %s: %s", path, strerror(-ret));
+	return fs;
 }

 /*
@@ -523,7 +582,7 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
 		if (e.type)
 			continue;

-		if (e.p.data_type == U8_MAX)
+		if (e.ret || e.p.data_type == U8_MAX)
 			break;

 		printf("\33[2K\r");
@@ -554,6 +613,8 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)

 /* option parsing */

+#include <getopt.h>
+
 void bch2_opt_strs_free(struct bch_opt_strs *opts)
 {
 	unsigned i;
@@ -564,6 +625,64 @@ void bch2_opt_strs_free(struct bch_opt_strs *opts)
 	}
 }

+static bool opt_type_filter(const struct bch_option *opt, unsigned opt_types)
+{
+	if (!(opt->flags & opt_types))
+		return false;
+
+	if ((opt_types & OPT_FORMAT) &&
+	    !opt->set_sb && !opt->set_member)
+		return false;
+
+	return true;
+}
+
+const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
+						unsigned opt_types)
+{
+	if (optind >= argc)
+		return NULL;
+
+	if (argv[optind][0] != '-' ||
+	    argv[optind][1] != '-')
+		return NULL;
+
+	char *optstr = strdup(argv[optind] + 2);
+	optarg = argv[optind + 1];
+
+	char *eq = strchr(optstr, '=');
+	if (eq) {
+		*eq = '\0';
+		optarg = eq + 1;
+	}
+
+	if (!optarg)
+		optarg = "1";
+
+	int optid = bch2_opt_lookup(optstr);
+	if (optid < 0)
+		goto noopt;
+
+	const struct bch_option *opt = bch2_opt_table + optid;
+	if (!opt_type_filter(opt, opt_types))
+		goto noopt;
+
+	optind++;
+
+	if (opt->type != BCH_OPT_BOOL) {
+		if (optarg == argv[optind])
+			optind++;
+	} else {
+		optarg = NULL;
+	}
+
+	return opt;
+noopt:
+	free(optstr);
+	return NULL;
+}
+
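bch2_cmdline_opt_parse() above accepts both "--name value" and "--name=value" spellings, defaulting booleans to "1". A standalone sketch of just the splitting step (not the real option-table lookup):

/* Illustrative only: strip the leading dashes, split on '=', and fall back
 * to an implicit "1" when no value is attached. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *argv_opt = "--bucket_size=1M";

	char *optstr = strdup(argv_opt + 2);   /* drop "--" */
	char *value = NULL;

	char *eq = strchr(optstr, '=');
	if (eq) {
		*eq = '\0';
		value = eq + 1;
	}
	if (!value)
		value = "1";                   /* implicit boolean value */

	printf("name=%s value=%s\n", optstr, value);
	free(optstr);
	return 0;
}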
@@ -651,19 +770,17 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
 #define newline(c)		\
 	do {			\
 		printf("\n");	\
-	    c = 0;		\
+		c = 0;		\
 	} while(0)
 void bch2_opts_usage(unsigned opt_types)
 {
 	const struct bch_option *opt;
 	unsigned i, c = 0, helpcol = 30;

-
-
 	for (opt = bch2_opt_table;
 	     opt < bch2_opt_table + bch2_opts_nr;
 	     opt++) {
-		if (!(opt->flags & opt_types))
+		if (!opt_type_filter(opt, opt_types))
 			continue;

 		c += printf("  --%s", opt->attr.name);
@@ -733,6 +850,8 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
 		if (r > 0) {
 			sysfs_block_buf[r] = '\0';
 			n.dev = strdup(basename(sysfs_block_buf));
+		} else {
+			n.dev = mprintf("(offline dev %u)", n.idx);
 		}

 		free(block_attr);
@@ -752,3 +871,11 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)

 	return devs;
 }
+
+struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
+{
+	darray_for_each(*dev_names, dev)
+		if (dev->idx == idx)
+			return dev;
+	return NULL;
+}

@@ -28,6 +28,9 @@ struct {
 };

 void bch2_opt_strs_free(struct bch_opt_strs *);
+
+const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
+						unsigned opt_types);
 struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
 struct bch_opts bch2_parse_opts(struct bch_opt_strs);
 void bch2_opts_usage(unsigned);
@@ -42,14 +45,18 @@ struct format_opts {
 	char		*source;
 };

+static inline unsigned bcachefs_kernel_version(void)
+{
+	return !access("/sys/module/bcachefs/parameters/version", R_OK)
+	    ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
+	    : 0;
+}
+
 static inline struct format_opts format_opts_default()
 {
-	unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK)
-	    ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
-	    : bcachefs_metadata_version_current;
-
 	return (struct format_opts) {
-		.version		= version,
+		.version		= bcachefs_kernel_version() ?:
+					  bcachefs_metadata_version_current,
 		.superblock_size	= SUPERBLOCK_SIZE_DEFAULT,
 	};
 }
@@ -58,33 +65,35 @@ struct dev_opts {
 	struct file		*file;
 	struct block_device	*bdev;
 	char			*path;
-	u64			size;		/* bytes*/
-	u64			bucket_size;	/* bytes */
-	const char		*label;
-	unsigned		data_allowed;
-	unsigned		durability;
-	bool			discard;
-
-	u64			nbuckets;

 	u64			sb_offset;
 	u64			sb_end;

+	u64			nbuckets;
+	u64			fs_size;
+
+	const char		*label; /* make this a bch_opt */
+
+	struct bch_opts		opts;
 };

+typedef DARRAY(struct dev_opts) dev_opts_list;
+
 static inline struct dev_opts dev_opts_default()
 {
-	return (struct dev_opts) {
-		.data_allowed		= ~0U << 2,
-		.durability		= 1,
-	};
+	return (struct dev_opts) { .opts = bch2_opts_empty() };
 }

-u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+void bch2_sb_layout_init(struct bch_sb_layout *,
+			 unsigned, unsigned, u64, u64);
+
+u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
 void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);

 struct bch_sb *bch2_format(struct bch_opt_strs,
 			   struct bch_opts,
-			   struct format_opts, struct dev_opts *, size_t);
+			   struct format_opts,
+			   dev_opts_list devs);

 void bch2_super_write(int, struct bch_sb *);
 struct bch_sb *__bch2_super_read(int, u64);
@@ -97,11 +106,16 @@ struct bchfs_handle {
 	__uuid_t	uuid;
 	int		ioctl_fd;
 	int		sysfs_fd;
+	int		dev_idx;
 };

 void bcache_fs_close(struct bchfs_handle);
+
+int bcache_fs_open_fallible(const char *, struct bchfs_handle *);
+
 struct bchfs_handle bcache_fs_open(const char *);
 struct bchfs_handle bchu_fs_open_by_dev(const char *, int *);

 int bchu_dev_path_to_idx(struct bchfs_handle, const char *);

 static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
@@ -296,5 +310,6 @@ struct dev_name {
 typedef DARRAY(struct dev_name) dev_names;

 dev_names bchu_fs_get_devices(struct bchfs_handle);
+struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx);

 #endif /* _LIBBCACHE_H */
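The header changes above replace the dev_opts pointer-plus-count convention with dev_opts_list, a DARRAY of dev_opts. A standalone sketch of the growable-array pattern and the iterate-by-pointer idiom behind darray_for_each(), using simplified stand-in types rather than the real DARRAY() macros:

/* Illustrative only: a minimal data/nr pair grown with realloc(). */
#include <stdio.h>
#include <stdlib.h>

struct dev_opts { const char *path; unsigned long long fs_size; };

struct dev_opts_list { struct dev_opts *data; size_t nr; };

static void push(struct dev_opts_list *l, struct dev_opts d)
{
	l->data = realloc(l->data, (l->nr + 1) * sizeof(d));
	l->data[l->nr++] = d;
}

int main(void)
{
	struct dev_opts_list devs = { 0 };
	push(&devs, (struct dev_opts) { "/dev/sda", 1ULL << 40 });
	push(&devs, (struct dev_opts) { "/dev/sdb", 2ULL << 40 });

	/* the darray_for_each() pattern: walk by pointer from data to data+nr */
	for (struct dev_opts *i = devs.data; i < devs.data + devs.nr; i++)
		printf("%s: %llu bytes\n", i->path, i->fs_size);

	free(devs.data);
	return 0;
}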
@@ -1,12 +1,13 @@
 #include <dirent.h>
 #include <sys/xattr.h>
+#include <linux/dcache.h>
 #include <linux/xattr.h>
 
 #include "posix_to_bcachefs.h"
 #include "libbcachefs/alloc_foreground.h"
 #include "libbcachefs/buckets.h"
-#include "libbcachefs/fs-common.h"
 #include "libbcachefs/io_write.h"
+#include "libbcachefs/namei.h"
 #include "libbcachefs/str_hash.h"
 #include "libbcachefs/xattr.h"
 
@@ -32,7 +33,7 @@ void create_link(struct bch_fs *c,
 	struct bch_inode_unpacked parent_u;
 	struct bch_inode_unpacked inode;
 
-	int ret = bch2_trans_do(c, NULL, NULL, 0,
+	int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
 			bch2_link_trans(trans,
 					(subvol_inum) { 1, parent->bi_inum }, &parent_u,
 					(subvol_inum) { 1, inum }, &inode, &qstr));
@@ -51,7 +52,7 @@ struct bch_inode_unpacked create_file(struct bch_fs *c,
 
 	bch2_inode_init_early(c, &new_inode);
 
-	int ret = bch2_trans_do(c, NULL, NULL, 0,
+	int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
 			bch2_create_trans(trans,
 					(subvol_inum) { 1, parent->bi_inum }, parent,
 					&new_inode, &qstr,
@@ -125,7 +126,7 @@ void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		if (IS_ERR(h))
 			continue;
 
-		int ret = bch2_trans_do(c, NULL, NULL, 0,
+		int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
 				bch2_xattr_set(trans,
 					       (subvol_inum) { 1, dst->bi_inum },
 					       dst, &hash_info, attr,
@@ -158,7 +159,7 @@ static void write_data(struct bch_fs *c,
 	op.nr_replicas	= 1;
 	op.subvol	= 1;
 	op.pos		= SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
-	op.flags |= BCH_WRITE_SYNC;
+	op.flags |= BCH_WRITE_sync;
 
 	int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
 					    c->opts.data_replicas, 0);
@@ -167,7 +168,7 @@ static void write_data(struct bch_fs *c,
 
 	closure_call(&op.cl, bch2_write, NULL, NULL);
 
-	BUG_ON(!(op.flags & BCH_WRITE_SUBMITTED));
+	BUG_ON(!(op.flags & BCH_WRITE_submitted));
 	dst_inode->bi_sectors += len >> 9;
 
 	if (op.error)
@@ -263,7 +264,8 @@ void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
 
 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		      int src_fd, u64 src_size,
-		      char *src_path, struct copy_fs_state *s)
+		      char *src_path, struct copy_fs_state *s,
+		      u64 reserve_start)
 {
 	struct fiemap_iter iter;
 	struct fiemap_extent e;
@@ -294,11 +296,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 			continue;
 		}
 
-		/*
-		 * if the data is below 1 MB, copy it so it doesn't conflict
-		 * with bcachefs's potentially larger superblock:
-		 */
-		if (e.fe_physical < 1 << 20) {
+		/* If the data is in bcachefs's superblock region, copy it: */
+		if (e.fe_physical < reserve_start) {
 			copy_data(c, dst, src_fd, e.fe_logical,
 				  e.fe_logical + min(src_size - e.fe_logical,
 						     e.fe_length));
@@ -317,7 +316,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 static void copy_dir(struct copy_fs_state *s,
 		     struct bch_fs *c,
 		     struct bch_inode_unpacked *dst,
-		     int src_fd, const char *src_path)
+		     int src_fd, const char *src_path,
+		     u64 reserve_start)
 {
 	DIR *dir = fdopendir(src_fd);
 	struct dirent *d;
@@ -368,7 +368,7 @@ static void copy_dir(struct copy_fs_state *s,
 	switch (mode_to_type(stat.st_mode)) {
 	case DT_DIR:
 		fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
-		copy_dir(s, c, &inode, fd, child_path);
+		copy_dir(s, c, &inode, fd, child_path, reserve_start);
 		close(fd);
 		break;
 	case DT_REG:
@@ -376,7 +376,7 @@ static void copy_dir(struct copy_fs_state *s,
 
 		fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
 		copy_file(c, &inode, fd, stat.st_size,
-			  child_path, s);
+			  child_path, s, reserve_start);
 		close(fd);
 		break;
 	case DT_LNK:
@@ -408,7 +408,8 @@ next:
 
 static void reserve_old_fs_space(struct bch_fs *c,
 				 struct bch_inode_unpacked *root_inode,
-				 ranges *extents)
+				 ranges *extents,
+				 u64 reserve_start)
 {
 	struct bch_dev *ca = c->devs[0];
 	struct bch_inode_unpacked dst;
@@ -421,14 +422,20 @@ static void reserve_old_fs_space(struct bch_fs *c,
 
 	ranges_sort_merge(extents);
 
-	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
-		link_data(c, &dst, i.start, i.start, i.end - i.start);
+	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+		if (i.end <= reserve_start)
+			continue;
+
+		u64 start = max(i.start, reserve_start);
+
+		link_data(c, &dst, start, start, i.end - start);
+	}
 
 	update_inode(c, &dst);
 }
 
 void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
-	     struct copy_fs_state *s)
+	     struct copy_fs_state *s, u64 reserve_start)
 {
 	syncfs(src_fd);
 
@@ -447,10 +454,10 @@ void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
 
 	/* now, copy: */
-	copy_dir(s, c, &root_inode, src_fd, src_path);
+	copy_dir(s, c, &root_inode, src_fd, src_path, reserve_start);
 
 	if (BCH_MIGRATE_migrate == s->type)
-		reserve_old_fs_space(c, &root_inode, &s->extents);
+		reserve_old_fs_space(c, &root_inode, &s->extents, reserve_start);
 
 	update_inode(c, &root_inode);
 
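
	/*
	 * Worked example of the clamping above (invented numbers): with
	 * reserve_start = 8 MiB, a hole spanning [4 MiB, 16 MiB) is trimmed
	 * to start = max(4 MiB, 8 MiB) = 8 MiB, so only [8 MiB, 16 MiB) is
	 * linked into the reservation inode; a hole ending at or below
	 * 8 MiB is skipped entirely (i.end <= reserve_start).
	 */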
@@ -50,5 +50,5 @@ struct copy_fs_state {
  * initialized (`hardlinks` is initialized with zeroes).
  */
 void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
-	     struct copy_fs_state *s);
+	     struct copy_fs_state *s, u64);
 #endif /* _LIBBCACHE_H */
@@ -185,6 +185,24 @@ unsigned get_blocksize(int fd)
 /* Open a block device, do magic blkid stuff to probe for existing filesystems: */
 int open_for_format(struct dev_opts *dev, bool force)
 {
+	int blkid_version_code = blkid_get_library_version(NULL, NULL);
+	if (blkid_version_code < 2401) {
+		if (force) {
+			fprintf(
+				stderr,
+				"Continuing with out of date libblkid %s because --force was passed.\n",
+				BLKID_VERSION);
+		} else {
+			// Reference for picking 2.40.1:
+			// https://mirrors.edge.kernel.org/pub/linux/utils/util-linux/v2.40/v2.40.1-ReleaseNotes
+			// https://github.com/util-linux/util-linux/issues/3103
+			die(
+				"Refusing to format when using libblkid %s\n"
+				"libblkid >= 2.40.1 is required to check for existing filesystems\n"
+				"Earlier versions may not recognize some bcachefs filesystems.\n", BLKID_VERSION);
+		}
+	}
+
 	blkid_probe pr;
 	const char *fs_type = NULL, *fs_label = NULL;
 	size_t fs_type_len, fs_label_len;
@@ -708,6 +726,28 @@ struct bbpos_range bbpos_range_parse(char *buf)
 	return (struct bbpos_range) { .start = start, .end = end };
 }
 
+unsigned version_parse(char *buf)
+{
+	char *s = buf;
+	char *major_str = strsep(&s, ".");
+	char *minor_str = strsep(&s, ".");
+
+	unsigned major, minor;
+
+	if (!minor_str) {
+		major = 0;
+		if (kstrtouint(major_str, 10, &minor))
+			die("invalid version %s", buf);
+	} else {
+		if (kstrtouint(major_str, 10, &major) ||
+		    kstrtouint(minor_str, 10, &minor))
+			die("invalid version %s", buf);
+	}
+
+	return BCH_VERSION(major, minor);
+}
+
 darray_str get_or_split_cmdline_devs(int argc, char *argv[])
 {
 	darray_str ret = {};
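
	/*
	 * Behaviour of version_parse(), read directly off the code above:
	 *   version_parse("1.13") -> BCH_VERSION(1, 13)
	 *   version_parse("25")   -> BCH_VERSION(0, 25)  (bare number = minor)
	 *   anything non-numeric  -> die("invalid version ...")
	 */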
@@ -174,6 +174,8 @@ struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
 
 char *strcmp_prefix(char *, const char *);
 
+/* Avoid conflicts with libblkid's crc32 function in static builds */
+#define crc32c bch_crc32c
 u32 crc32c(u32, const void *, size_t);
 
 char *dev_to_name(dev_t);
@@ -207,6 +209,8 @@ struct bbpos_range {
 
 struct bbpos_range bbpos_range_parse(char *);
 
+unsigned version_parse(char *);
+
 darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
 
 #endif /* _TOOLS_UTIL_H */
flake.lock (generated): 117 lines changed
@@ -1,17 +1,12 @@
 {
   "nodes": {
     "crane": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
       "locked": {
-        "lastModified": 1721842668,
-        "narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=",
+        "lastModified": 1742394900,
+        "narHash": "sha256-vVOAp9ahvnU+fQoKd4SEXB2JG2wbENkpqcwlkIXgUC0=",
         "owner": "ipetkov",
         "repo": "crane",
-        "rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf",
+        "rev": "70947c1908108c0c551ddfd73d4f750ff2ea67cd",
         "type": "github"
       },
       "original": {
@@ -20,35 +15,14 @@
         "type": "github"
       }
     },
-    "fenix": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "rust-analyzer-src": "rust-analyzer-src"
-      },
-      "locked": {
-        "lastModified": 1722320953,
-        "narHash": "sha256-DfGaJtgrzcwPQYLTvjL1KaVIjpvi85b2MpM6yEGvJzM=",
-        "owner": "nix-community",
-        "repo": "fenix",
-        "rev": "483df76def3e5010d709aa3a0418ba2088503994",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-community",
-        "repo": "fenix",
-        "type": "github"
-      }
-    },
     "flake-compat": {
       "flake": false,
       "locked": {
-        "lastModified": 1696426674,
-        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+        "lastModified": 1733328505,
+        "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
         "owner": "edolstra",
         "repo": "flake-compat",
-        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
         "type": "github"
       },
       "original": {
@@ -62,11 +36,11 @@
         "nixpkgs-lib": "nixpkgs-lib"
       },
       "locked": {
-        "lastModified": 1719994518,
-        "narHash": "sha256-pQMhCCHyQGRzdfAkdJ4cIWiw+JNuWsTX7f0ZYSyz0VY=",
+        "lastModified": 1741352980,
+        "narHash": "sha256-+u2UunDA4Cl5Fci3m7S643HzKmIDAe+fiXrLqYsR2fs=",
         "owner": "hercules-ci",
         "repo": "flake-parts",
-        "rev": "9227223f6d922fee3c7b190b2cc238a99527bbb7",
+        "rev": "f4330d22f1c5d2ba72d3d22df5597d123fdb60a9",
         "type": "github"
       },
       "original": {
@@ -75,13 +49,33 @@
         "type": "github"
       }
     },
+    "nix-github-actions": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1737420293,
+        "narHash": "sha256-F1G5ifvqTpJq7fdkT34e/Jy9VCyzd5XfJ9TO8fHhJWE=",
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "rev": "f4158fa080ef4503c8f4c820967d946c2af31ec9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1722185531,
-        "narHash": "sha256-veKR07psFoJjINLC8RK4DiLniGGMgF3QMlS4tb74S6k=",
+        "lastModified": 1742422364,
+        "narHash": "sha256-mNqIplmEohk5jRkqYqG19GA8MbQ/D4gQSK0Mu4LvfRQ=",
         "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "52ec9ac3b12395ad677e8b62106f0b98c1f8569d",
+        "rev": "a84ebe20c6bc2ecbcfb000a50776219f48d134cc",
         "type": "github"
       },
       "original": {
@@ -93,40 +87,47 @@
     },
     "nixpkgs-lib": {
       "locked": {
-        "lastModified": 1719876945,
-        "narHash": "sha256-Fm2rDDs86sHy0/1jxTOKB1118Q0O3Uc7EC0iXvXKpbI=",
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
+        "lastModified": 1740877520,
+        "narHash": "sha256-oiwv/ZK/2FhGxrCkQkB83i7GnWXPPLzoqFHpDD3uYpk=",
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "rev": "147dee35aab2193b174e4c0868bd80ead5ce755c",
+        "type": "github"
       },
       "original": {
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "type": "github"
       }
     },
     "root": {
       "inputs": {
         "crane": "crane",
-        "fenix": "fenix",
         "flake-compat": "flake-compat",
         "flake-parts": "flake-parts",
+        "nix-github-actions": "nix-github-actions",
         "nixpkgs": "nixpkgs",
+        "rust-overlay": "rust-overlay",
         "treefmt-nix": "treefmt-nix"
       }
     },
-    "rust-analyzer-src": {
-      "flake": false,
+    "rust-overlay": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
       "locked": {
-        "lastModified": 1722262053,
-        "narHash": "sha256-KxjkPVn9rQqYam6DhiN/V2NcMXtYW25maxkJoiVMpmE=",
-        "owner": "rust-lang",
-        "repo": "rust-analyzer",
-        "rev": "a021b85be57d34b1eed687fcafd5d5ec64b2d853",
+        "lastModified": 1742524367,
+        "narHash": "sha256-KzTwk/5ETJavJZYV1DEWdCx05M4duFCxCpRbQSKWpng=",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "rev": "70bf752d176b2ce07417e346d85486acea9040ef",
         "type": "github"
       },
       "original": {
-        "owner": "rust-lang",
-        "ref": "nightly",
-        "repo": "rust-analyzer",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
         "type": "github"
       }
     },
@@ -137,11 +138,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1722330636,
-        "narHash": "sha256-uru7JzOa33YlSRwf9sfXpJG+UAV+bnBEYMjrzKrQZFw=",
+        "lastModified": 1742370146,
+        "narHash": "sha256-XRE8hL4vKIQyVMDXykFh4ceo3KSpuJF3ts8GKwh5bIU=",
         "owner": "numtide",
         "repo": "treefmt-nix",
-        "rev": "768acdb06968e53aa1ee8de207fd955335c754b7",
+        "rev": "adc195eef5da3606891cedf80c0d9ce2d3190808",
         "type": "github"
       },
       "original": {
flake.nix: 329 lines changed
@@ -11,13 +11,10 @@
       inputs.nixpkgs.follows = "nixpkgs";
     };
 
-    crane = {
-      url = "github:ipetkov/crane";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
+    crane.url = "github:ipetkov/crane";
 
-    fenix = {
-      url = "github:nix-community/fenix";
+    rust-overlay = {
+      url = "github:oxalica/rust-overlay";
       inputs.nixpkgs.follows = "nixpkgs";
     };
 
@@ -25,6 +22,11 @@
       url = "github:edolstra/flake-compat";
       flake = false;
     };
+
+    nix-github-actions = {
+      url = "github:nix-community/nix-github-actions";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
   };
 
   outputs =
@@ -33,27 +35,31 @@
       nixpkgs,
       flake-parts,
       treefmt-nix,
-      fenix,
       crane,
-      ...
+      rust-overlay,
+      flake-compat,
+      nix-github-actions,
     }:
+    let
+      systems = nixpkgs.lib.filter (s: nixpkgs.lib.hasSuffix "-linux" s) nixpkgs.lib.systems.flakeExposed;
+    in
     flake-parts.lib.mkFlake { inherit inputs; } {
       imports = [ inputs.treefmt-nix.flakeModule ];
 
-      # can be extended, but these have proper binary cache support in nixpkgs
-      # as of writing.
-      systems = [
-        "aarch64-linux"
-        "x86_64-linux"
-        "i686-linux"
-      ];
+      flake = {
+        githubActions = nix-github-actions.lib.mkGithubMatrix {
+          # github actions supports fewer architectures
+          checks = nixpkgs.lib.getAttrs [ "aarch64-linux" "x86_64-linux" ] self.checks;
+        };
+      };
+
+      inherit systems;
 
       perSystem =
         {
          self',
          config,
          lib,
          pkgs,
          system,
          ...
        }:
@@ -62,119 +68,225 @@
         inherit (lib.lists) findFirst;
         inherit (lib.strings) hasPrefix removePrefix substring;
 
+        pkgs = import nixpkgs {
+          inherit system;
+          overlays = [ (import rust-overlay) ];
+        };
+
         cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
         rustfmtToml = builtins.fromTOML (builtins.readFile ./rustfmt.toml);
 
-        craneLib = crane.mkLib pkgs;
-
         rev = self.shortRev or self.dirtyShortRev or (substring 0 8 self.lastModifiedDate);
         makefileVersion = removePrefix "VERSION=" (
           findFirst (line: hasPrefix "VERSION=" line) "VERSION=0.0.0" (split "\n" (readFile ./Makefile))
         );
         version = "${makefileVersion}+${rev}";
 
-        commonArgs = {
-          inherit version;
-          src = self;
-
-          env = {
-            PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
-            PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";
-          };
-
-          makeFlags = [
-            "INITRAMFS_DIR=${placeholder "out"}/etc/initramfs-tools"
-            "PREFIX=${placeholder "out"}"
-            "VERSION=${version}"
-          ];
-
-          dontStrip = true;
-
-          nativeBuildInputs = with pkgs; [
-            pkg-config
-            rustPlatform.bindgenHook
-          ];
-
-          buildInputs = with pkgs; [
-            attr
-            keyutils
-            libaio
-            libsodium
-            liburcu
-            libuuid
-            lz4
-            udev
-            zlib
-            zstd
-          ];
-
-          meta = {
-            description = "Userspace tools for bcachefs";
-            license = lib.licenses.gpl2Only;
-            mainProgram = "bcachefs";
-          };
-        };
-
-        cargoArtifacts = craneLib.buildDepsOnly (commonArgs // { pname = cargoToml.package.name; });
+        mkCommon =
+          {
+            crane,
+            pkgs,
+            rustVersion ? "latest",
+
+            # build time
+            buildPackages,
+            pkg-config,
+            rustPlatform,
+            stdenv,
+
+            # run time
+            keyutils,
+            libaio,
+            libsodium,
+            liburcu,
+            libuuid,
+            lz4,
+            udev,
+            zlib,
+            zstd,
+          }:
+          let
+            inherit (stdenv) cc hostPlatform;
+
+            craneLib = (crane.mkLib pkgs).overrideToolchain (
+              p: p.rust-bin.stable."${rustVersion}".minimal.override { extensions = [ "clippy" ]; }
+            );
+
+            args = {
+              inherit version;
+              src = self;
+              strictDeps = true;
+
+              env = {
+                PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
+                PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";
+
+                CARGO_BUILD_TARGET = hostPlatform.rust.rustcTargetSpec;
+                "CARGO_TARGET_${hostPlatform.rust.cargoEnvVarTarget}_LINKER" = "${cc.targetPrefix}cc";
+                HOST_CC = "${cc.nativePrefix}cc";
+                TARGET_CC = "${cc.targetPrefix}cc";
+              };
+
+              makeFlags = [
+                "INITRAMFS_DIR=${placeholder "out"}/etc/initramfs-tools"
+                "PREFIX=${placeholder "out"}"
+                "VERSION=${version}"
+              ];
+
+              dontStrip = true;
+
+              depsBuildBuild = [
+                buildPackages.stdenv.cc
+              ];
+
+              nativeBuildInputs = [
+                pkg-config
+                rustPlatform.bindgenHook
+              ];
+
+              buildInputs = [
+                keyutils
+                libaio
+                libsodium
+                liburcu
+                libuuid
+                lz4
+                udev
+                zlib
+                zstd
+              ];
+
+              meta = {
+                description = "Userspace tools for bcachefs";
+                license = lib.licenses.gpl2Only;
+                mainProgram = "bcachefs";
+              };
+            };
+
+            cargoArtifacts = craneLib.buildDepsOnly args;
+          in
+          {
+            inherit args cargoArtifacts craneLib;
+          };
+        common = pkgs.callPackage mkCommon { inherit crane; };
+
+        mkPackage =
+          { common, name }:
+          common.craneLib.buildPackage (
+            common.args
+            // {
+              inherit (common) cargoArtifacts;
+              pname = name;
+
+              enableParallelBuilding = true;
+              buildPhaseCargoCommand = ''
+                make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
+              '';
+              doNotPostBuildInstallCargoBinaries = true;
+              installPhaseCommand = ''
+                make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
+              '';
+
+              doInstallCheck = true;
+              installCheckPhase = ''
+                runHook preInstallCheck
+
+                test "$($out/bin/bcachefs version)" = "${version}"
+
+                runHook postInstallCheck
+              '';
+            }
+          );
+
+        mkPackages =
+          name: systems:
+          let
+            packagesForSystem =
+              crossSystem:
+              let
+                localSystem = system;
+                pkgs' = import nixpkgs {
+                  inherit crossSystem localSystem;
+                  overlays = [ (import rust-overlay) ];
+                };
+
+                common = pkgs'.callPackage mkCommon { inherit crane; };
+                package = pkgs'.callPackage mkPackage { inherit common name; };
+                packageFuse = package.overrideAttrs (
+                  final: prev: {
+                    makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
+                    buildInputs = prev.buildInputs ++ [ pkgs'.fuse3 ];
+                  }
+                );
+              in
+              [
+                (lib.nameValuePair "${name}-${crossSystem}" package)
+                (lib.nameValuePair "${name}-fuse-${crossSystem}" packageFuse)
+              ];
+          in
+          lib.listToAttrs (lib.flatten (map packagesForSystem systems));
       in
       {
-        packages.default = config.packages.bcachefs-tools;
-        packages.bcachefs-tools = craneLib.buildPackage (
-          commonArgs
-          // {
-            inherit cargoArtifacts;
-
-            enableParallelBuilding = true;
-            buildPhaseCargoCommand = ''
-              make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
-            '';
-            installPhaseCommand = ''
-              make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
-            '';
-
-            doInstallCheck = true;
-            installCheckPhase = ''
-              runHook preInstallCheck
-
-              test "$($out/bin/bcachefs version)" = "${version}"
-
-              runHook postInstallCheck
-            '';
-          }
-        );
-
-        packages.bcachefs-tools-fuse = config.packages.bcachefs-tools.overrideAttrs (
-          final: prev: {
-            makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
-            buildInputs = prev.buildInputs ++ [ pkgs.fuse3 ];
-          }
-        );
-
-        checks.cargo-clippy = craneLib.cargoClippy (
-          commonArgs
-          // {
-            inherit cargoArtifacts;
-            cargoClippyExtraArgs = "--all-targets -- --deny warnings";
-          }
-        );
-
-        # we have to build our own `craneLib.cargoTest`
-        checks.cargo-test = craneLib.mkCargoDerivation (
-          commonArgs
-          // {
-            inherit cargoArtifacts;
-            doCheck = true;
-
-            enableParallelChecking = true;
-
-            pnameSuffix = "-test";
-            buildPhaseCargoCommand = "";
-            checkPhaseCargoCommand = ''
-              make ''${enableParallelChecking:+-j''${NIX_BUILD_CORES}} $makeFlags libbcachefs.a
-              cargo test --profile release -- --nocapture
-            '';
-          }
-        );
+        packages =
+          let
+            inherit (cargoToml.package) name;
+          in
+          (mkPackages name systems)
+          // {
+            ${name} = config.packages."${name}-${system}";
+            "${name}-fuse" = config.packages."${name}-fuse-${system}";
+            default = config.packages.${name};
+          };
+
+        checks = {
+          inherit (config.packages)
+            bcachefs-tools
+            bcachefs-tools-fuse
+            bcachefs-tools-fuse-i686-linux
+            ;
+
+          cargo-clippy = common.craneLib.cargoClippy (
+            common.args
+            // {
+              inherit (common) cargoArtifacts;
+              cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
+            }
+          );
+
+          # we have to build our own `craneLib.cargoTest`
+          cargo-test = common.craneLib.mkCargoDerivation (
+            common.args
+            // {
+              inherit (common) cargoArtifacts;
+              doCheck = true;
+
+              enableParallelChecking = true;
+
+              pnameSuffix = "-test";
+              buildPhaseCargoCommand = "";
+              checkPhaseCargoCommand = ''
+                make ''${enableParallelChecking:+-j''${NIX_BUILD_CORES}} $makeFlags libbcachefs.a
+                cargo test --profile release -- --nocapture
+              '';
+            }
+          );
+
+          # cargo clippy with the current minimum supported rust version
+          # according to Cargo.toml
+          msrv =
+            let
+              rustVersion = cargoToml.package.rust-version;
+              common = pkgs.callPackage mkCommon { inherit crane rustVersion; };
+            in
+            common.craneLib.cargoClippy (
+              common.args
+              // {
+                pname = "msrv";
+                inherit (common) cargoArtifacts;
+                cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
+              }
+            );
+        };
 
         devShells.default = pkgs.mkShell {
           inputsFrom = [
@@ -190,9 +302,12 @@
             cargo-audit
             cargo-outdated
             clang-tools
-            clippy
-            rust-analyzer
-            rustc
+            (rust-bin.stable.latest.minimal.override {
+              extensions = [
+                "rust-analyzer"
+                "rust-src"
+              ];
+            })
           ];
         };
 
@@ -204,7 +319,7 @@
             nixfmt.enable = true;
             rustfmt.edition = rustfmtToml.edition;
             rustfmt.enable = true;
-            rustfmt.package = fenix.packages.${system}.default.rustfmt;
+            rustfmt.package = pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.rustfmt);
           };
         };
       };
@@ -7,6 +7,7 @@
 #define _CRYPTO_SHA_H
 
 #include <linux/types.h>
+#include <sodium/crypto_hash_sha256.h>
 
 #define SHA1_DIGEST_SIZE        20
 #define SHA1_BLOCK_SIZE         64
@@ -112,4 +113,9 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
 
 extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
 			       unsigned int len, u8 *hash);
+
+static inline void sha256(const u8 *data, unsigned int len, u8 *out)
+{
+	crypto_hash_sha256(out, data, len);
+}
 #endif
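
Usage sketch for the new sha256() wrapper; libsodium's crypto_hash_sha256() does the actual work, and crypto_hash_sha256_BYTES is its 32-byte digest size:

	static void digest_example(const u8 *data, unsigned int len)
	{
		u8 out[crypto_hash_sha256_BYTES];	/* 32 bytes */

		sha256(data, len, out);
	}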
@@ -16,53 +16,6 @@ typedef struct {
 	u64		counter;
 } atomic64_t;
 
-#ifndef C11_ATOMICS
-
-#include <urcu/uatomic.h>
-
-#if (CAA_BITS_PER_LONG != 64)
-#define ATOMIC64_SPINLOCK
-#endif
-
-#define __ATOMIC_READ(p)		uatomic_read(p)
-#define __ATOMIC_SET(p, v)		uatomic_set(p, v)
-#define __ATOMIC_SET_RELEASE(p, v)	uatomic_set(p, v)
-#define __ATOMIC_ADD_RETURN(v, p)	uatomic_add_return(p, v)
-#define __ATOMIC_SUB_RETURN(v, p)	uatomic_sub_return(p, v)
-#define __ATOMIC_ADD(v, p)		uatomic_add(p, v)
-#define __ATOMIC_SUB(v, p)		uatomic_sub(p, v)
-#define __ATOMIC_INC(p)			uatomic_inc(p)
-#define __ATOMIC_DEC(p)			uatomic_dec(p)
-#define __ATOMIC_AND(v, p)		uatomic_and(p, v)
-#define __ATOMIC_OR(v, p)		uatomic_or(p, v)
-
-#define xchg(p, v)			uatomic_xchg(p, v)
-#define xchg_acquire(p, v)		uatomic_xchg(p, v)
-#define cmpxchg(p, old, new)		uatomic_cmpxchg(p, old, new)
-#define cmpxchg_acquire(p, old, new)	uatomic_cmpxchg(p, old, new)
-#define cmpxchg_release(p, old, new)	uatomic_cmpxchg(p, old, new)
-
-#define try_cmpxchg(p, _old, _new)				\
-({								\
-	typeof(*(_old)) _v = cmpxchg(p, *(_old), _new);		\
-	bool _ret = _v == *(_old);				\
-	*(_old) = _v;						\
-	_ret;							\
-})
-
-#define try_cmpxchg_acquire(p, _old, _new)			\
-	try_cmpxchg(p, _old, _new)
-
-#define smp_mb__before_atomic()		cmm_smp_mb__before_uatomic_add()
-#define smp_mb__after_atomic()		cmm_smp_mb__after_uatomic_add()
-#define smp_wmb()			cmm_smp_wmb()
-#define smp_rmb()			cmm_smp_rmb()
-#define smp_mb()			cmm_smp_mb()
-#define smp_read_barrier_depends()	cmm_smp_read_barrier_depends()
-#define smp_acquire__after_ctrl_dep()	cmm_smp_mb()
-
-#else /* C11_ATOMICS */
-
 #define __ATOMIC_READ(p)		__atomic_load_n(p, __ATOMIC_RELAXED)
 #define __ATOMIC_SET(p, v)		__atomic_store_n(p, v, __ATOMIC_RELAXED)
 #define __ATOMIC_SET_RELEASE(p, v)	__atomic_store_n(p, v, __ATOMIC_RELEASE)
@@ -72,17 +25,22 @@ typedef struct {
 #define __ATOMIC_SUB_RETURN(v, p)	__atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
 #define __ATOMIC_SUB_RETURN_RELEASE(v, p)				\
 					__atomic_sub_fetch(p, v, __ATOMIC_RELEASE)
-#define __ATOMIC_AND(p)			__atomic_and_fetch(p, v, __ATOMIC_RELAXED)
-#define __ATOMIC_OR(p)			__atomic_or_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_AND(v, p)		__atomic_and_fetch(&(p)->counter, v, __ATOMIC_RELAXED)
+#define __ATOMIC_OR(v, p)		__atomic_or_fetch(&(p)->counter, v, __ATOMIC_RELAXED)
 
 #define xchg(p, v)			__atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
 #define xchg_acquire(p, v)		__atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)
 
 #define try_cmpxchg(p, old, new)					\
-	__atomic_compare_exchange_n((p), __old, new, false,		\
+	__atomic_compare_exchange_n((p), old, new, false,		\
 				    __ATOMIC_SEQ_CST,			\
 				    __ATOMIC_SEQ_CST)
 
+#define try_cmpxchg_acquire(p, old, new)				\
+	__atomic_compare_exchange_n((p), old, new, false,		\
+				    __ATOMIC_ACQUIRE,			\
+				    __ATOMIC_RELAXED)
+
 #define cmpxchg(p, old, new)						\
 ({									\
 	typeof(*(p)) __old = (old);					\
@@ -109,7 +67,7 @@ typedef struct {
 									\
 	__atomic_compare_exchange_n((p), &__old, new, false,		\
 				    __ATOMIC_RELEASE,			\
-				    __ATOMIC_RELEASE);			\
+				    __ATOMIC_RELAXED);			\
 	__old;								\
 })
 
@@ -119,9 +77,7 @@ typedef struct {
 #define smp_rmb()			__atomic_thread_fence(__ATOMIC_SEQ_CST)
 #define smp_mb()			__atomic_thread_fence(__ATOMIC_SEQ_CST)
 #define smp_read_barrier_depends()
-
-#endif
-
+#define smp_acquire__after_ctrl_dep()	__atomic_thread_fence(__ATOMIC_SEQ_CST)
 #define smp_store_mb(var, value)  do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_load_acquire(p)						\
@@ -188,12 +144,12 @@ static inline i_type a_type##_read_acquire(const a_type##_t *v)	\
 									\
 static inline void a_type##_set(a_type##_t *v, i_type i)		\
 {									\
-	return __ATOMIC_SET(&v->counter, i);				\
+	__ATOMIC_SET(&v->counter, i);					\
 }									\
 									\
 static inline void a_type##_set_release(a_type##_t *v, i_type i)	\
 {									\
-	return __ATOMIC_SET_RELEASE(&v->counter, i);			\
+	__ATOMIC_SET_RELEASE(&v->counter, i);				\
 }									\
 									\
 static inline i_type a_type##_add_return(i_type i, a_type##_t *v)	\
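
The GCC __atomic builtins behind try_cmpxchg() update *old in place on failure, which is what makes the canonical retry loop terse. A sketch (not from the source) using the raw macros above on a plain counter:

	static inline bool inc_unless_zero(unsigned *p)
	{
		unsigned old = __ATOMIC_READ(p);

		do {
			if (!old)
				return false;
		} while (!try_cmpxchg(p, &old, old + 1));	/* old refreshed on failure */

		return true;
	}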
@@ -6,9 +6,12 @@
 #define __LINUX_BLK_TYPES_H
 
 #include <linux/atomic.h>
+#include <linux/backing-dev.h>
 #include <linux/types.h>
 #include <linux/bvec.h>
 #include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
 
 struct bio_set;
 struct bio;
@@ -62,6 +65,8 @@ struct block_device {
 	struct gendisk *	bd_disk;
 	struct gendisk		__bd_disk;
 	int			bd_fd;
+
+	struct mutex		bd_holder_lock;
 };
 
 #define bdev_kobj(_bdev) (&((_bdev)->kobj))
@@ -65,7 +65,10 @@ unsigned bdev_logical_block_size(struct block_device *bdev);
 sector_t get_capacity(struct gendisk *disk);
 
 struct blk_holder_ops {
-	void (*mark_dead)(struct block_device *bdev);
+	void (*mark_dead)(struct block_device *bdev, bool surprise);
+	void (*sync)(struct block_device *bdev);
+	int (*freeze)(struct block_device *bdev);
+	int (*thaw)(struct block_device *bdev);
 };
 
 static inline struct block_device *file_bdev(struct file *file)
@@ -80,8 +83,12 @@ int lookup_bdev(const char *path, dev_t *);
 
 struct super_block {
 	void			*s_fs_info;
+	struct rw_semaphore	s_umount;
 };
 
+static inline void evict_inodes(struct super_block *sb) {}
+static inline int sync_filesystem(struct super_block *) { return 0; }
+
 /*
  * File types
 *
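
A sketch of a holder filling in the expanded ops; the new surprise flag distinguishes sudden device removal from an orderly teardown. Names here are hypothetical, not from the source:

	static void example_mark_dead(struct block_device *bdev, bool surprise)
	{
		/* Stop issuing new IO; on a surprise removal, writes already
		 * in flight may never complete. */
	}

	static const struct blk_holder_ops example_holder_ops = {
		.mark_dead	= example_mark_dead,
		/* .sync, .freeze and .thaw may be left NULL when unused */
	};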
@@ -67,6 +67,7 @@
 #define __same_type(a, b)	__builtin_types_compatible_p(typeof(a), typeof(b))
 #define fallthrough		__attribute__((__fallthrough__))
 #define __noreturn		__attribute__((__noreturn__))
+#define __no_kmsan_checks
 
 #ifndef __counted_by
 #define __counted_by(nr)
@@ -9,4 +9,9 @@ struct dentry {
 	struct inode *d_inode;
 };
 
+static inline void shrink_dcache_sb(struct super_block *) {}
+
 #define QSTR_INIT(n,l)	{ { { .len = l } }, .name = n }
+#define QSTR(n)		(struct qstr)QSTR_INIT(n, strlen(n))
 
 #endif /* __LINUX_DCACHE_H */
include/linux/fs_parser.h (new file): 20 lines
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Filesystem parameter description and parser
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_FS_PARSER_H
+#define _LINUX_FS_PARSER_H
+
+struct constant_table {
+	const char	*name;
+	int		value;
+};
+
+extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
+
+extern const struct constant_table bool_names[];
+
+#endif /* _LINUX_FS_PARSER_H */
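
A hedged usage sketch for the interface above; as in the kernel's fs_parser.c, tables are assumed to be terminated by an empty entry, and the table here is invented:

	static const struct constant_table on_off_names[] = {	/* hypothetical */
		{ "off",	0 },
		{ "on",		1 },
		{ }	/* empty terminator */
	};

	static int parse_on_off(const char *arg)
	{
		return lookup_constant(on_off_names, arg, -1);	/* unknown -> -1 */
	}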
@@ -11,6 +11,8 @@
 #include <linux/bug.h>
 #include <linux/byteorder.h>
 #include <linux/compiler.h>
+#include <linux/dcache.h>
+#include <linux/kmsan-checks.h>
 #include <linux/math.h>
 #include <linux/minmax.h>
 
@@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
 extern void kmemleak_update_trace(const void *ptr) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_transient_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
@@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr)
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
+static inline void kmemleak_transient_leak(const void *ptr)
+{
+}
 static inline void kmemleak_ignore(const void *ptr)
 {
 }
include/linux/kmsan-checks.h (new file): 98 lines
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN checks to be used for one-off annotations in subsystems.
+ *
+ * Copyright (C) 2017-2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+
+#ifndef _LINUX_KMSAN_CHECKS_H
+#define _LINUX_KMSAN_CHECKS_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_KMSAN
+
+/**
+ * kmsan_poison_memory() - Mark the memory range as uninitialized.
+ * @address: address to start with.
+ * @size: size of buffer to poison.
+ * @flags: GFP flags for allocations done by this function.
+ *
+ * Until other data is written to this range, KMSAN will treat it as
+ * uninitialized. Error reports for this memory will reference the call site of
+ * kmsan_poison_memory() as origin.
+ */
+void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);
+
+/**
+ * kmsan_unpoison_memory() - Mark the memory range as initialized.
+ * @address: address to start with.
+ * @size: size of buffer to unpoison.
+ *
+ * Until other data is written to this range, KMSAN will treat it as
+ * initialized.
+ */
+void kmsan_unpoison_memory(const void *address, size_t size);
+
+/**
+ * kmsan_check_memory() - Check the memory range for being initialized.
+ * @address: address to start with.
+ * @size: size of buffer to check.
+ *
+ * If any piece of the given range is marked as uninitialized, KMSAN will report
+ * an error.
+ */
+void kmsan_check_memory(const void *address, size_t size);
+
+/**
+ * kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
+ * @to: destination address in the userspace.
+ * @from: source address in the kernel.
+ * @to_copy: number of bytes to copy.
+ * @left: number of bytes not copied.
+ *
+ * If this is a real userspace data transfer, KMSAN checks the bytes that were
+ * actually copied to ensure there was no information leak. If @to belongs to
+ * the kernel space (which is possible for compat syscalls), KMSAN just copies
+ * the metadata.
+ */
+void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
+			size_t left);
+
+/**
+ * kmsan_memmove() - Notify KMSAN about a data copy within kernel.
+ * @to: destination address in the kernel.
+ * @from: source address in the kernel.
+ * @size: number of bytes to copy.
+ *
+ * Invoked after non-instrumented version (e.g. implemented using assembly
+ * code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
+ */
+void kmsan_memmove(void *to, const void *from, size_t to_copy);
+
+#else
+
+static inline void kmsan_poison_memory(const void *address, size_t size,
+				       gfp_t flags)
+{
+}
+static inline void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
+static inline void kmsan_check_memory(const void *address, size_t size)
+{
+}
+static inline void kmsan_copy_to_user(void __user *to, const void *from,
+				      size_t to_copy, size_t left)
+{
+}
+
+static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
+{
+}
+
+#endif
+
+#endif /* _LINUX_KMSAN_CHECKS_H */
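
A sketch of the typical one-off annotation these declarations support: after filling a buffer by means KMSAN cannot see (assembly, DMA), declare it initialized. fill_by_hardware() is hypothetical:

	void fill_by_hardware(void *buf, size_t len);	/* hypothetical */

	static void example(void *buf, size_t len)
	{
		fill_by_hardware(buf, len);
		/* KMSAN didn't observe the write, so declare it done: */
		kmsan_unpoison_memory(buf, len);
		kmsan_check_memory(buf, len);	/* now reports nothing */
	}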
@@ -20,6 +20,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
@@ -82,4 +82,71 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
 	return div_s64_rem(dividend, divisor, &remainder);
 }
 
+#ifndef mul_u32_u32
+/*
+ * Many a GCC version messes this up and generates a 64x64 mult :-(
+ */
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+	return (u64)a * b;
+}
+#endif
+
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+
+#ifndef mul_u64_u64_shr
+static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
+{
+	return (u64)(((unsigned __int128)a * mul) >> shift);
+}
+#endif /* mul_u64_u64_shr */
+
+#else
+
+#ifndef mul_u64_u64_shr
+static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
+{
+	union {
+		u64 ll;
+		struct {
+#ifdef __BIG_ENDIAN
+			u32 high, low;
+#else
+			u32 low, high;
+#endif
+		} l;
+	} rl, rm, rn, rh, a0, b0;
+	u64 c;
+
+	a0.ll = a;
+	b0.ll = b;
+
+	rl.ll = mul_u32_u32(a0.l.low, b0.l.low);
+	rm.ll = mul_u32_u32(a0.l.low, b0.l.high);
+	rn.ll = mul_u32_u32(a0.l.high, b0.l.low);
+	rh.ll = mul_u32_u32(a0.l.high, b0.l.high);
+
+	/*
+	 * Each of these lines computes a 64-bit intermediate result into "c",
+	 * starting at bits 32-95. The low 32-bits go into the result of the
+	 * multiplication, the high 32-bits are carried into the next step.
+	 */
+	rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
+	rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
+	rh.l.high = (c >> 32) + rh.l.high;
+
+	/*
+	 * The 128-bit result of the multiplication is in rl.ll and rh.ll,
+	 * shift it right and throw away the high part of the result.
+	 */
+	if (shift == 0)
+		return rl.ll;
+	if (shift < 64)
+		return (rl.ll >> shift) | (rh.ll << (64 - shift));
+	return rh.ll >> (shift & 63);
+}
+#endif /* mul_u64_u64_shr */
+
+#endif
+
 #endif /* _LINUX_MATH64_H */
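
Usage sketch: mul_u64_u64_shr() is the standard tool for fixed-point scaling without losing high bits, for example applying a 32.32 ratio:

	static inline u64 scale_ns(u64 ns, u64 ratio_32_32)
	{
		return mul_u64_u64_shr(ns, ratio_32_32, 32);
	}
	/* scale_ns(1000, 3ULL << 32) == 3000; both branches above compute
	 * the full 128-bit product before shifting, so nothing overflows. */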
@@ -15,8 +15,8 @@
  */
 #define MIN_HEAP_PREALLOCATED(_type, _name, _nr)	\
 struct _name {						\
-	int nr;						\
-	int size;					\
+	size_t nr;					\
+	size_t size;					\
 	_type *data;					\
 	_type preallocated[_nr];			\
 }
@@ -38,9 +38,176 @@ struct min_heap_callbacks {
 	void (*swp)(void *lhs, void *rhs, void *args);
 };
 
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+	unsigned char lsbits = (unsigned char)size;
+
+	(void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+	return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory. This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static __always_inline
+void swap_words_32(void *a, void *b, size_t n)
+{
+	do {
+		u32 t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+	} while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory. This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible. If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI). Are there any cases the kernel needs to worry about?
+ */
+static __always_inline
+void swap_words_64(void *a, void *b, size_t n)
+{
+	do {
+#ifdef CONFIG_64BIT
+		u64 t = *(u64 *)(a + (n -= 8));
+		*(u64 *)(a + n) = *(u64 *)(b + n);
+		*(u64 *)(b + n) = t;
+#else
+		/* Use two 32-bit transfers to avoid base+index+4 addressing */
+		u32 t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+
+		t = *(u32 *)(a + (n -= 4));
+		*(u32 *)(a + n) = *(u32 *)(b + n);
+		*(u32 *)(b + n) = t;
+#endif
+	} while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static __always_inline
+void swap_bytes(void *a, void *b, size_t n)
+{
+	do {
+		char t = ((char *)a)[--n];
+		((char *)a)[n] = ((char *)b)[n];
+		((char *)b)[n] = t;
+	} while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0)
+#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1)
+#define SWAP_BYTES    ((void (*)(void *, void *, void *))2)
+
+/*
+ * Selects the appropriate swap function based on the element size.
+ */
+static __always_inline
+void *select_swap_func(const void *base, size_t size)
+{
+	if (is_aligned(base, size, 8))
+		return SWAP_WORDS_64;
+	else if (is_aligned(base, size, 4))
+		return SWAP_WORDS_32;
+	else
+		return SWAP_BYTES;
+}
+
+static __always_inline
+void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args),
+	     void *priv)
+{
+	if (swap_func == SWAP_WORDS_64)
+		swap_words_64(a, b, size);
+	else if (swap_func == SWAP_WORDS_32)
+		swap_words_32(a, b, size);
+	else if (swap_func == SWAP_BYTES)
+		swap_bytes(a, b, size);
+	else
+		swap_func(a, b, priv);
+}
+
+/**
+ * parent - given the offset of the child, find the offset of the parent.
+ * @i: the offset of the heap element whose parent is sought. Non-zero.
+ * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
+ * @size: size of each element
+ *
+ * In terms of array indexes, the parent of element j = @i/@size is simply
+ * (j-1)/2. But when working in byte offsets, we can't use implicit
+ * truncation of integer divides.
+ *
+ * Fortunately, we only need one bit of the quotient, not the full divide.
+ * @size has a least significant bit. That bit will be clear if @i is
+ * an even multiple of @size, and set if it's an odd multiple.
+ *
+ * Logically, we're doing "if (i & lsbit) i -= size;", but since the
+ * branch is unpredictable, it's done with a bit of clever branch-free
+ * code instead.
+ */
+__attribute_const__ __always_inline
+static size_t parent(size_t i, unsigned int lsbit, size_t size)
+{
+	i -= size;
+	i -= size & -(i & lsbit);
+	return i / 2;
+}
+
 /* Initialize a min-heap. */
 static __always_inline
-void __min_heap_init(min_heap_char *heap, void *data, int size)
+void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
 {
 	heap->nr = 0;
 	heap->size = size;
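
	/*
	 * Worked example of the branch-free parent() above, with
	 * elem_size = 12 (so lsbit = 12 & -12 = 4):
	 *   child index 5 = byte offset 60: 60 - 12 = 48; 48 & 4 = 0, no
	 *   correction; 48 / 2 = 24 = index 2 = (5-1)/2.
	 *   child index 6 = byte offset 72: 72 - 12 = 60; 60 & 4 = 4, so
	 *   subtract size once more, giving 48; 48 / 2 = 24 = index 2 = (6-1)/2.
	 */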
@@ -50,105 +217,114 @@ void __min_heap_init(min_heap_char *heap, void *data, int size)
 	heap->data = heap->preallocated;
 }
 
-#define min_heap_init(_heap, _data, _size)	\
-	__min_heap_init((min_heap_char *)_heap, _data, _size)
+#define min_heap_init_inline(_heap, _data, _size)	\
+	__min_heap_init_inline((min_heap_char *)_heap, _data, _size)
 
 /* Get the minimum element from the heap. */
 static __always_inline
-void *__min_heap_peek(struct min_heap_char *heap)
+void *__min_heap_peek_inline(struct min_heap_char *heap)
 {
 	return heap->nr ? heap->data : NULL;
 }
 
-#define min_heap_peek(_heap)	\
-	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+#define min_heap_peek_inline(_heap)	\
+	(__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap))
 
 /* Check if the heap is full. */
 static __always_inline
-bool __min_heap_full(min_heap_char *heap)
+bool __min_heap_full_inline(min_heap_char *heap)
 {
 	return heap->nr == heap->size;
 }
 
-#define min_heap_full(_heap)	\
-	__min_heap_full((min_heap_char *)_heap)
+#define min_heap_full_inline(_heap)	\
+	__min_heap_full_inline((min_heap_char *)_heap)
 
 /* Sift the element at pos down the heap. */
 static __always_inline
-void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
-			  const struct min_heap_callbacks *func, void *args)
+void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
+				 const struct min_heap_callbacks *func, void *args)
 {
-	void *left, *right;
+	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
-	void *root = data + pos * elem_size;
-	int i = pos, j;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
+	/* pre-scale counters for performance */
+	size_t a = pos * elem_size;
+	size_t b, c, d;
+	size_t n = heap->nr * elem_size;
+
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
 
 	/* Find the sift-down path all the way to the leaves. */
-	for (;;) {
-		if (i * 2 + 2 >= heap->nr)
-			break;
-		left = data + (i * 2 + 1) * elem_size;
-		right = data + (i * 2 + 2) * elem_size;
-		i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
-	}
+	for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
+		b = func->less(data + c, data + d, args) ? c : d;
 
 	/* Special case for the last leaf with no sibling. */
-	if (i * 2 + 2 == heap->nr)
-		i = i * 2 + 1;
+	if (d == n)
+		b = c;
 
 	/* Backtrack to the correct location. */
-	while (i != pos && func->less(root, data + i * elem_size, args))
-		i = (i - 1) / 2;
+	while (b != a && func->less(data + a, data + b, args))
+		b = parent(b, lsbit, elem_size);
 
 	/* Shift the element into its correct place. */
-	j = i;
-	while (i != pos) {
-		i = (i - 1) / 2;
-		func->swp(data + i * elem_size, data + j * elem_size, args);
+	c = b;
+	while (b != a) {
+		b = parent(b, lsbit, elem_size);
+		do_swap(data + b, data + c, elem_size, swp, args);
 	}
 }
 
-#define min_heap_sift_down(_heap, _pos, _func, _args)	\
-	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_down_inline(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \
+				    _func, _args)
 
 /* Sift up ith element from the heap, O(log2(nr)). */
 static __always_inline
-void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
-			const struct min_heap_callbacks *func, void *args)
+void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
+			       const struct min_heap_callbacks *func, void *args)
 {
+	const unsigned long lsbit = elem_size & -elem_size;
 	void *data = heap->data;
-	size_t parent;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
+	/* pre-scale counters for performance */
+	size_t a = idx * elem_size, b;
+
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
 
-	while (idx) {
-		parent = (idx - 1) / 2;
-		if (func->less(data + parent * elem_size, data + idx * elem_size, args))
+	while (a) {
+		b = parent(a, lsbit, elem_size);
+		if (func->less(data + b, data + a, args))
 			break;
-		func->swp(data + parent * elem_size, data + idx * elem_size, args);
-		idx = parent;
+		do_swap(data + a, data + b, elem_size, swp, args);
+		a = b;
 	}
 }
 
-#define min_heap_sift_up(_heap, _idx, _func, _args)	\
-	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+#define min_heap_sift_up_inline(_heap, _idx, _func, _args)	\
+	__min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \
+				  _func, _args)
 
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
-void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-		       const struct min_heap_callbacks *func, void *args)
+void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
+			      const struct min_heap_callbacks *func, void *args)
 {
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		__min_heap_sift_down(heap, i, elem_size, func, args);
+		__min_heap_sift_down_inline(heap, i, elem_size, func, args);
 }
 
-#define min_heapify_all(_heap, _func, _args)	\
-	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_inline(_heap, _func, _args)	\
+	__min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
-		    const struct min_heap_callbacks *func, void *args)
+bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
+			   const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 
@@ -158,13 +334,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
-	__min_heap_sift_down(heap, 0, elem_size, func, args);
+	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
 
 	return true;
 }
 
-#define min_heap_pop(_heap, _func, _args)	\
-	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_inline(_heap, _func, _args)	\
+	__min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /*
  * Remove the minimum element and then push the given element. The
@@ -172,22 +348,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
 * efficient than a pop followed by a push that does 2.
 */
 static __always_inline
-void __min_heap_pop_push(min_heap_char *heap,
-			 const void *element, size_t elem_size,
-			 const struct min_heap_callbacks *func,
-			 void *args)
+void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
+				const struct min_heap_callbacks *func, void *args)
 {
 	memcpy(heap->data, element, elem_size);
-	__min_heap_sift_down(heap, 0, elem_size, func, args);
+	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
 }
 
-#define min_heap_pop_push(_heap, _element, _func, _args)	\
-	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_push_inline(_heap, _element, _func, _args)	\
+	__min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
+				   _func, _args)
 
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
-		     const struct min_heap_callbacks *func, void *args)
+bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
+			    const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 	int pos;
@@ -201,35 +376,81 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 	heap->nr++;
 
 	/* Sift child at pos up. */
-	__min_heap_sift_up(heap, elem_size, pos, func, args);
+	__min_heap_sift_up_inline(heap, elem_size, pos, func, args);
 
 	return true;
 }
 
-#define min_heap_push(_heap, _element, _func, _args)	\
-	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_push_inline(_heap, _element, _func, _args)	\
+	__min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
+			       _func, _args)
 
 /* Remove ith element from the heap, O(log2(nr)). */
 static __always_inline
-bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
-		    const struct min_heap_callbacks *func, void *args)
+bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
+			   const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
 
 	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
 		return false;
 
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
+
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	if (idx == heap->nr)
 		return true;
-	func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
-	__min_heap_sift_up(heap, elem_size, idx, func, args);
-	__min_heap_sift_down(heap, idx, elem_size, func, args);
+	do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
+	__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
+	__min_heap_sift_down_inline(heap, idx, elem_size, func, args);
 
 	return true;
 }
 
+#define min_heap_del_inline(_heap, _idx, _func, _args)	\
+	__min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \
+			      _func, _args)
+
+void __min_heap_init(min_heap_char *heap, void *data, int size);
+void *__min_heap_peek(struct min_heap_char *heap);
+bool __min_heap_full(min_heap_char *heap);
+void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
+			  const struct min_heap_callbacks *func, void *args);
+void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
+			const struct min_heap_callbacks *func, void *args);
+void __min_heapify_all(min_heap_char *heap, size_t elem_size,
+		       const struct min_heap_callbacks *func, void *args);
+bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
+		    const struct min_heap_callbacks *func, void *args);
+void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
+			 const struct min_heap_callbacks *func, void *args);
+bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
+		     const struct min_heap_callbacks *func, void *args);
+bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
+		    const struct min_heap_callbacks *func, void *args);
+
+#define min_heap_init(_heap, _data, _size)	\
+	__min_heap_init((min_heap_char *)_heap, _data, _size)
+#define min_heap_peek(_heap)	\
+	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+#define min_heap_full(_heap)	\
+	__min_heap_full((min_heap_char *)_heap)
+#define min_heap_sift_down(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_up(_heap, _idx, _func, _args)	\
+	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+#define min_heapify_all(_heap, _func, _args)	\
+	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop(_heap, _func, _args)	\
+	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_pop_push(_heap, _element, _func, _args)	\
+	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \
+			    _func, _args)
+#define min_heap_push(_heap, _element, _func, _args)	\
+	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_del(_heap, _idx, _func, _args)	\
+	__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
|
||||
|
||||
|
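For orientation (not part of the diff): a minimal sketch of driving the _inline
heap API above. DEFINE_MIN_HEAP, min_heap_init/full/peek and struct
min_heap_callbacks come from min_heap.h; int_heap, int_less and heap_demo are
invented names, and leaving ->swp NULL to get the default swap mirrors the
select_swap_func() fallback visible in __min_heap_del_inline().

	#include <linux/min_heap.h>

	DEFINE_MIN_HEAP(int, int_heap);	/* declares { size_t nr, size; int *data; ... } */

	static bool int_less(const void *l, const void *r, void *args)
	{
		return *(const int *)l < *(const int *)r;
	}

	static const struct min_heap_callbacks int_heap_cb = {
		.less	= int_less,
		.swp	= NULL,		/* NULL: use the built-in swap for this elem_size */
	};

	static void heap_demo(struct int_heap *heap, int *backing, size_t capacity)
	{
		const int vals[] = { 3, 1, 2 };

		min_heap_init(heap, backing, capacity);	/* nr = 0, data = backing */

		for (size_t i = 0; i < ARRAY_SIZE(vals) && !min_heap_full(heap); i++)
			min_heap_push_inline(heap, &vals[i], &int_heap_cb, NULL);

		while (heap->nr) {
			int min = *(int *)min_heap_peek(heap);	/* 1, then 2, then 3 */

			min_heap_pop_inline(heap, &int_heap_cb, NULL);
			(void) min;
		}
	}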
@@ -4,6 +4,7 @@

 #include <sys/syscall.h>
 #include <unistd.h>
+#include <linux/bug.h>
 #include <linux/types.h>

 struct sysinfo {
@@ -9,7 +9,9 @@
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <linux/bug.h>
+#include <linux/kernel.h>
 #include <linux/log2.h>
+#include <linux/math64.h>

 #ifdef SYS_getrandom
 static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
@@ -67,4 +69,24 @@ static inline u32 get_random_u32_below(u32 ceil)
 	}
 }

+static inline u32 __get_random_u32_below(u32 ceil)
+{
+	return get_random_u32_below(ceil);
+}
+
+static inline u64 get_random_u64_below(u64 ceil)
+{
+	if (ceil <= 1)
+		return 0;
+	if (ceil <= U32_MAX)
+		return get_random_u32_below(ceil);
+
+	for (;;) {
+		u64 rand = get_random_u64();
+		u64 mult = ceil * rand;
+		if (likely(mult >= -ceil % ceil))
+			return mul_u64_u64_shr(ceil, rand, 64);
+	}
+}
+
 #endif /* _LINUX_RANDOM_H */
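The get_random_u64_below() loop added above is the standard multiply-shift
(Lemire-style) bounded-random construction: mul_u64_u64_shr(ceil, rand, 64) is
floor(ceil * rand / 2^64), and the low 64 bits of the product ("mult") are
accepted only when they are at least 2^64 mod ceil (computed as -ceil % ceil),
which rejects exactly the draws that would bias the result. A hedged 32-bit
analogue for intuition only; the function and rng parameter are invented, and
the kernel u32/u64 typedefs are assumed:

	static inline u32 bounded_u32_sketch(u32 ceil, u32 (*rng32)(void))
	{
		u32 thresh = -ceil % ceil;	/* 2^32 mod ceil */

		for (;;) {
			u32 rand = rng32();
			u64 mult = (u64) ceil * rand;	/* 32x32 -> 64 bit product */

			if ((u32) mult >= thresh)	/* reject the biased low slice */
				return mult >> 32;	/* floor(ceil * rand / 2^32) */
		}
	}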
@@ -97,6 +97,11 @@ struct task_struct {
 	struct signal_struct {
 		struct rw_semaphore exec_update_lock;
 	} *signal, _signal;

+	struct {
+		u64 sum_exec_runtime;
+		u64 exec_start;
+	} se;
 };

 extern __thread struct task_struct *current;
include/linux/sched/sysctl.h (new file, 6 lines)
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_SCHED_SYSCTL_H
+#define __TOOLS_LINUX_SCHED_SYSCTL_H
+
+#define sysctl_hung_task_timeout_secs	(HZ * 10)
+
+#endif /* __TOOLS_LINUX_SCHED_SYSCTL_H */
@@ -103,6 +103,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 	((size) != 0 && (n) > SIZE_MAX / (size) \
 	 ? NULL : kmalloc((n) * (size), flags))

 #define kvcalloc(n, size, flags)	kvmalloc_array(n, size, flags|__GFP_ZERO)
+
+#define kvmalloc_array_noprof(...)	kvmalloc_array(__VA_ARGS__)

 #define kcalloc(n, size, flags)	kmalloc_array(n, size, flags|__GFP_ZERO)
@@ -111,6 +113,7 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 #define kzfree(p)	free((void *) p)

 #define kvmalloc(size, flags)		kmalloc(size, flags)
+#define kvmalloc_noprof(size, flags)	kmalloc(size, flags)
 #define kvzalloc(size, flags)		kzalloc(size, flags)
 #define kvfree(p)			kfree(p)

@@ -274,6 +277,8 @@ static inline void *vmalloc(unsigned long size)
 	return __vmalloc(size, GFP_KERNEL);
 }

+#define vmalloc_noprof(...)	vmalloc(__VA_ARGS__)
+
 static inline void *vzalloc(unsigned long size)
 {
 	return __vmalloc(size, GFP_KERNEL|__GFP_ZERO);
@@ -7,6 +7,11 @@

 typedef void (*rcu_callback_t)(struct rcu_head *head);

+static inline struct urcu_gp_poll_state get_state_synchronize_rcu()
+{
+	return start_poll_synchronize_rcu();
+}
+
 struct srcu_struct {
 };

@@ -17,36 +22,19 @@ static inline int srcu_read_lock(struct srcu_struct *ssp)
 	return 0;
 }

-static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
+static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, struct urcu_gp_poll_state cookie)
 {
-	return true;
+	return poll_state_synchronize_rcu(cookie);
 }

-static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
+static inline struct urcu_gp_poll_state start_poll_synchronize_srcu(struct srcu_struct *ssp)
 {
-	return 0;
+	return start_poll_synchronize_rcu();
 }

-static inline unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
+static inline struct urcu_gp_poll_state get_state_synchronize_srcu(struct srcu_struct *ssp)
 {
-	return 0;
-}
-
-#undef poll_state_synchronize_rcu
-static inline bool poll_state_synchronize_rcu(unsigned long cookie)
-{
-	return false;
-}
-
-#undef start_poll_synchronize_rcu
-static inline unsigned long start_poll_synchronize_rcu()
-{
-	return 0;
-}
-
-static inline unsigned long get_state_synchronize_rcu()
-{
-	return 0;
+	return get_state_synchronize_rcu();
 }

 static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) {}
include/linux/string_choices.h (new file, 85 lines)
@@ -0,0 +1,85 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_STRING_CHOICES_H_
#define _LINUX_STRING_CHOICES_H_

/*
 * Here we provide a series of helpers in the str_$TRUE_$FALSE format (you can
 * also expand some helpers as needed), where $TRUE and $FALSE are their
 * corresponding literal strings. These helpers can be used in printing and
 * also in other places where constant strings are required. Using these
 * helpers offers the following benefits:
 *  1) Reducing the hardcoding of strings, which makes the code more elegant
 *     through these simple literal-meaning helpers.
 *  2) Unifying the output, which prevents the same string from being printed
 *     in various forms, such as enable/disable, enabled/disabled, en/dis.
 *  3) Deduping by the linker, which results in a smaller binary file.
 */

#include <linux/types.h>

static inline const char *str_enable_disable(bool v)
{
	return v ? "enable" : "disable";
}
#define str_disable_enable(v)		str_enable_disable(!(v))

static inline const char *str_enabled_disabled(bool v)
{
	return v ? "enabled" : "disabled";
}
#define str_disabled_enabled(v)		str_enabled_disabled(!(v))

static inline const char *str_hi_lo(bool v)
{
	return v ? "hi" : "lo";
}
#define str_lo_hi(v)			str_hi_lo(!(v))

static inline const char *str_high_low(bool v)
{
	return v ? "high" : "low";
}
#define str_low_high(v)			str_high_low(!(v))

static inline const char *str_read_write(bool v)
{
	return v ? "read" : "write";
}
#define str_write_read(v)		str_read_write(!(v))

static inline const char *str_on_off(bool v)
{
	return v ? "on" : "off";
}
#define str_off_on(v)			str_on_off(!(v))

static inline const char *str_yes_no(bool v)
{
	return v ? "yes" : "no";
}
#define str_no_yes(v)			str_yes_no(!(v))

static inline const char *str_up_down(bool v)
{
	return v ? "up" : "down";
}
#define str_down_up(v)			str_up_down(!(v))

static inline const char *str_true_false(bool v)
{
	return v ? "true" : "false";
}
#define str_false_true(v)		str_true_false(!(v))

/**
 * str_plural - Return the simple pluralization based on English counts
 * @num: Number used for deciding pluralization
 *
 * If @num is 1, returns empty string, otherwise returns "s".
 */
static inline const char *str_plural(size_t num)
{
	return num == 1 ? "" : "s";
}

#endif
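A hedged usage sketch of the helpers above (the pr_info call, the "cache"
wording and the report() function are illustrative, not from the diff):

	#include <linux/string_choices.h>

	static void report(bool on, size_t nr_errors)
	{
		/* "enabled"/"disabled" and the plural "s" come from the helpers: */
		pr_info("cache %s, %zu error%s found\n",
			str_enabled_disabled(on),
			nr_errors, str_plural(nr_errors));
	}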
include/linux/unaligned.h (new file, 1 line)
@@ -0,0 +1 @@
+#include <asm/unaligned.h>

include/linux/unicode.h (new file, empty)
@@ -96,6 +96,7 @@ do {									\
 #define wait_event_freezable(wq, condition)	({wait_event(wq, condition); 0; })
 #define wait_event_killable(wq, condition)	({wait_event(wq, condition); 0; })
 #define wait_event_interruptible(wq, condition) ({wait_event(wq, condition); 0; })
+#define wait_event_state(wq, condition, state)	({wait_event(wq, condition); 0; })

 #define __wait_event_timeout(wq, condition, timeout)	\
 	___wait_event(wq, ___wait_cond_timeout(condition),	\
@@ -184,11 +184,6 @@ invalid:
 	return ERR_PTR(-EINVAL);
 }

-#define acl_for_each_entry(acl, acl_e)			\
-	for (acl_e = acl->a_entries;			\
-	     acl_e < acl->a_entries + acl->a_count;	\
-	     acl_e++)
-
 /*
  * Convert from in-memory to filesystem representation.
  */
@@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans,
 {
 	struct bkey_i_xattr *xattr;
 	bch_acl_header *acl_header;
-	const struct posix_acl_entry *acl_e;
+	const struct posix_acl_entry *acl_e, *pe;
 	void *outptr;
 	unsigned nr_short = 0, nr_long = 0, acl_len, u64s;

-	acl_for_each_entry(acl, acl_e) {
+	FOREACH_ACL_ENTRY(acl_e, acl, pe) {
 		switch (acl_e->e_tag) {
 		case ACL_USER:
 		case ACL_GROUP:
@@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,

 	outptr = (void *) acl_header + sizeof(*acl_header);

-	acl_for_each_entry(acl, acl_e) {
+	FOREACH_ACL_ENTRY(acl_e, acl, pe) {
 		bch_acl_entry *entry = outptr;

 		entry->e_tag = cpu_to_le16(acl_e->e_tag);
(File diff suppressed because it is too large)
@@ -8,8 +8,6 @@
 #include "debug.h"
 #include "super.h"

-enum bch_validate_flags;
-
 /* How out of date a pointer gen is allowed to be: */
 #define BUCKET_GC_GEN_MAX	96U

@@ -133,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
 	if (a.stripe)
 		return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
 	if (bch2_bucket_sectors_dirty(a))
-		return data_type;
+		return bucket_data_type(data_type);
 	if (a.cached_sectors)
 		return BCH_DATA_cached;
 	if (BCH_ALLOC_V4_NEED_DISCARD(&a))
@@ -168,6 +166,9 @@ static inline bool data_type_movable(enum bch_data_type type)
 static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
 					      struct bch_dev *ca)
 {
+	if (a.data_type >= BCH_DATA_NR)
+		return 0;
+
 	if (!data_type_movable(a.data_type) ||
 	    !bch2_bucket_sectors_fragmented(ca, a))
 		return 0;
@@ -242,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s

 int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);

-int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c,
+			   struct bkey_validate_context);
+int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c,
+			   struct bkey_validate_context);
+int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c,
+			   struct bkey_validate_context);
+int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c,
+			   struct bkey_validate_context);
 void bch2_alloc_v4_swab(struct bkey_s);
 void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

@@ -279,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 })

 int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c,
-			      enum bch_validate_flags);
+			      struct bkey_validate_context);
 void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

 #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) {	\
@@ -304,6 +309,8 @@ int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *,
 int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
 		       struct bkey_s_c, struct bkey_s,
 		       enum btree_iter_update_trigger_flags);
+
+int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool);
 int bch2_check_alloc_info(struct bch_fs *);
 int bch2_check_alloc_to_lru_refs(struct bch_fs *);
 void bch2_dev_do_discards(struct bch_dev *);
@@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)

 struct bch_alloc_v4 {
 	struct bch_val		v;
-	__u64			journal_seq;
+	__u64			journal_seq_nonempty;
 	__u32			flags;
 	__u8			gen;
 	__u8			oldest_gen;
@@ -70,7 +70,7 @@ struct bch_alloc_v4 {
 	__u32			stripe;
 	__u32			nr_external_backpointers;
 	/* end of fields in original version of alloc_v4 */
-	__u64			_fragmentation_lru; /* obsolete */
+	__u64			journal_seq_empty;
 	__u32			stripe_sectors;
 	__u32			pad;
 } __packed __aligned(8);
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 		return;
 	}

-	percpu_down_read(&c->mark_lock);
 	spin_lock(&ob->lock);
-
 	ob->valid = false;
 	ob->data_type = 0;
-
 	spin_unlock(&ob->lock);
-	percpu_up_read(&c->mark_lock);

 	spin_lock(&c->freelist_lock);
 	bch2_open_bucket_hash_remove(c, ob);
@@ -131,14 +127,14 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)

 void bch2_open_bucket_write_error(struct bch_fs *c,
 				  struct open_buckets *obs,
-				  unsigned dev)
+				  unsigned dev, int err)
 {
 	struct open_bucket *ob;
 	unsigned i;

 	open_bucket_for_each(c, obs, ob, i)
 		if (ob->dev == dev && ob->ec)
-			bch2_ec_bucket_cancel(c, ob);
+			bch2_ec_bucket_cancel(c, ob, err);
 }

 static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
@@ -156,12 +152,24 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 	return ob;
 }

+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
+{
+	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs)
+		return false;
+
+	return bch2_is_superblock_bucket(ca, b);
+}
+
 static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
 {
 	BUG_ON(c->open_buckets_partial_nr >=
 	       ARRAY_SIZE(c->open_buckets_partial));

 	spin_lock(&c->freelist_lock);
+	rcu_read_lock();
+	bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
+	rcu_read_unlock();
+
 	ob->on_partial_list = true;
 	c->open_buckets_partial[c->open_buckets_partial_nr++] =
 		ob - c->open_buckets;
@@ -171,70 +179,50 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
 	closure_wake_up(&c->freelist_wait);
 }

-/* _only_ for allocating the journal on a new device: */
-long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
+static inline bool may_alloc_bucket(struct bch_fs *c,
+				    struct bpos bucket,
+				    struct bucket_alloc_state *s)
 {
-	while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
-		u64 b = ca->new_fs_bucket_idx++;
-
-		if (!is_superblock_bucket(ca, b) &&
-		    (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
-			return b;
+	if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) {
+		s->skipped_open++;
+		return false;
 	}

-	return -1;
-}
-
-static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
-{
-	switch (watermark) {
-	case BCH_WATERMARK_interior_updates:
-		return 0;
-	case BCH_WATERMARK_reclaim:
-		return OPEN_BUCKETS_COUNT / 6;
-	case BCH_WATERMARK_btree:
-	case BCH_WATERMARK_btree_copygc:
-		return OPEN_BUCKETS_COUNT / 4;
-	case BCH_WATERMARK_copygc:
-		return OPEN_BUCKETS_COUNT / 3;
-	default:
-		return OPEN_BUCKETS_COUNT / 2;
+	u64 journal_seq_ready =
+		bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+					      bucket.inode, bucket.offset);
+	if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
+		if (journal_seq_ready > c->journal.flushing_seq)
+			s->need_journal_commit++;
+		s->skipped_need_journal_commit++;
+		return false;
 	}

+	if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) {
+		s->skipped_nocow++;
+		return false;
+	}
+
+	return true;
 }

 static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
-					      u64 bucket,
+					      u64 bucket, u8 gen,
 					      enum bch_watermark watermark,
-					      const struct bch_alloc_v4 *a,
 					      struct bucket_alloc_state *s,
 					      struct closure *cl)
 {
-	struct open_bucket *ob;
+	if (unlikely(is_superblock_bucket(c, ca, bucket)))
+		return NULL;

 	if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
 		s->skipped_nouse++;
 		return NULL;
 	}

-	if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
-		s->skipped_open++;
-		return NULL;
-	}
-
-	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-			c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
-		s->skipped_need_journal_commit++;
-		return NULL;
-	}
-
-	if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
-		s->skipped_nocow++;
-		return NULL;
-	}
-
 	spin_lock(&c->freelist_lock);

-	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
+	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
 		if (cl)
 			closure_wait(&c->open_buckets_wait, cl);

@@ -250,14 +238,13 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 		return NULL;
 	}

-	ob = bch2_open_bucket_alloc(c);
+	struct open_bucket *ob = bch2_open_bucket_alloc(c);

 	spin_lock(&ob->lock);
-
 	ob->valid	= true;
 	ob->sectors_free = ca->mi.bucket_size;
 	ob->dev		= ca->dev_idx;
-	ob->gen		= a->gen;
+	ob->gen		= gen;
 	ob->bucket	= bucket;
 	spin_unlock(&ob->lock);

@@ -272,111 +259,29 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 }

 static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
-					    enum bch_watermark watermark, u64 free_entry,
+					    enum bch_watermark watermark,
 					    struct bucket_alloc_state *s,
-					    struct bkey_s_c freespace_k,
+					    struct btree_iter *freespace_iter,
 					    struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter = { NULL };
-	struct bkey_s_c k;
-	struct open_bucket *ob;
-	struct bch_alloc_v4 a_convert;
-	const struct bch_alloc_v4 *a;
-	u64 b = free_entry & ~(~0ULL << 56);
-	unsigned genbits = free_entry >> 56;
-	struct printbuf buf = PRINTBUF;
-	int ret;
+	u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);

-	if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
-		prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
-			   "  freespace key ",
-			   ca->mi.first_bucket, ca->mi.nbuckets);
-		bch2_bkey_val_to_text(&buf, c, freespace_k);
-		bch2_trans_inconsistent(trans, "%s", buf.buf);
-		ob = ERR_PTR(-EIO);
-		goto err;
-	}
+	if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s))
+		return NULL;

-	k = bch2_bkey_get_iter(trans, &iter,
-			       BTREE_ID_alloc, POS(ca->dev_idx, b),
-			       BTREE_ITER_cached);
-	ret = bkey_err(k);
-	if (ret) {
-		ob = ERR_PTR(ret);
-		goto err;
-	}
+	u8 gen;
+	int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret)
+		return NULL;

-	a = bch2_alloc_to_v4(k, &a_convert);
-
-	if (a->data_type != BCH_DATA_free) {
-		if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
-			ob = NULL;
-			goto err;
-		}
-
-		prt_printf(&buf, "non free bucket in freespace btree\n"
-			   "  freespace key ");
-		bch2_bkey_val_to_text(&buf, c, freespace_k);
-		prt_printf(&buf, "\n  ");
-		bch2_bkey_val_to_text(&buf, c, k);
-		bch2_trans_inconsistent(trans, "%s", buf.buf);
-		ob = ERR_PTR(-EIO);
-		goto err;
-	}
-
-	if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
-	    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-		prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
-			   "  freespace key ",
-			   genbits, alloc_freespace_genbits(*a) >> 56);
-		bch2_bkey_val_to_text(&buf, c, freespace_k);
-		prt_printf(&buf, "\n  ");
-		bch2_bkey_val_to_text(&buf, c, k);
-		bch2_trans_inconsistent(trans, "%s", buf.buf);
-		ob = ERR_PTR(-EIO);
-		goto err;
-	}
-
-	if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-		struct bch_backpointer bp;
-		struct bpos bp_pos = POS_MIN;
-
-		ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1,
-						&bp_pos, &bp,
-						BTREE_ITER_nopreserve);
-		if (ret) {
-			ob = ERR_PTR(ret);
-			goto err;
-		}
-
-		if (!bkey_eq(bp_pos, POS_MAX)) {
-			/*
-			 * Bucket may have data in it - we don't call
-			 * bc2h_trans_inconnsistent() because fsck hasn't
-			 * finished yet
-			 */
-			ob = NULL;
-			goto err;
-		}
-	}
-
-	ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
-	if (!ob)
-		bch2_set_btree_iter_dontneed(&iter);
-err:
-	if (iter.path)
-		bch2_set_btree_iter_dontneed(&iter);
-	bch2_trans_iter_exit(trans, &iter);
-	printbuf_exit(&buf);
-	return ob;
+	return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl);
 }

 /*
  * This path is for before the freespace btree is initialized:
- *
- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
  */
 static noinline struct open_bucket *
 bch2_bucket_alloc_early(struct btree_trans *trans,
@@ -385,10 +290,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
 			struct bucket_alloc_state *s,
 			struct closure *cl)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_iter iter, citer;
 	struct bkey_s_c k, ck;
 	struct open_bucket *ob = NULL;
-	u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+	u64 first_bucket = ca->mi.first_bucket;
 	u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
 	u64 alloc_start = max(first_bucket, *dev_alloc_cursor);
 	u64 alloc_cursor = alloc_start;
@@ -411,10 +317,6 @@ again:
 		if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
 			break;

-		if (ca->new_fs_bucket_idx &&
-		    is_superblock_bucket(ca, k.k->p.offset))
-			continue;
-
 		if (s->btree_bitmap != BTREE_BITMAP_ANY &&
 		    s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
 				bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -448,7 +350,10 @@ again:

 		s->buckets_seen++;

-		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+		ob = may_alloc_bucket(c, k.k->p, s)
+			? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen,
+					     watermark, s, cl)
+			: NULL;
 next:
 		bch2_set_btree_iter_dontneed(&citer);
 		bch2_trans_iter_exit(trans, &citer);
@@ -485,20 +390,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
 	u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor));
 	u64 alloc_cursor = alloc_start;
 	int ret;
-
-	BUG_ON(ca->new_fs_bucket_idx);
 again:
-	for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
-				     POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
-		if (k.k->p.inode != ca->dev_idx)
-			break;
+	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace,
+					 POS(ca->dev_idx, alloc_cursor),
+					 POS(ca->dev_idx, U64_MAX),
+					 0, k, ret) {
+		/*
+		 * peek normally doesn't trim extents - they can span iter.pos,
+		 * which is not what we want here:
+		 */
+		iter.k.size = iter.k.p.offset - iter.pos.offset;

-		for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
-		     alloc_cursor < k.k->p.offset;
-		     alloc_cursor++) {
+		while (iter.k.size) {
 			s->buckets_seen++;

-			u64 bucket = alloc_cursor & ~(~0ULL << 56);
+			u64 bucket = iter.pos.offset & ~(~0ULL << 56);
 			if (s->btree_bitmap != BTREE_BITMAP_ANY &&
 			    s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
 					bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -507,32 +413,36 @@ again:
 				goto fail;

 				bucket = sector_to_bucket(ca,
-						round_up(bucket_to_sector(ca, bucket) + 1,
+						round_up(bucket_to_sector(ca, bucket + 1),
 							 1ULL << ca->mi.btree_bitmap_shift));
-				u64 genbits = alloc_cursor >> 56;
-				alloc_cursor = bucket | (genbits << 56);
+				alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56));

-				if (alloc_cursor > k.k->p.offset)
-					bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
+				bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
 				s->skipped_mi_btree_bitmap++;
-				continue;
+				goto next;
 			}

-			ob = try_alloc_bucket(trans, ca, watermark,
-					      alloc_cursor, s, k, cl);
+			ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl);
 			if (ob) {
+				if (!IS_ERR(ob))
+					*dev_alloc_cursor = iter.pos.offset;
 				bch2_set_btree_iter_dontneed(&iter);
 				break;
 			}
-		}

+			iter.k.size--;
+			iter.pos.offset++;
+		}
+next:
 		if (ob || ret)
 			break;
 	}
fail:
 	bch2_trans_iter_exit(trans, &iter);

-	if (!ob && ret)
+	BUG_ON(ob && ret);
+
+	if (ret)
 		ob = ERR_PTR(ret);

 	if (!ob && alloc_start > ca->mi.first_bucket) {
@@ -540,8 +450,6 @@ fail:
 		goto again;
 	}

-	*dev_alloc_cursor = alloc_cursor;
-
 	return ob;
 }
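The `& ~(~0ULL << 56)` masking above reflects how freespace-btree key offsets
pack a 56-bit bucket number with 8 generation bits in the top byte. A hedged
sketch of that packing, with invented helper names (only the bit layout is
taken from the code above):

	static inline u64 freespace_pack(u64 bucket, u8 genbits)
	{
		/* low 56 bits: bucket number; high 8 bits: genbits */
		return (bucket & ~(~0ULL << 56)) | ((u64) genbits << 56);
	}

	static inline u64 freespace_bucket(u64 offset)
	{
		return offset & ~(~0ULL << 56);	/* mask off the top byte */
	}

	static inline u8 freespace_genbits(u64 offset)
	{
		return offset >> 56;
	}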
@@ -591,6 +499,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca,
  * @watermark:	how important is this allocation?
  * @data_type:	BCH_DATA_journal, btree, user...
  * @cl:		if not NULL, closure to be used to wait if buckets not available
+ * @nowait:	if true, do not wait for buckets to become available
  * @usage:	for secondarily also returning the current device usage
  *
  * Returns:	an open_bucket on success, or an ERR_PTR() on failure.
@@ -625,6 +534,10 @@ again:
 			bch2_dev_do_invalidates(ca);

 		if (!avail) {
+			if (watermark > BCH_WATERMARK_normal &&
+			    c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+				goto alloc;
+
 			if (cl && !waiting) {
 				closure_wait(&c->freelist_wait, cl);
 				waiting = true;
@@ -644,7 +557,7 @@ alloc:
 		? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
 		: bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);

-	if (s.skipped_need_journal_commit * 2 > avail)
+	if (s.need_journal_commit * 2 > avail)
 		bch2_journal_flush_async(&c->journal, NULL);

 	if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
@@ -684,7 +597,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 	struct bch_dev_usage usage;
 	struct open_bucket *ob;

-	bch2_trans_do(c, NULL, NULL, 0,
+	bch2_trans_do(c,
 		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
 							data_type, cl, false, &usage)));
 	return ob;
@@ -707,9 +620,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
 	unsigned i;

 	for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
-		ret.devs[ret.nr++] = i;
+		ret.data[ret.nr++] = i;

-	bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
+	bubble_sort(ret.data, ret.nr, dev_stripe_cmp);
 	return ret;
 }

@@ -718,7 +631,7 @@ static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
 			       struct bch_dev_usage *usage)
 {
 	u64 *v = stripe->next_alloc + ca->dev_idx;
-	u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal);
+	u64 free_space = __dev_buckets_available(ca, *usage, BCH_WATERMARK_normal);
 	u64 free_space_inv = free_space
 		? div64_u64(1ULL << 48, free_space)
 		: 1ULL << 48;
@@ -781,18 +694,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
-	struct dev_alloc_list devs_sorted =
-		bch2_dev_alloc_list(c, stripe, devs_may_alloc);
 	int ret = -BCH_ERR_insufficient_devices;

 	BUG_ON(*nr_effective >= nr_replicas);

-	for (unsigned i = 0; i < devs_sorted.nr; i++) {
-		struct bch_dev_usage usage;
-		struct open_bucket *ob;
-
-		unsigned dev = devs_sorted.devs[i];
-		struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev);
+	struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+	darray_for_each(devs_sorted, i) {
+		struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i);
 		if (!ca)
 			continue;

@@ -801,8 +709,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			continue;
 		}

-		ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
-					     cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
+		struct bch_dev_usage usage;
+		struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
+								 cl, flags & BCH_WRITE_alloc_nowait, &usage);
 		if (!IS_ERR(ob))
 			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 		bch2_dev_put(ca);
@@ -846,10 +755,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 			 struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
-	struct dev_alloc_list devs_sorted;
-	struct ec_stripe_head *h;
-	struct open_bucket *ob;
-	unsigned i, ec_idx;
 	int ret = 0;

 	if (nr_replicas < 2)
@@ -858,34 +763,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 	if (ec_open_bucket(c, ptrs))
 		return 0;

-	h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
+	struct ec_stripe_head *h =
+		bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 	if (!h)
 		return 0;

-	devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
-
-	for (i = 0; i < devs_sorted.nr; i++)
-		for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+	struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+	darray_for_each(devs_sorted, i)
+		for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
 			if (!h->s->blocks[ec_idx])
 				continue;

-			ob = c->open_buckets + h->s->blocks[ec_idx];
-			if (ob->dev == devs_sorted.devs[i] &&
-			    !test_and_set_bit(ec_idx, h->s->blocks_allocated))
-				goto got_bucket;
-		}
-	goto out_put_head;
-got_bucket:
-	ob->ec_idx	= ec_idx;
-	ob->ec		= h->s;
-	ec_stripe_new_get(h->s, STRIPE_REF_io);
+			struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx];
+			if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) {
+				ob->ec_idx	= ec_idx;
+				ob->ec		= h->s;
+				ec_stripe_new_get(h->s, STRIPE_REF_io);

-	ret = add_new_bucket(c, ptrs, devs_may_alloc,
-			     nr_replicas, nr_effective,
-			     have_cache, ob);
-out_put_head:
+				ret = add_new_bucket(c, ptrs, devs_may_alloc,
+						     nr_replicas, nr_effective,
+						     have_cache, ob);
+				goto out;
+			}
+		}
out:
 	bch2_ec_stripe_head_put(c, h);
 	return ret;
 }
@@ -972,7 +875,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
 			u64 avail;

 			bch2_dev_usage_read_fast(ca, &usage);
-			avail = dev_buckets_free(ca, usage, watermark);
+			avail = dev_buckets_free(ca, usage, watermark) + ca->nr_partial_buckets;
 			if (!avail)
 				continue;

@@ -981,6 +884,10 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
 					      i);
 			ob->on_partial_list = false;

+			rcu_read_lock();
+			bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+			rcu_read_unlock();
+
 			ret = add_new_bucket(c, ptrs, devs_may_alloc,
 					     nr_replicas, nr_effective,
 					     have_cache, ob);
@@ -1191,7 +1098,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
 			--c->open_buckets_partial_nr;
 			swap(c->open_buckets_partial[i],
 			     c->open_buckets_partial[c->open_buckets_partial_nr]);
+
 			ob->on_partial_list = false;
+
+			rcu_read_lock();
+			bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+			rcu_read_unlock();
+
 			spin_unlock(&c->freelist_lock);
 			bch2_open_bucket_put(c, ob);
 			spin_lock(&c->freelist_lock);
@@ -1406,7 +1319,7 @@ retry:
 	if (wp->data_type != BCH_DATA_user)
 		have_cache = true;

-	if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+	if (target && !(flags & BCH_WRITE_only_specified_devs)) {
 		ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
 					      target, erasure_code,
 					      nr_replicas, &nr_effective,
@@ -1496,7 +1409,7 @@ err:
 	if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
 		ret = -BCH_ERR_bucket_alloc_blocked;

-	if (cl && !(flags & BCH_WRITE_ALLOC_NOWAIT) &&
+	if (cl && !(flags & BCH_WRITE_alloc_nowait) &&
 	    bch2_err_matches(ret, BCH_ERR_freelist_empty))
 		ret = -BCH_ERR_bucket_alloc_blocked;

@@ -1610,8 +1523,7 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
 	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
 	     ob++) {
 		spin_lock(&ob->lock);
-		if (ob->valid && !ob->on_partial_list &&
-		    (!ca || ob->dev == ca->dev_idx))
+		if (ob->valid && (!ca || ob->dev == ca->dev_idx))
 			bch2_open_bucket_to_text(out, c, ob);
 		spin_unlock(&ob->lock);
 	}
@@ -20,7 +20,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *);

 struct dev_alloc_list {
 	unsigned	nr;
-	u8		devs[BCH_SB_MEMBERS_MAX];
+	u8		data[BCH_SB_MEMBERS_MAX];
 };

 struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
@@ -28,13 +28,28 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
 					  struct bch_devs_mask *);
 void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);

-long bch2_bucket_alloc_new_fs(struct bch_dev *);
-
 static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
 {
 	return bch2_dev_have_ref(c, ob->dev);
 }

+static inline unsigned bch2_open_buckets_reserved(enum bch_watermark watermark)
+{
+	switch (watermark) {
+	case BCH_WATERMARK_interior_updates:
+		return 0;
+	case BCH_WATERMARK_reclaim:
+		return OPEN_BUCKETS_COUNT / 6;
+	case BCH_WATERMARK_btree:
+	case BCH_WATERMARK_btree_copygc:
+		return OPEN_BUCKETS_COUNT / 4;
+	case BCH_WATERMARK_copygc:
+		return OPEN_BUCKETS_COUNT / 3;
+	default:
+		return OPEN_BUCKETS_COUNT / 2;
+	}
+}
+
 struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
 				      enum bch_watermark, enum bch_data_type,
 				      struct closure *);
@@ -67,7 +82,7 @@ static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
 }

 void bch2_open_bucket_write_error(struct bch_fs *,
-				  struct open_buckets *, unsigned);
+				  struct open_buckets *, unsigned, int);

 void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
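For intuition, assuming OPEN_BUCKETS_COUNT were 1024 (an illustrative value
only; see alloc_types.h for the real constant), the reservation ladder in
bch2_open_buckets_reserved() above works out to:

	watermark		open buckets held back
	interior_updates	0
	reclaim			170	(1024 / 6)
	btree, btree_copygc	256	(1024 / 4)
	copygc			341	(1024 / 3)
	everything else		512	(1024 / 2)

so an ordinary write stops allocating once half the table is in use, while
the passes that free space can keep going almost to the end - which is what
guarantees forward progress under space pressure.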
|
@ -18,6 +18,7 @@ struct bucket_alloc_state {
|
||||
u64 buckets_seen;
|
||||
u64 skipped_open;
|
||||
u64 skipped_need_journal_commit;
|
||||
u64 need_journal_commit;
|
||||
u64 skipped_nocow;
|
||||
u64 skipped_nouse;
|
||||
u64 skipped_mi_btree_bitmap;
|
||||
@ -89,6 +90,7 @@ struct dev_stripe_state {
|
||||
x(stopped) \
|
||||
x(waiting_io) \
|
||||
x(waiting_work) \
|
||||
x(runnable) \
|
||||
x(running)
|
||||
|
||||
enum write_point_state {
|
||||
@ -124,6 +126,7 @@ struct write_point {
|
||||
enum write_point_state state;
|
||||
u64 last_state_change;
|
||||
u64 time[WRITE_POINT_STATE_NR];
|
||||
u64 last_runtime;
|
||||
} __aligned(SMP_CACHE_BYTES);
|
||||
};
|
||||
|
||||
|
(File diff suppressed because it is too large)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
-#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+#ifndef _BCACHEFS_BACKPOINTERS_H
+#define _BCACHEFS_BACKPOINTERS_H

 #include "btree_cache.h"
 #include "btree_iter.h"
@@ -18,14 +18,14 @@ static inline u64 swab40(u64 x)
 		((x & 0xff00000000ULL) >> 32));
 }

-int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags);
-void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
-void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k,
+			      struct bkey_validate_context);
+void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 void bch2_backpointer_swab(struct bkey_s);

 #define bch2_bkey_ops_backpointer ((struct bkey_ops) {	\
 	.key_validate	= bch2_backpointer_validate,	\
-	.val_to_text	= bch2_backpointer_k_to_text,	\
+	.val_to_text	= bch2_backpointer_to_text,	\
 	.swab		= bch2_backpointer_swab,	\
 	.min_val_size	= 32,				\
 })
@@ -43,22 +43,24 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_dev *ca, struct bpos
 	return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
 }

+static inline struct bpos bp_pos_to_bucket_and_offset(const struct bch_dev *ca, struct bpos bp_pos,
+						      u32 *bucket_offset)
+{
+	u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
+
+	return POS(bp_pos.inode, sector_to_bucket_and_offset(ca, bucket_sector, bucket_offset));
+}
+
 static inline bool bp_pos_to_bucket_nodev_noerror(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket)
 {
 	rcu_read_lock();
-	struct bch_dev *ca = bch2_dev_rcu(c, bp_pos.inode);
+	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp_pos.inode);
 	if (ca)
 		*bucket = bp_pos_to_bucket(ca, bp_pos);
 	rcu_read_unlock();
 	return ca != NULL;
 }

 static inline bool bp_pos_to_bucket_nodev(struct bch_fs *c, struct bpos bp_pos, struct bpos *bucket)
 {
 	return !bch2_fs_inconsistent_on(!bp_pos_to_bucket_nodev_noerror(c, bp_pos, bucket),
 					c, "backpointer for missing device %llu", bp_pos.inode);
 }

 static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca,
 						   struct bpos bucket,
 						   u64 bucket_offset)
@@ -80,31 +82,35 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_dev *ca,
 	return ret;
 }

-int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bch_dev *,
-				struct bpos bucket, struct bch_backpointer, struct bkey_s_c, bool);
+static inline struct bpos bucket_pos_to_bp_start(const struct bch_dev *ca, struct bpos bucket)
+{
+	return bucket_pos_to_bp(ca, bucket, 0);
+}
+
+static inline struct bpos bucket_pos_to_bp_end(const struct bch_dev *ca, struct bpos bucket)
+{
+	return bpos_nosnap_predecessor(bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0));
+}
+
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *,
+					      struct bkey_s_c,
+					      struct bkey_i_backpointer *,
+					      bool);

 static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
-				struct bch_dev *ca,
-				struct bpos bucket,
-				struct bch_backpointer bp,
 				struct bkey_s_c orig_k,
+				struct bkey_i_backpointer *bp,
 				bool insert)
 {
 	if (unlikely(bch2_backpointers_no_use_write_buffer))
-		return bch2_bucket_backpointer_mod_nowritebuffer(trans, ca, bucket, bp, orig_k, insert);
-
-	struct bkey_i_backpointer bp_k;
-
-	bkey_backpointer_init(&bp_k.k_i);
-	bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
-	bp_k.v = bp;
+		return bch2_bucket_backpointer_mod_nowritebuffer(trans, orig_k, bp, insert);

 	if (!insert) {
-		bp_k.k.type = KEY_TYPE_deleted;
-		set_bkey_val_u64s(&bp_k.k, 0);
+		bp->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&bp->k, 0);
 	}

-	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
+	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp->k_i);
 }

@@ -117,7 +123,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
 		return BCH_DATA_btree;
 	case KEY_TYPE_extent:
 	case KEY_TYPE_reflink_v:
-		return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+		if (p.has_ec)
+			return BCH_DATA_stripe;
+		if (p.ptr.cached)
+			return BCH_DATA_cached;
+		else
+			return BCH_DATA_user;
 	case KEY_TYPE_stripe: {
 		const struct bch_extent_ptr *ptr = &entry->ptr;
 		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
@@ -134,44 +145,42 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
 	}
 }

-static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
 			   enum btree_id btree_id, unsigned level,
 			   struct bkey_s_c k, struct extent_ptr_decoded p,
 			   const union bch_extent_entry *entry,
-			   struct bpos *bucket_pos, struct bch_backpointer *bp,
-			   u64 sectors)
+			   struct bkey_i_backpointer *bp)
 {
-	u32 bucket_offset;
-	*bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset);
-	*bp = (struct bch_backpointer) {
+	bkey_backpointer_init(&bp->k_i);
+	bp->k.p.inode = p.ptr.dev;
+
+	if (k.k->type != KEY_TYPE_stripe)
+		bp->k.p.offset = ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
+	else {
+		/*
+		 * Put stripe backpointers where they won't collide with the
+		 * extent backpointers within the stripe:
+		 */
+		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+		bp->k.p.offset = ((u64) (p.ptr.offset + le16_to_cpu(s.v->sectors)) <<
+				  MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;
+	}
+
+	bp->v = (struct bch_backpointer) {
 		.btree_id	= btree_id,
 		.level		= level,
 		.data_type	= bch2_bkey_ptr_data_type(k, p, entry),
-		.bucket_offset	= ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
-			p.crc.offset,
-		.bucket_len	= sectors,
+		.bucket_gen	= p.ptr.gen,
+		.bucket_len	= ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p),
 		.pos		= k.k->p,
 	};
 }

-static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
-			   enum btree_id btree_id, unsigned level,
-			   struct bkey_s_c k, struct extent_ptr_decoded p,
-			   const union bch_extent_entry *entry,
-			   struct bpos *bucket_pos, struct bch_backpointer *bp)
-{
-	u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
-
-	__bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors);
-}
-
-int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int,
-			      struct bpos *, struct bch_backpointer *, unsigned);
-struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
-					 struct bpos, struct bch_backpointer,
-					 unsigned);
-struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
-					struct bpos, struct bch_backpointer);
+struct bkey_buf;
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer,
+					 struct btree_iter *, unsigned, struct bkey_buf *);
+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
+					struct btree_iter *, struct bkey_buf *);

 int bch2_check_btree_backpointers(struct bch_fs *);
 int bch2_check_extents_to_backpointers(struct bch_fs *);
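As the helpers above show, a backpointer key's position is (device, sector <<
MAX_EXTENT_COMPRESS_RATIO_SHIFT, plus an offset within the bucket), so every
bucket owns a contiguous range of backpointer positions. A hedged sketch of
the round trip implied by bucket_pos_to_bp_noerror() and bp_pos_to_bucket();
the function name is invented, and sector_to_bucket()/bucket_to_sector() are
the usual bucket-size conversions assumed from elsewhere in the tree:

	static inline struct bpos bp_pos_roundtrip_sketch(const struct bch_dev *ca,
							  struct bpos bucket,
							  u64 bucket_offset)
	{
		/* encode: bucket -> first sector -> shifted backpointer offset */
		struct bpos bp_pos = POS(bucket.inode,
					 (bucket_to_sector(ca, bucket.offset) <<
					  MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);

		/* decode: shift away the compress-ratio bits, then sector -> bucket */
		u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;

		return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
	}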
@@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos)

 static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos)
 {
-	prt_str(out, bch2_btree_id_str(pos.btree));
+	bch2_btree_id_to_text(out, pos.btree);
 	prt_char(out, ':');
 	bch2_bpos_to_text(out, pos.pos);
 }
@@ -203,8 +203,10 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/zstd.h>
+#include <linux/unicode.h>

 #include "bcachefs_format.h"
+#include "btree_journal_iter_types.h"
 #include "disk_accounting_types.h"
 #include "errcode.h"
 #include "fifo.h"
@@ -293,6 +295,8 @@ do {									\

 #define bch_info(c, fmt, ...) \
 	bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_info_ratelimited(c, fmt, ...) \
+	bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_notice(c, fmt, ...) \
 	bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_warn(c, fmt, ...) \
@@ -352,6 +356,12 @@ do {									\
 		bch_info(c, fmt, ##__VA_ARGS__);			\
 } while (0)

+#define bch_verbose_ratelimited(c, fmt, ...)				\
+do {									\
+	if ((c)->opts.verbose)						\
+		bch_info_ratelimited(c, fmt, ##__VA_ARGS__);		\
+} while (0)
+
 #define pr_verbose_init(opts, fmt, ...)					\
 do {									\
 	if (opt_get(opts, verbose))					\
@@ -435,6 +445,7 @@ BCH_DEBUG_PARAMS_DEBUG()
 	x(btree_node_sort)			\
 	x(btree_node_read)			\
 	x(btree_node_read_done)			\
+	x(btree_node_write)			\
 	x(btree_interior_update_foreground)	\
 	x(btree_interior_update_total)		\
 	x(btree_gc)				\
@@ -447,6 +458,7 @@ BCH_DEBUG_PARAMS_DEBUG()
 	x(blocked_journal_low_on_space)		\
 	x(blocked_journal_low_on_pin)		\
 	x(blocked_journal_max_in_flight)	\
+	x(blocked_journal_max_open)		\
 	x(blocked_key_cache_flush)		\
 	x(blocked_allocate)			\
 	x(blocked_allocate_open_bucket)		\
@@ -524,6 +536,7 @@ struct bch_dev {
 	 */
 	struct bch_member_cpu	mi;
 	atomic64_t		errors[BCH_MEMBER_ERROR_NR];
+	unsigned long		write_errors_start;

 	__uuid_t		uuid;
 	char			name[BDEVNAME_SIZE];
@@ -538,23 +551,24 @@ struct bch_dev {

 	/*
 	 * Buckets:
-	 * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
-	 * gc_gens_lock, for device resize - holding any is sufficient for
-	 * access: Or rcu_read_lock(), but only for dev_ptr_stale():
+	 * Per-bucket arrays are protected by either rcu_read_lock or
+	 * state_lock, for device resize.
 	 */
 	GENRADIX(struct bucket)	buckets_gc;
 	struct bucket_gens __rcu *bucket_gens;
 	u8			*oldest_gen;
 	unsigned long		*buckets_nouse;
-	struct rw_semaphore	bucket_lock;
+
+	unsigned long		*bucket_backpointer_mismatches;
+	unsigned long		*bucket_backpointer_empty;

 	struct bch_dev_usage __percpu *usage;

 	/* Allocator: */
-	u64			new_fs_bucket_idx;
 	u64			alloc_cursor[3];

 	unsigned		nr_open_buckets;
+	unsigned		nr_partial_buckets;
 	unsigned		nr_btree_reserve;

 	size_t			inc_gen_needs_gc;
@@ -605,6 +619,7 @@ struct bch_dev {
 	x(going_ro)			\
 	x(write_disable_complete)	\
 	x(clean_shutdown)		\
+	x(recovery_running)		\
 	x(fsck_running)			\
 	x(initial_gc_unfixed)		\
 	x(need_delete_dead_snapshots)	\
@@ -612,7 +627,8 @@ struct bch_dev {
 	x(topology_error)		\
 	x(errors_fixed)			\
 	x(errors_not_fixed)		\
-	x(no_invalid_checks)
+	x(no_invalid_checks)		\
+	x(discard_mount_opt_set)	\

 enum bch_fs_flags {
 #define x(n)		BCH_FS_##n,
@@ -649,28 +665,6 @@ struct journal_seq_blacklist_table {
 	} entries[];
 };

-struct journal_keys {
-	/* must match layout in darray_types.h */
-	size_t			nr, size;
-	struct journal_key {
-		u64		journal_seq;
-		u32		journal_offset;
-		enum btree_id	btree_id:8;
-		unsigned	level:8;
-		bool		allocated;
-		bool		overwritten;
-		struct bkey_i	*k;
-	} *data;
-	/*
-	 * Gap buffer: instead of all the empty space in the array being at the
-	 * end of the buffer - from @nr to @size - the empty space is at @gap.
-	 * This means that sequential insertions are O(n) instead of O(n^2).
-	 */
-	size_t			gap;
-	atomic_t		ref;
-	bool			initial_ref_held;
-};
-
 struct btree_trans_buf {
 	struct btree_trans	*trans;
 };
@@ -679,6 +673,7 @@ struct btree_trans_buf {
 	((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })

 #define BCH_WRITE_REFS()						\
+	x(journal)							\
 	x(trans)							\
 	x(write)							\
 	x(promote)							\
@@ -691,12 +686,14 @@ struct btree_trans_buf {
 	x(dio_write)							\
 	x(discard)							\
 	x(discard_fast)							\
+	x(check_discard_freespace_key)					\
 	x(invalidate)							\
 	x(delete_dead_snapshots)					\
 	x(gc_gens)							\
 	x(snapshot_delete_pagecache)					\
 	x(sysfs)							\
-	x(btree_write_buffer)
+	x(btree_write_buffer)						\
+	x(btree_node_scrub)

 enum bch_write_ref {
 #define x(n) BCH_WRITE_REF_##n,
@@ -705,6 +702,8 @@ enum bch_write_ref {
 	BCH_WRITE_REF_NR,
 };

+#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
+
 struct bch_fs {
 	struct closure		cl;

@@ -733,6 +732,12 @@ struct bch_fs {
 #else
 	struct percpu_ref	writes;
 #endif
+	/*
+	 * Certain operations are only allowed in single threaded mode, during
+	 * recovery, and we want to assert that this is the case:
+	 */
+	struct task_struct	*recovery_task;
+
 	/*
 	 * Analogous to c->writes, for asynchronous ops that don't necessarily
 	 * need fs to be read-write
@@ -763,6 +768,8 @@ struct bch_fs {
 		__uuid_t	user_uuid;

 		u16		version;
+		u16		version_incompat;
+		u16		version_incompat_allowed;
 		u16		version_min;
 		u16		version_upgrade_complete;

@@ -781,6 +788,9 @@ struct bch_fs {
 		u64		btrees_lost_data;
 	} sb;

+#ifdef CONFIG_UNICODE
+	struct unicode_map	*cf_encoding;
+#endif

 	struct bch_sb_handle	disk_sb;

@@ -833,9 +843,10 @@ struct bch_fs {
 	struct work_struct	btree_interior_update_work;

 	struct workqueue_struct *btree_node_rewrite_worker;
-
-	struct list_head	pending_node_rewrites;
-	struct mutex		pending_node_rewrites_lock;
+	struct list_head	btree_node_rewrites;
+	struct list_head	btree_node_rewrites_pending;
+	spinlock_t		btree_node_rewrites_lock;
+	struct closure_waitlist	btree_node_rewrites_wait;

 	/* btree_io.c: */
 	spinlock_t		btree_write_error_lock;
@@ -966,11 +977,9 @@ struct bch_fs {
 	struct rhashtable	promote_table;

 	mempool_t		compression_bounce[2];
-	mempool_t		compress_workspace[BCH_COMPRESSION_TYPE_NR];
-	mempool_t		decompress_workspace;
+	mempool_t		compress_workspace[BCH_COMPRESSION_OPT_NR];
 	size_t			zstd_workspace_size;

 	struct crypto_shash	*sha256;
 	struct crypto_sync_skcipher *chacha20;
 	struct crypto_shash	*poly1305;

@@ -994,15 +1003,11 @@ struct bch_fs {
 	wait_queue_head_t	copygc_running_wq;

 	/* STRIPES: */
-	GENRADIX(struct stripe) stripes;
 	GENRADIX(struct gc_stripe) gc_stripes;

 	struct hlist_head	ec_stripes_new[32];
 	spinlock_t		ec_stripes_new_lock;

-	ec_stripes_heap		ec_stripes_heap;
-	struct mutex		ec_stripes_heap_lock;
-
 	/* ERASURE CODING */
 	struct list_head	ec_stripe_head_list;
 	struct mutex		ec_stripe_head_lock;
@@ -1026,6 +1031,7 @@ struct bch_fs {
 	struct list_head	vfs_inodes_list;
 	struct mutex		vfs_inodes_lock;
 	struct rhashtable	vfs_inodes_table;
+	struct rhltable		vfs_inodes_by_inum_table;

 	/* VFS IO PATH - fs-io.c */
 	struct bio_set		writepage_bioset;
@@ -1047,10 +1053,12 @@ struct bch_fs {
 	 * for signaling to the toplevel code which pass we want to run now.
 	 */
 	enum bch_recovery_pass	curr_recovery_pass;
+	enum bch_recovery_pass	next_recovery_pass;
 	/* bitmask of recovery passes that we actually ran */
 	u64			recovery_passes_complete;
 	/* never rewinds version of curr_recovery_pass */
 	enum bch_recovery_pass	recovery_pass_done;
+	spinlock_t		recovery_pass_lock;
 	struct semaphore	online_fsck_mutex;

 	/* DEBUG JUNK */
@@ -1061,9 +1069,6 @@ struct bch_fs {
 	struct btree_node	*verify_ondisk;
 	struct mutex		verify_lock;

-	u64			*unused_inode_hints;
-	unsigned		inode_shard_bits;
-
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
 	 * on the stack - have to dynamically allocate them
@@ -1085,8 +1090,6 @@ struct bch_fs {
 	u64			counters_on_mount[BCH_COUNTER_NR];
 	u64 __percpu		*counters;

-	unsigned		copy_gc_enabled:1;
-
 	struct bch2_time_stats	times[BCH_TIME_STAT_NR];

 	struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k)
x(snapshot_tree, 31) \
x(logged_op_truncate, 32) \
x(logged_op_finsert, 33) \
x(accounting, 34)
x(accounting, 34) \
x(inode_alloc_cursor, 35)

enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -463,7 +464,8 @@ struct bch_backpointer {
__u8 btree_id;
__u8 level;
__u8 data_type;
__u64 bucket_offset:40;
__u8 bucket_gen;
__u32 pad;
__u32 bucket_len;
struct bpos pos;
} __packed __aligned(8);
@@ -499,8 +501,6 @@ struct bch_sb_field {
#include "disk_groups_format.h"
#include "extents_format.h"
#include "ec_format.h"
#include "dirent_format.h"
#include "disk_groups_format.h"
#include "inode_format.h"
#include "journal_seq_blacklist_format.h"
#include "logged_ops_format.h"
@@ -679,7 +679,19 @@ struct bch_sb_field_ext {
x(disk_accounting_v3, BCH_VERSION(1, 10)) \
x(disk_accounting_inum, BCH_VERSION(1, 11)) \
x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \
x(inode_has_child_snapshots, BCH_VERSION(1, 13))
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
x(inode_depth, BCH_VERSION(1, 17)) \
x(persistent_inode_cursors, BCH_VERSION(1, 18)) \
x(autofix_errors, BCH_VERSION(1, 19)) \
x(directory_size, BCH_VERSION(1, 20)) \
x(cached_backpointers, BCH_VERSION(1, 21)) \
x(stripe_backpointers, BCH_VERSION(1, 22)) \
x(stripe_lru, BCH_VERSION(1, 23)) \
x(casefolding, BCH_VERSION(1, 24)) \
x(extent_flags, BCH_VERSION(1, 25))

enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -830,6 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
/* one free bit */
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
@@ -844,6 +857,12 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
struct bch_sb, flags[5], 16, 32);
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48);
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
struct bch_sb, flags[5], 48, 64);
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);

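For orientation: LE64_BITMASK(NAME, type, field, start, end) generates a getter/setter pair over bits [start, end) of a little-endian 64-bit field. Roughly (a simplified sketch of the getter; the real definition lives earlier in bcachefs_format.h):

#define LE64_BITMASK(name, type, field, offset, end)			\
static inline __u64 name(const type *k)				\
{									\
	return (__le64_to_cpu(k->field) >> (offset)) &			\
		~(~0ULL << ((end) - (offset)));				\
}

so e.g. BCH_SB_SHARD_INUMS_NBITS(sb) reads bits 0-3 of sb->flags[6].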
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -896,21 +915,24 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
x(new_varint, 15) \
x(journal_no_flush, 16) \
x(alloc_v2, 17) \
x(extents_across_btree_nodes, 18)
x(extents_across_btree_nodes, 18) \
x(incompat_version_field, 19) \
x(casefolding, 20)

#define BCH_SB_FEATURES_ALWAYS \
((1ULL << BCH_FEATURE_new_extent_overwrite)| \
(1ULL << BCH_FEATURE_extents_above_btree_updates)|\
(1ULL << BCH_FEATURE_btree_updates_journalled)|\
(1ULL << BCH_FEATURE_alloc_v2)|\
(1ULL << BCH_FEATURE_extents_across_btree_nodes))
(BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \
BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\
BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\
BIT_ULL(BCH_FEATURE_alloc_v2)|\
BIT_ULL(BCH_FEATURE_extents_across_btree_nodes))

#define BCH_SB_FEATURES_ALL \
(BCH_SB_FEATURES_ALWAYS| \
(1ULL << BCH_FEATURE_new_siphash)| \
(1ULL << BCH_FEATURE_btree_ptr_v2)| \
(1ULL << BCH_FEATURE_new_varint)| \
(1ULL << BCH_FEATURE_journal_no_flush))
BIT_ULL(BCH_FEATURE_new_siphash)| \
BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \
BIT_ULL(BCH_FEATURE_new_varint)| \
BIT_ULL(BCH_FEATURE_journal_no_flush)| \
BIT_ULL(BCH_FEATURE_incompat_version_field))

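The hunk above swaps open-coded shifts for the kernel's BIT_ULL() helper; the two spellings are equivalent, since BIT_ULL(n) is defined as (1ULL << (n)) in linux/bits.h. A compile-time sanity check illustrating that (a sketch, not code from this tree):

/* BIT_ULL(n) == (1ULL << n), so the conversion is purely cosmetic */
static_assert(BIT_ULL(BCH_FEATURE_alloc_v2) ==
	      (1ULL << BCH_FEATURE_alloc_v2));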
enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f,
@@ -1032,7 +1054,7 @@ static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
x(crc64, 2) \
x(xxhash, 3)

enum bch_csum_opts {
enum bch_csum_opt {
#define x(t, n) BCH_CSUM_OPT_##t = n,
BCH_CSUM_OPTS()
#undef x
@@ -1121,7 +1143,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(log, 9) \
x(overwrite, 10) \
x(write_buffer_keys, 11) \
x(datetime, 12)
x(datetime, 12) \
x(log_bkey, 13)

enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
@@ -1221,6 +1244,15 @@ struct jset_entry_log {
u8 d[];
} __packed __aligned(8);

static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l)
{
unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d);

while (b && !l->d[b - 1])
--b;
return b;
}
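A usage sketch for the helper just added (hypothetical caller): it returns the message length with trailing NUL padding stripped, so the message can be printed with a precision specifier:

static void print_journal_log_entry(struct jset_entry_log *l)
{
	/* %.*s limits the print to the unpadded message bytes */
	printk(KERN_INFO "journal log: %.*s\n",
	       jset_entry_log_msg_bytes(l), l->d);
}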

struct jset_entry_datetime {
struct jset_entry entry;
__le64 seconds;
@@ -1268,14 +1300,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
/* Btree: */

enum btree_id_flags {
BTREE_ID_EXTENTS = BIT(0),
BTREE_ID_SNAPSHOTS = BIT(1),
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
BTREE_ID_DATA = BIT(3),
BTREE_IS_extents = BIT(0),
BTREE_IS_snapshots = BIT(1),
BTREE_IS_snapshot_field = BIT(2),
BTREE_IS_data = BIT(3),
BTREE_IS_write_buffer = BIT(4),
};

#define BCH_BTREE_IDS() \
x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
x(extents, 0, \
BTREE_IS_extents| \
BTREE_IS_snapshots| \
BTREE_IS_data, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_error)| \
BIT_ULL(KEY_TYPE_cookie)| \
@@ -1283,17 +1319,20 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_reservation)| \
BIT_ULL(KEY_TYPE_reflink_p)| \
BIT_ULL(KEY_TYPE_inline_data)) \
x(inodes, 1, BTREE_ID_SNAPSHOTS, \
x(inodes, 1, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_inode)| \
BIT_ULL(KEY_TYPE_inode_v2)| \
BIT_ULL(KEY_TYPE_inode_v3)| \
BIT_ULL(KEY_TYPE_inode_generation)) \
x(dirents, 2, BTREE_ID_SNAPSHOTS, \
x(dirents, 2, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_hash_whiteout)| \
BIT_ULL(KEY_TYPE_dirent)) \
x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
x(xattrs, 3, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_cookie)| \
BIT_ULL(KEY_TYPE_hash_whiteout)| \
@@ -1307,7 +1346,9 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_quota)) \
x(stripes, 6, 0, \
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
x(reflink, 7, \
BTREE_IS_extents| \
BTREE_IS_data, \
BIT_ULL(KEY_TYPE_reflink_v)| \
BIT_ULL(KEY_TYPE_indirect_inline_data)| \
BIT_ULL(KEY_TYPE_error)) \
@@ -1315,28 +1356,38 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_subvolume)) \
x(snapshots, 9, 0, \
BIT_ULL(KEY_TYPE_snapshot)) \
x(lru, 10, 0, \
x(lru, 10, \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(freespace, 11, BTREE_ID_EXTENTS, \
x(freespace, 11, \
BTREE_IS_extents, \
BIT_ULL(KEY_TYPE_set)) \
x(need_discard, 12, 0, \
BIT_ULL(KEY_TYPE_set)) \
x(backpointers, 13, 0, \
x(backpointers, 13, \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_backpointer)) \
x(bucket_gens, 14, 0, \
BIT_ULL(KEY_TYPE_bucket_gens)) \
x(snapshot_trees, 15, 0, \
BIT_ULL(KEY_TYPE_snapshot_tree)) \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
x(deleted_inodes, 16, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(logged_ops, 17, 0, \
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
x(rebalance_work, 18, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
x(subvolume_children, 19, 0, \
BIT_ULL(KEY_TYPE_set)) \
x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \
x(accounting, 20, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_accounting)) \

enum btree_id {
@@ -1361,6 +1412,8 @@ static inline bool btree_id_is_alloc(enum btree_id id)
case BTREE_ID_need_discard:
case BTREE_ID_freespace:
case BTREE_ID_bucket_gens:
case BTREE_ID_lru:
case BTREE_ID_accounting:
return true;
default:
return false;

@@ -87,6 +87,7 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online)
#define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting)
#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters)

/* ioctl below act on a particular file, not the filesystem as a whole: */

@@ -213,6 +214,10 @@ struct bch_ioctl_data {
struct bpos end_pos;

union {
struct {
__u32 dev;
__u32 data_types;
} scrub;
struct {
__u32 dev;
__u32 pad;
@@ -229,6 +234,11 @@ enum bch_data_event {
BCH_DATA_EVENT_NR = 1,
};

enum data_progress_data_type_special {
DATA_PROGRESS_DATA_TYPE_phys = 254,
DATA_PROGRESS_DATA_TYPE_done = 255,
};

struct bch_ioctl_data_progress {
__u8 data_type;
__u8 btree_id;
@@ -237,11 +247,19 @@ struct bch_ioctl_data_progress {

__u64 sectors_done;
__u64 sectors_total;
__u64 sectors_error_corrected;
__u64 sectors_error_uncorrected;
} __packed __aligned(8);

enum bch_ioctl_data_event_ret {
BCH_IOCTL_DATA_EVENT_RET_done = 1,
BCH_IOCTL_DATA_EVENT_RET_device_offline = 2,
};

struct bch_ioctl_data_event {
__u8 type;
__u8 pad[7];
__u8 ret;
__u8 pad[6];
union {
struct bch_ioctl_data_progress p;
__u64 pad2[15];
@@ -443,4 +461,13 @@ struct bch_ioctl_query_accounting {
struct bkey_i_accounting accounting[];
};

#define BCH_IOCTL_QUERY_COUNTERS_MOUNT (1 << 0)

struct bch_ioctl_query_counters {
__u16 nr;
__u16 flags;
__u32 pad;
__u64 d[];
};
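A hypothetical userspace sketch of driving the new ioctl — the flow is inferred from the struct layout above, and error handling is elided:

/* Query the first 8 counters; with the MOUNT flag, values as of last mount */
struct bch_ioctl_query_counters *q;
unsigned nr = 8;

q = calloc(1, sizeof(*q) + nr * sizeof(__u64));
q->nr = nr;
q->flags = BCH_IOCTL_QUERY_COUNTERS_MOUNT;

if (!ioctl(fs_fd, BCH_IOCTL_QUERY_COUNTERS, q))
	for (unsigned i = 0; i < q->nr; i++)
		printf("counter %u: %llu\n", i, (unsigned long long) q->d[i]);
free(q);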

#endif /* _BCACHEFS_IOCTL_H */

@@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
enum bch_validate_flags flags,
struct printbuf *err)
{
unsigned i, bits = KEY_PACKED_BITS_START;
unsigned bits = KEY_PACKED_BITS_START;

if (f->nr_fields != BKEY_NR_FIELDS) {
prt_printf(err, "incorrect number of fields: got %u, should be %u",
@@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
* Verify that the packed format can't represent fields larger than the
* unpacked format:
*/
for (i = 0; i < f->nr_fields; i++) {
if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) &&
bch2_bkey_format_field_overflows(f, i)) {
for (unsigned i = 0; i < f->nr_fields; i++) {
if (bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
unsigned packed_bits = min(64, f->bits_per_field[i]);

@@ -9,13 +9,6 @@
#include "util.h"
#include "vstructs.h"

enum bch_validate_flags {
BCH_VALIDATE_write = BIT(0),
BCH_VALIDATE_commit = BIT(1),
BCH_VALIDATE_journal = BIT(2),
BCH_VALIDATE_silent = BIT(3),
};

#if 0

/*

@@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = {
};

static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
return 0;
}
@@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
})

static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
int ret = 0;

@@ -59,7 +59,7 @@ fsck_err:
})

static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
return 0;
}
@@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c,
})

static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
return 0;
}
@@ -124,7 +124,7 @@ const struct bkey_ops bch2_bkey_null_ops = {
};

int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
return 0;
@@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
if (!ops->key_validate)
return 0;

ret = ops->key_validate(c, k, flags);
ret = ops->key_validate(c, k, from);
fsck_err:
return ret;
}
@@ -161,9 +161,10 @@ const char *bch2_btree_node_type_str(enum btree_node_type type)
}

int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
enum btree_node_type type = __btree_node_type(from.level, from.btree);

if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
return 0;

@@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
return 0;

bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
(type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) &&
(type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
c, bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
@@ -228,15 +229,15 @@ fsck_err:
}

int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type,
enum bch_validate_flags flags)
struct bkey_validate_context from)
{
return __bch2_bkey_validate(c, k, type, flags) ?:
bch2_bkey_val_validate(c, k, flags);
return __bch2_bkey_validate(c, k, from) ?:
bch2_bkey_val_validate(c, k, from);
}

int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
struct bkey_s_c k, enum bch_validate_flags flags)
struct bkey_s_c k,
struct bkey_validate_context from)
{
int ret = 0;

@@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops;
*/
struct bkey_ops {
int (*key_validate)(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags);
struct bkey_validate_context from);
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(struct bkey_s);
@@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
: &bch2_bkey_null_ops;
}

int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
enum bch_validate_flags);
int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
enum bch_validate_flags);
int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context);
int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context);
int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context);
int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c,
enum bch_validate_flags);
struct bkey_validate_context from);

void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);

@@ -210,4 +210,32 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
BCH_BKEY_TYPES();
#undef x

enum bch_validate_flags {
BCH_VALIDATE_write = BIT(0),
BCH_VALIDATE_commit = BIT(1),
BCH_VALIDATE_silent = BIT(2),
};

#define BKEY_VALIDATE_CONTEXTS() \
x(unknown) \
x(superblock) \
x(journal) \
x(btree_root) \
x(btree_node) \
x(commit)

struct bkey_validate_context {
enum {
#define x(n) BKEY_VALIDATE_##n,
BKEY_VALIDATE_CONTEXTS()
#undef x
} from:8;
enum bch_validate_flags flags:8;
u8 level;
enum btree_id btree;
bool root:1;
unsigned journal_offset;
u64 journal_seq;
};
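A quick usage sketch of the new context struct, matching the updated signatures above (caller and field choices are illustrative):

struct bkey_validate_context from = {
	.from	= BKEY_VALIDATE_btree_node,
	.level	= b->c.level,
	.btree	= b->c.btree_id,
};

ret = bch2_bkey_validate(c, k, from);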

#endif /* _BCACHEFS_BKEY_TYPES_H */

@@ -13,7 +13,7 @@
#include "trace.h"
#include "util.h"

#include <asm/unaligned.h>
#include <linux/unaligned.h>
#include <linux/console.h>
#include <linux/random.h>
#include <linux/prefetch.h>

@@ -24,7 +24,10 @@ do { \
} while (0)

const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
"typebit",
"typebit",
"typebit",
#define x(f) [BTREE_NODE_##f] = #f,
BTREE_FLAGS()
#undef x
NULL
@@ -59,16 +62,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)

static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));

if (b->c.lock.readers)
list_move(&b->list, &bc->freed_pcpu);
list_add(&b->list, &bc->freed_pcpu);
else
list_move(&b->list, &bc->freed_nonpcpu);
list_add(&b->list, &bc->freed_nonpcpu);
}

static void btree_node_data_free(struct bch_fs *c, struct btree *b)
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(!b->data);

bc->nr_freeable++;
list_add(&b->list, &bc->freeable);
}

void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;

mutex_lock(&bc->lock);
__bch2_btree_node_to_freelist(bc, b);
mutex_unlock(&bc->lock);

six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}

static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));

/*
@@ -94,11 +119,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
#endif
b->aux_data = NULL;

bc->nr_freeable--;

btree_node_to_freedlist(bc, b);
}

static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
BUG_ON(list_empty(&b->list));
list_del_init(&b->list);
--bc->nr_freeable;
__btree_node_data_free(bc, b);
}

static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
const void *obj)
{
@@ -172,23 +203,12 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
return NULL;
}

bch2_btree_lock_init(&b->c, 0);
bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);

bc->nr_freeable++;
list_add(&b->list, &bc->freeable);
__bch2_btree_node_to_freelist(bc, b);
return b;
}

void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);

six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}

static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
{
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
@@ -205,7 +225,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
struct btree_cache *bc = &c->btree_cache;

mutex_lock(&bc->lock);
BUG_ON(!__btree_node_pinned(bc, b));
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
set_btree_node_pinned(b);
list_move(&b->list, &bc->live[1].list);
@@ -236,11 +255,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c)

/* Btree in memory cache - hash table */

void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
lockdep_assert_held(&bc->lock);
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);

int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
BUG_ON(ret);

/* Cause future lookups for this node to fail: */
@@ -248,17 +267,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)

if (b->c.btree_id < BTREE_ID_NR)
--bc->nr_by_btree[b->c.btree_id];
--bc->live[btree_node_pinned(b)].nr;
list_del_init(&b->list);
}

bc->live[btree_node_pinned(b)].nr--;
bc->nr_freeable++;
list_move(&b->list, &bc->freeable);
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
__bch2_btree_node_hash_remove(bc, b);
__bch2_btree_node_to_freelist(bc, b);
}

int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(b->hash_val);
b->hash_val = btree_ptr_hash_val(&b->key);

b->hash_val = btree_ptr_hash_val(&b->key);
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
bch_btree_cache_params);
if (ret)
@@ -270,10 +294,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
bool p = __btree_node_pinned(bc, b);
mod_bit(BTREE_NODE_pinned, &b->flags, p);

list_move_tail(&b->list, &bc->live[p].list);
list_add_tail(&b->list, &bc->live[p].list);
bc->live[p].nr++;

bc->nr_freeable--;
return 0;
}

@@ -306,7 +328,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
if (!IS_ERR_OR_NULL(b)) {
mutex_lock(&c->btree_cache.lock);

bch2_btree_node_hash_remove(&c->btree_cache, b);
__bch2_btree_node_hash_remove(&c->btree_cache, b);

bkey_copy(&b->key, new);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
@@ -485,7 +507,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
goto out;

if (!btree_node_reclaim(c, b, true)) {
btree_node_data_free(c, b);
btree_node_data_free(bc, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
freed++;
@@ -501,10 +523,10 @@ restart:
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
--touched;
} else if (!btree_node_reclaim(c, b, true)) {
bch2_btree_node_hash_remove(bc, b);
__bch2_btree_node_hash_remove(bc, b);
__btree_node_data_free(bc, b);

freed++;
btree_node_data_free(c, b);
bc->nr_freed++;

six_unlock_write(&b->c.lock);
@@ -587,7 +609,8 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
BUG_ON(btree_node_read_in_flight(b) ||
btree_node_write_in_flight(b));

btree_node_data_free(c, b);
btree_node_data_free(bc, b);
cond_resched();
}

BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -773,21 +796,22 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
}

b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
if (!b) {
if (b) {
bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
} else {
mutex_unlock(&bc->lock);
bch2_trans_unlock(trans);
b = __btree_node_mem_alloc(c, GFP_KERNEL);
if (!b)
goto err;
bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
mutex_lock(&bc->lock);
}

bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);

BUG_ON(!six_trylock_intent(&b->c.lock));
BUG_ON(!six_trylock_write(&b->c.lock));
got_node:

got_node:
/*
* btree_free() doesn't free memory; it sticks the node on the end of
* the list. Check if there's any freed nodes there:
@@ -796,7 +820,12 @@ got_node:
if (!btree_node_reclaim(c, b2, false)) {
swap(b->data, b2->data);
swap(b->aux_data, b2->aux_data);

list_del_init(&b2->list);
--bc->nr_freeable;
btree_node_to_freedlist(bc, b2);
mutex_unlock(&bc->lock);

six_unlock_write(&b2->c.lock);
six_unlock_intent(&b2->c.lock);
goto got_mem;
@@ -810,11 +839,8 @@ got_node:
goto err;
}

mutex_lock(&bc->lock);
bc->nr_freeable++;
got_mem:
mutex_unlock(&bc->lock);

BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
BUG_ON(btree_node_dirty(b));
BUG_ON(btree_node_write_in_flight(b));
@@ -845,7 +871,7 @@ err:
if (bc->alloc_lock == current) {
b2 = btree_node_cannibalize(c);
clear_btree_node_just_written(b2);
bch2_btree_node_hash_remove(bc, b2);
__bch2_btree_node_hash_remove(bc, b2);

if (b) {
swap(b->data, b2->data);
@@ -855,9 +881,9 @@ err:
six_unlock_intent(&b2->c.lock);
} else {
b = b2;
list_del_init(&b->list);
}

BUG_ON(!list_empty(&b->list));
mutex_unlock(&bc->lock);

trace_and_count(c, btree_cache_cannibalize, trans);
@@ -936,7 +962,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
b->hash_val = 0;

mutex_lock(&bc->lock);
list_add(&b->list, &bc->freeable);
__bch2_btree_node_to_freelist(bc, b);
mutex_unlock(&bc->lock);

six_unlock_write(&b->c.lock);
@@ -982,16 +1008,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
return;

prt_printf(&buf,
"btree node header doesn't match ptr\n"
"btree %s level %u\n"
"ptr: ",
bch2_btree_id_str(b->c.btree_id), b->c.level);
"btree node header doesn't match ptr: ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_str(&buf, "\nptr: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

prt_printf(&buf, "\nheader: btree %s level %llu\n"
"min ",
bch2_btree_id_str(BTREE_NODE_ID(b->data)),
BTREE_NODE_LEVEL(b->data));
prt_str(&buf, "\nheader: ");
bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data));
prt_str(&buf, "\nmin ");
bch2_bpos_to_text(&buf, b->data->min_key);

prt_printf(&buf, "\nmax ");
@@ -1111,7 +1135,7 @@ retry:

if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
return ERR_PTR(-BCH_ERR_btree_node_read_error);
return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
}

EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1201,7 +1225,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *

if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
return ERR_PTR(-BCH_ERR_btree_node_read_error);
return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
}

EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1283,7 +1307,7 @@ lock_node:

if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
b = ERR_PTR(-BCH_ERR_btree_node_read_error);
b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
goto out;
}

@@ -1312,9 +1336,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,

b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
if (!IS_ERR_OR_NULL(b))
int ret = PTR_ERR_OR_ZERO(b);
if (ret)
return ret;
if (b)
six_unlock_read(&b->c.lock);
return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
return 0;
}

void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1353,7 +1380,7 @@ wait_on_io:

mutex_lock(&bc->lock);
bch2_btree_node_hash_remove(bc, b);
btree_node_data_free(c, b);
btree_node_data_free(bc, b);
mutex_unlock(&bc->lock);
out:
six_unlock_write(&b->c.lock);
@@ -1373,13 +1400,31 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree)
prt_printf(out, "(unknown btree %u)", btree);
}

void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level)
{
prt_str(out, "btree=");
bch2_btree_id_to_text(out, btree);
prt_printf(out, " level=%u", level);
}

void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
enum btree_id btree, unsigned level, struct bkey_s_c k)
{
bch2_btree_id_to_text(out, btree);
prt_printf(out, " level %u/", level);
struct btree_root *r = bch2_btree_id_root(c, btree);
if (r)
prt_printf(out, "%u", r->level);
else
prt_printf(out, "(unknown)");
prt_newline(out);

bch2_bkey_val_to_text(out, c, k);
}

void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
prt_printf(out, "%s level %u/%u\n ",
bch2_btree_id_str(b->c.btree_id),
b->c.level,
bch2_btree_id_root(c, b->c.btree_id)->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
__bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key));
}

void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
@@ -1453,8 +1498,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
prt_newline(out);

for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]);
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) {
bch2_btree_id_to_text(out, i);
prt_printf(out, "\t");
prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size);
prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]);
}

prt_newline(out);
prt_printf(out, "freed:\t%zu\n", bc->nr_freed);

@@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *);

void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);

void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);

int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
unsigned, enum btree_id);
@@ -126,19 +128,27 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i
} else {
unsigned idx = id - BTREE_ID_NR;

EBUG_ON(idx >= c->btree_roots_extra.nr);
/* This can happen when we're called from btree_node_scan */
if (idx >= c->btree_roots_extra.nr)
return NULL;

return &c->btree_roots_extra.data[idx];
}
}

static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
{
return bch2_btree_id_root(c, b->c.btree_id)->b;
struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id);

return r ? r->b : NULL;
}

const char *bch2_btree_id_str(enum btree_id);
const char *bch2_btree_id_str(enum btree_id); /* avoid */
void bch2_btree_id_to_text(struct printbuf *, enum btree_id);
void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned);

void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *,
enum btree_id, unsigned, struct bkey_s_c);
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);

@@ -27,8 +27,10 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "progress.h"
#include "recovery_passes.h"
#include "reflink.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"
@@ -56,8 +58,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p)
{
prt_str(out, bch2_gc_phase_strs[p->phase]);
prt_char(out, ' ');
bch2_btree_id_to_text(out, p->btree);
prt_printf(out, " l=%u ", p->level);
bch2_btree_id_level_to_text(out, p->btree, p->level);
prt_char(out, ' ');
bch2_bpos_to_text(out, p->pos);
}

@@ -182,7 +184,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
bch2_btree_node_drop_keys_outside_node(b);

mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, b);
__bch2_btree_node_hash_remove(&c->btree_cache, b);

bkey_copy(&b->key, &new->k_i);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
@@ -209,16 +211,17 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
if (bpos_eq(expected_start, cur->data->min_key))
return 0;

prt_printf(&buf, " at btree %s level %u:\n parent: ",
bch2_btree_id_str(b->c.btree_id), b->c.level);
prt_printf(&buf, " at ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_printf(&buf, ":\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

if (prev) {
prt_printf(&buf, "\n prev: ");
prt_printf(&buf, "\nprev: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}

prt_str(&buf, "\n next: ");
prt_str(&buf, "\nnext: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));

if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */
@@ -277,11 +280,12 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
if (bpos_eq(child->key.k.p, b->key.k.p))
return 0;

prt_printf(&buf, "at btree %s level %u:\n parent: ",
bch2_btree_id_str(b->c.btree_id), b->c.level);
prt_printf(&buf, "\nat: ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_printf(&buf, "\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

prt_str(&buf, "\n child: ");
prt_str(&buf, "\nchild: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));

if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,
@@ -341,14 +345,13 @@ again:
ret = PTR_ERR_OR_ZERO(cur);

printbuf_reset(&buf);
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));

if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
trans, btree_node_unreadable,
"Topology repair: unreadable btree node at btree %s level %u:\n"
" %s",
bch2_btree_id_str(b->c.btree_id),
b->c.level - 1,
trans, btree_node_read_error,
"Topology repair: unreadable btree node at\n%s",
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
@@ -357,11 +360,9 @@ again:
if (ret)
break;

if (!btree_id_is_alloc(b->c.btree_id)) {
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
if (ret)
break;
}
ret = bch2_btree_lost_data(c, b->c.btree_id);
if (ret)
break;
continue;
}

@@ -370,7 +371,7 @@ again:
break;

if (bch2_btree_node_is_stale(c, cur)) {
bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf);
six_unlock_read(&cur->c.lock);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
@@ -478,14 +479,13 @@ again:
}

printbuf_reset(&buf);
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

if (mustfix_fsck_err_on(!have_child,
trans, btree_node_topology_interior_node_empty,
"empty interior btree node at btree %s level %u\n"
" %s",
bch2_btree_id_str(b->c.btree_id),
b->c.level, buf.buf))
"empty interior btree node at %s", buf.buf))
ret = DROP_THIS_NODE;
err:
fsck_err:
@@ -511,6 +511,7 @@ int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
struct bpos pulled_from_scan = POS_MIN;
struct printbuf buf = PRINTBUF;
int ret = 0;

bch2_trans_srcu_unlock(trans);
@@ -519,19 +520,22 @@ int bch2_check_topology(struct bch_fs *c)
struct btree_root *r = bch2_btree_id_root(c, i);
bool reconstructed_root = false;

printbuf_reset(&buf);
bch2_btree_id_to_text(&buf, i);

if (r->error) {
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
ret = bch2_btree_lost_data(c, i);
if (ret)
break;
reconstruct_root:
bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);

r->alive = false;
r->error = 0;

if (!bch2_btree_has_scanned_nodes(c, i)) {
mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing,
"no nodes found for btree %s, continue?", bch2_btree_id_str(i));
"no nodes found for btree %s, continue?", buf.buf);
bch2_btree_root_alloc_fake_trans(trans, i, 0);
} else {
bch2_btree_root_alloc_fake_trans(trans, i, 1);
@@ -560,13 +564,14 @@ reconstruct_root:
if (!reconstructed_root)
goto reconstruct_root;

bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
bch_err(c, "empty btree root %s", buf.buf);
bch2_btree_root_alloc_fake_trans(trans, i, 0);
r->alive = false;
ret = 0;
}
}
fsck_err:
printbuf_exit(&buf);
bch2_trans_put(trans);
return ret;
}
@@ -606,7 +611,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
k.k->bversion.lo > atomic64_read(&c->key_version),
trans, bkey_version_in_future,
"key version number higher than recorded %llu\n %s",
"key version number higher than recorded %llu\n%s",
atomic64_read(&c->key_version),
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->bversion.lo);
@@ -614,7 +619,7 @@

if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
trans, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n %s",
"btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
@@ -651,7 +656,9 @@ fsck_err:
return ret;
}

static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
static int bch2_gc_btree(struct btree_trans *trans,
struct progress_indicator_state *progress,
enum btree_id btree, bool initial)
{
struct bch_fs *c = trans->c;
unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
@@ -668,6 +675,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
BTREE_ITER_prefetch);

ret = for_each_btree_key_continue(trans, iter, 0, k, ({
bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
}));
@@ -712,29 +720,27 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
static int bch2_gc_btrees(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id ids[BTREE_ID_NR];
unsigned i;
struct printbuf buf = PRINTBUF;
int ret = 0;

for (i = 0; i < BTREE_ID_NR; i++)
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, ~0ULL);

enum btree_id ids[BTREE_ID_NR];
for (unsigned i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
unsigned btree = i < BTREE_ID_NR ? ids[i] : i;

if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
continue;

ret = bch2_gc_btree(trans, btree, true);

if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
trans, btree_node_read_error,
"btree node read error for %s",
bch2_btree_id_str(btree)))
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
ret = bch2_gc_btree(trans, &progress, btree, true);
}
fsck_err:

printbuf_exit(&buf);
bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
@@ -802,7 +808,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
old = bch2_alloc_to_v4(k, &old_convert);
gc = new = *old;

percpu_down_read(&c->mark_lock);
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));

old_gc = gc;
@@ -813,19 +818,26 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
gc.data_type = old->data_type;
gc.dirty_sectors = old->dirty_sectors;
}
percpu_up_read(&c->mark_lock);

/*
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
* fix that here:
*/
alloc_data_type_set(&gc, gc.data_type);

if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors) {
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
if (ret)
return ret;

/*
* Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
* we don't run it twice:
*/
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
}

if (fsck_err_on(new.data_type != gc.data_type,
@@ -885,11 +897,11 @@ static int bch2_gc_alloc_done(struct bch_fs *c)

for_each_member_device(c, ca) {
ret = bch2_trans_run(c,
for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
POS(ca->dev_idx, ca->mi.nbuckets - 1),
BTREE_ITER_slots|BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_alloc_write_key(trans, &iter, ca, k)));
if (ret) {
bch2_dev_put(ca);
@@ -918,98 +930,6 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
return ret;
}

static int bch2_gc_write_reflink_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
size_t *idx)
{
struct bch_fs *c = trans->c;
const __le64 *refcount = bkey_refcount_c(k);
struct printbuf buf = PRINTBUF;
struct reflink_gc *r;
int ret = 0;

if (!refcount)
return 0;

while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
r->offset < k.k->p.offset)
++*idx;

if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
return -EINVAL;
}

if (fsck_err_on(r->refcount != le64_to_cpu(*refcount),
trans, reflink_v_refcount_wrong,
"reflink key has wrong refcount:\n"
" %s\n"
" should be %u",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
r->refcount)) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto out;

if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
else
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
ret = bch2_trans_update(trans, iter, new, 0);
}
out:
fsck_err:
printbuf_exit(&buf);
return ret;
}

static int bch2_gc_reflink_done(struct bch_fs *c)
{
size_t idx = 0;

int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
c->reflink_gc_nr = 0;
return ret;
}

static int bch2_gc_reflink_start(struct bch_fs *c)
{
c->reflink_gc_nr = 0;

int ret = bch2_trans_run(c,
for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_prefetch, k, ({
const __le64 *refcount = bkey_refcount_c(k);

if (!refcount)
continue;

struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
c->reflink_gc_nr++, GFP_KERNEL);
if (!r) {
ret = -BCH_ERR_ENOMEM_gc_reflink_start;
break;
}

r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
0;
})));

bch_err_fn(c, ret);
return ret;
}

static int bch2_gc_write_stripes_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
@@ -1100,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c)
{
int ret;

lockdep_assert_held(&c->state_lock);

down_read(&c->state_lock);
down_write(&c->gc_lock);

bch2_btree_interior_updates_flush(c);
@@ -1139,6 +1058,7 @@ out:
percpu_up_write(&c->mark_lock);

up_write(&c->gc_lock);
up_read(&c->state_lock);

/*
* At startup, allocations can happen directly instead of via the
@@ -1161,7 +1081,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;

percpu_down_read(&c->mark_lock);
rcu_read_lock();
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
@@ -1170,7 +1089,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,

if (dev_ptr_stale(ca, ptr) > 16) {
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
goto update;
}
}
@@ -1185,7 +1103,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
*gen = ptr->gen;
}
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return 0;
update:
u = bch2_bkey_make_mut(trans, iter, &k, 0);
@@ -1214,7 +1131,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev
return ret;

a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
alloc_data_type_set(&a_mut->v, a_mut->v.data_type);

return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}
@@ -1327,9 +1243,16 @@ void bch2_gc_gens_async(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}

void bch2_fs_gc_init(struct bch_fs *c)
void bch2_fs_btree_gc_exit(struct bch_fs *c)
{
}

int bch2_fs_btree_gc_init(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);

INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);

init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
return 0;
}

@@ -82,6 +82,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);

int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);
void bch2_fs_gc_init(struct bch_fs *);

void bch2_fs_btree_gc_exit(struct bch_fs *);
int bch2_fs_btree_gc_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_GC_H */

@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "bkey_sort.h"
|
||||
#include "btree_cache.h"
|
||||
@ -25,9 +26,8 @@
|
||||
|
||||
static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
|
||||
{
|
||||
prt_printf(out, "btree=%s l=%u seq %llux\n",
|
||||
bch2_btree_id_str(BTREE_NODE_ID(bn)),
|
||||
(unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq);
|
||||
bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn));
|
||||
prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn));
|
||||
prt_str(out, "min: ");
|
||||
bch2_bpos_to_text(out, bn->min_key);
|
||||
prt_newline(out);
|
||||
@ -490,8 +490,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
!btree_node_write_in_flight(b) &&
|
||||
should_compact_all(c, b)) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
bch2_btree_node_write_trans(trans, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
reinit_iter = true;
|
||||
}
|
||||
|
||||
@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, "at btree ");
|
||||
bch2_btree_pos_to_text(out, c, b);
|
||||
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
prt_printf(out, "\nnode offset %u/%u",
|
||||
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
|
||||
if (i)
|
||||
@@ -550,23 +548,7 @@ static int __btree_err(int ret,
                       enum bch_sb_error_id err_type,
                       const char *fmt, ...)
{
    struct printbuf out = PRINTBUF;
    bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
    va_list args;

    btree_err_msg(&out, c, ca, b, i, k, b->written, write);

    va_start(args, fmt);
    prt_vprintf(&out, fmt, args);
    va_end(args);

    if (write == WRITE) {
        bch2_print_string_as_lines(KERN_ERR, out.buf);
        ret = c->opts.errors == BCH_ON_ERROR_continue
            ? 0
            : -BCH_ERR_fsck_errors_not_fixed;
        goto out;
    }

    if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
        ret = -BCH_ERR_btree_node_read_err_fixable;
@@ -576,6 +558,29 @@ static int __btree_err(int ret,
    if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
        bch2_sb_error_count(c, err_type);

    struct printbuf out = PRINTBUF;
    if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
        printbuf_indent_add_nextline(&out, 2);
#ifdef BCACHEFS_LOG_PREFIX
        prt_printf(&out, bch2_log_msg(c, ""));
#endif
    }

    btree_err_msg(&out, c, ca, b, i, k, b->written, write);

    va_list args;
    va_start(args, fmt);
    prt_vprintf(&out, fmt, args);
    va_end(args);

    if (write == WRITE) {
        prt_str(&out, ", ");
        ret = __bch2_inconsistent_error(c, &out)
            ? -BCH_ERR_fsck_errors_not_fixed
            : 0;
        silent = false;
    }

    switch (ret) {
    case -BCH_ERR_btree_node_read_err_fixable:
        ret = !silent
@@ -585,25 +590,21 @@ static int __btree_err(int ret,
            ret != -BCH_ERR_fsck_ignore)
            goto fsck_err;
        ret = -BCH_ERR_fsck_fix;
        break;
    case -BCH_ERR_btree_node_read_err_want_retry:
    case -BCH_ERR_btree_node_read_err_must_retry:
        if (!silent)
            bch2_print_string_as_lines(KERN_ERR, out.buf);
        break;
        goto out;
    case -BCH_ERR_btree_node_read_err_bad_node:
        if (!silent)
            bch2_print_string_as_lines(KERN_ERR, out.buf);
        ret = bch2_topology_error(c);
        prt_str(&out, ", ");
        ret = __bch2_topology_error(c, &out);
        if (ret)
            silent = false;
        break;
    case -BCH_ERR_btree_node_read_err_incompatible:
        if (!silent)
            bch2_print_string_as_lines(KERN_ERR, out.buf);
        ret = -BCH_ERR_fsck_errors_not_fixed;
        silent = false;
        break;
    default:
        BUG();
    }

    if (!silent)
        bch2_print_string_as_lines(KERN_ERR, out.buf);
out:
fsck_err:
    printbuf_exit(&out);
@@ -733,11 +734,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
                 c, ca, b, i, NULL,
                 bset_past_end_of_btree_node,
                 "bset past end of btree node (offset %u len %u but written %zu)",
                 offset, sectors, ptr_written ?: btree_sectors(c))) {
                 offset, sectors, ptr_written ?: btree_sectors(c)))
        i->u64s = 0;
        ret = 0;
        goto out;
    }

    btree_err_on(offset && !i->u64s,
                 -BCH_ERR_btree_node_read_err_fixable,
@@ -820,7 +818,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
                 -BCH_ERR_btree_node_read_err_bad_node,
                 c, ca, b, i, NULL,
                 btree_node_bad_format,
                 "invalid bkey format: %s\n %s", buf1.buf,
                 "invalid bkey format: %s\n%s", buf1.buf,
                 (printbuf_reset(&buf2),
                  bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
        printbuf_reset(&buf1);
@@ -829,20 +827,38 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
                          BSET_BIG_ENDIAN(i), write,
                          &bn->format);
    }
out:
fsck_err:
    printbuf_exit(&buf2);
    printbuf_exit(&buf1);
    return ret;
}
static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b,
                                        struct bkey_s_c k,
                                        enum bch_validate_flags flags)
{
    return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) {
        .from  = BKEY_VALIDATE_btree_node,
        .level = b->c.level,
        .btree = b->c.btree_id,
        .flags = flags
    });
}

static int bset_key_validate(struct bch_fs *c, struct btree *b,
                             struct bkey_s_c k,
                             bool updated_range, int rw)
                             bool updated_range,
                             enum bch_validate_flags flags)
{
    return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?:
        (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?:
        (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0);
    struct bkey_validate_context from = (struct bkey_validate_context) {
        .from  = BKEY_VALIDATE_btree_node,
        .level = b->c.level,
        .btree = b->c.btree_id,
        .flags = flags,
    };
    return __bch2_bkey_validate(c, k, from) ?:
        (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?:
        (flags & BCH_VALIDATE_write ? btree_node_bkey_val_validate(c, b, k, flags) : 0);
}

static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
@@ -859,7 +875,21 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,

    struct bkey tmp;
    struct bkey_s u = __bkey_disassemble(b, k, &tmp);
    return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent);
    return !__bch2_bkey_validate(c, u.s_c,
                                 (struct bkey_validate_context) {
                                     .from  = BKEY_VALIDATE_btree_node,
                                     .level = b->c.level,
                                     .btree = b->c.btree_id,
                                     .flags = BCH_VALIDATE_silent
                                 });
}
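The validators above chain with the GCC/Clang `x ?: y` extension: each call returns 0 on success, and the first nonzero error code short-circuits everything after it. A minimal standalone model of the idiom (the check functions are hypothetical stand-ins):

    #include <stdio.h>

    static int check_header(void)  { return 0; }    /* 0 == ok */
    static int check_bounds(void)  { return -22; }  /* first failure wins */
    static int check_payload(void) { return -5; }   /* never evaluated here */

    int main(void)
    {
        /* a ?: b evaluates b only when a is zero, so errors short-circuit */
        int ret = check_header() ?: check_bounds() ?: check_payload();
        printf("ret = %d\n", ret);  /* prints -22 */
        return 0;
    }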
static inline int btree_node_read_bkey_cmp(const struct btree *b,
                                           const struct bkey_packed *l,
                                           const struct bkey_packed *r)
{
    return bch2_bkey_cmp_packed(b, l, r)
        ?: (int) bkey_deleted(r) - (int) bkey_deleted(l);
}

static int validate_bset_keys(struct bch_fs *c, struct btree *b,
@@ -922,7 +952,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                             BSET_BIG_ENDIAN(i), write,
                             &b->format, k);

        if (prev && bkey_iter_cmp(b, prev, k) > 0) {
        if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) {
            struct bkey up = bkey_unpack_key(b, prev);

            printbuf_reset(&buf);
@@ -968,7 +998,8 @@ drop_this_key:
        }
got_good_key:
        le16_add_cpu(&i->u64s, -next_good_key);
        memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
        memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
        set_btree_node_need_rewrite(b);
    }
fsck_err:
    printbuf_exit(&buf);
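The new comparator breaks position ties so that a whiteout (deleted key) orders before a live key at the same position, and the read path now flags prev >= k — keys must be strictly increasing under that ordering. A self-contained model of the tie-break (toy types, not the packed-bkey API):

    #include <stdio.h>
    #include <stdlib.h>

    struct toy_key {
        int pos;      /* key position */
        int deleted;  /* whiteout? */
    };

    /* Model of btree_node_read_bkey_cmp(): compare by position, and at equal
     * positions order whiteouts before live keys. */
    static int toy_key_cmp(const void *_l, const void *_r)
    {
        const struct toy_key *l = _l, *r = _r;

        return (l->pos > r->pos) - (l->pos < r->pos)
            ?: r->deleted - l->deleted;
    }

    int main(void)
    {
        struct toy_key keys[] = { {5, 0}, {3, 0}, {5, 1}, {1, 0} };

        qsort(keys, 4, sizeof(keys[0]), toy_key_cmp);
        for (int i = 0; i < 4; i++)
            printf("pos=%d deleted=%d\n", keys[i].pos, keys[i].deleted);
        /* prints 1,0  3,0  5,1  5,0 — the whiteout at pos 5 sorts first */
        return 0;
    }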
@@ -1042,39 +1073,51 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

    while (b->written < (ptr_written ?: btree_sectors(c))) {
        unsigned sectors;
        struct nonce nonce;
        bool first = !b->written;
        bool csum_bad;

        if (!b->written) {
        if (first) {
            bne = NULL;
            i = &b->data->keys;
        } else {
            bne = write_block(b);
            i = &bne->keys;

            btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
                         -BCH_ERR_btree_node_read_err_want_retry,
                         c, ca, b, i, NULL,
                         bset_unknown_csum,
                         "unknown checksum type %llu", BSET_CSUM_TYPE(i));
            if (i->seq != b->data->keys.seq)
                break;
        }

        nonce = btree_nonce(i, b->written << 9);
        struct nonce nonce = btree_nonce(i, b->written << 9);
        bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));

        struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
        csum_bad = bch2_crc_cmp(b->data->csum, csum);
        if (csum_bad)
            bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
        btree_err_on(!good_csum_type,
                     bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))
                     ? -BCH_ERR_btree_node_read_err_must_retry
                     : -BCH_ERR_btree_node_read_err_want_retry,
                     c, ca, b, i, NULL,
                     bset_unknown_csum,
                     "unknown checksum type %llu", BSET_CSUM_TYPE(i));

        btree_err_on(csum_bad,
                     -BCH_ERR_btree_node_read_err_want_retry,
                     c, ca, b, i, NULL,
                     bset_bad_csum,
                     "%s",
                     (printbuf_reset(&buf),
                      bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
                      buf.buf));
        if (first) {
            if (good_csum_type) {
                struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
                bool csum_bad = bch2_crc_cmp(b->data->csum, csum);
                if (csum_bad)
                    bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);

        ret = bset_encrypt(c, i, b->written << 9);
        if (bch2_fs_fatal_err_on(ret, c,
                "decrypting btree node: %s", bch2_err_str(ret)))
            goto fsck_err;
                btree_err_on(csum_bad,
                             -BCH_ERR_btree_node_read_err_want_retry,
                             c, ca, b, i, NULL,
                             bset_bad_csum,
                             "%s",
                             (printbuf_reset(&buf),
                              bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
                              buf.buf));

                ret = bset_encrypt(c, i, b->written << 9);
                if (bch2_fs_fatal_err_on(ret, c,
                        "decrypting btree node: %s", bch2_err_str(ret)))
                    goto fsck_err;
            }

            btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
                         !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
@@ -1085,37 +1128,26 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

            sectors = vstruct_sectors(b->data, c->block_bits);
        } else {
            bne = write_block(b);
            i = &bne->keys;
            if (good_csum_type) {
                struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
                bool csum_bad = bch2_crc_cmp(bne->csum, csum);
                if (ca && csum_bad)
                    bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);

            if (i->seq != b->data->keys.seq)
                break;
                btree_err_on(csum_bad,
                             -BCH_ERR_btree_node_read_err_want_retry,
                             c, ca, b, i, NULL,
                             bset_bad_csum,
                             "%s",
                             (printbuf_reset(&buf),
                              bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
                              buf.buf));

            btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
                         -BCH_ERR_btree_node_read_err_want_retry,
                         c, ca, b, i, NULL,
                         bset_unknown_csum,
                         "unknown checksum type %llu", BSET_CSUM_TYPE(i));

            nonce = btree_nonce(i, b->written << 9);
            struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
            csum_bad = bch2_crc_cmp(bne->csum, csum);
            if (ca && csum_bad)
                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);

            btree_err_on(csum_bad,
                         -BCH_ERR_btree_node_read_err_want_retry,
                         c, ca, b, i, NULL,
                         bset_bad_csum,
                         "%s",
                         (printbuf_reset(&buf),
                          bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
                          buf.buf));

            ret = bset_encrypt(c, i, b->written << 9);
            if (bch2_fs_fatal_err_on(ret, c,
                    "decrypting btree node: %s", bch2_err_str(ret)))
                goto fsck_err;
                ret = bset_encrypt(c, i, b->written << 9);
                if (bch2_fs_fatal_err_on(ret, c,
                        "decrypting btree node: %s", bch2_err_str(ret)))
                    goto fsck_err;
            }

            sectors = vstruct_sectors(bne, c->block_bits);
        }
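Each bset is encrypted with a nonce derived from its sector offset in the node (btree_nonce(i, b->written << 9)), so every block can be checked and decrypted independently. A toy model of offset-derived nonces, using XOR as a stand-in cipher — illustrative only, not the real encryption:

    #include <stdio.h>

    /* Toy: derive a per-block keystream from the byte offset, standing in
     * for the real per-bset nonce. */
    static unsigned char keystream(unsigned long long offset, unsigned i)
    {
        unsigned long long x = (offset + i + 1) * 0x9e3779b97f4a7c15ULL;

        return (unsigned char) (x >> 56);
    }

    static void crypt(unsigned char *buf, unsigned len, unsigned long long offset)
    {
        for (unsigned i = 0; i < len; i++)
            buf[i] ^= keystream(offset, i);  /* same call encrypts and decrypts */
    }

    int main(void)
    {
        unsigned char bset0[4] = "abc", bset1[4] = "abc";

        crypt(bset0, 4, 0ULL << 9);  /* first bset, sector offset 0 */
        crypt(bset1, 4, 2ULL << 9);  /* next bset, sector offset 2 */
        printf("different ciphertexts: %s\n",
               bset0[0] != bset1[0] ? "yes" : "no");

        crypt(bset0, 4, 0ULL << 9);  /* XOR twice restores the plaintext */
        printf("roundtrip: %s\n", (char *) bset0);
        return 0;
    }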
@@ -1156,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                 le64_to_cpu(i->journal_seq),
                 b->written, b->written + sectors, ptr_written);

        b->written += sectors;
        b->written = min(b->written + sectors, btree_sectors(c));

        if (blacklisted && !first)
            continue;
@@ -1220,7 +1252,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
        struct bkey tmp;
        struct bkey_s u = __bkey_disassemble(b, k, &tmp);

        ret = bch2_bkey_val_validate(c, u.s_c, READ);
        ret = btree_node_bkey_val_validate(c, b, u.s_c, READ);
        if (ret == -BCH_ERR_fsck_delete_bkey ||
            (bch2_inject_invalid_keys &&
             !bversion_cmp(u.k->bversion, MAX_VERSION))) {
@@ -1230,6 +1262,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
            memmove_u64s_down(k, bkey_p_next(k),
                              (u64 *) vstruct_end(i) - (u64 *) k);
            set_btree_bset_end(b, b->set);
            set_btree_node_need_rewrite(b);
            continue;
        }
        if (ret)
@@ -1297,6 +1330,7 @@ static void btree_node_read_work(struct work_struct *work)
        bch_info(c, "retrying read");
        ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
        rb->have_ioref = ca != NULL;
        rb->start_time = local_clock();
        bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
        bio->bi_iter.bi_sector = rb->pick.ptr.offset;
        bio->bi_iter.bi_size = btree_buf_bytes(b);
@@ -1307,21 +1341,26 @@ static void btree_node_read_work(struct work_struct *work)
        } else {
            bio->bi_status = BLK_STS_REMOVED;
        }

        bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
                                   rb->start_time, !bio->bi_status);
start:
        printbuf_reset(&buf);
        bch2_btree_pos_to_text(&buf, c, b);
        bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read,
                           "btree read error %s for %s",
                           bch2_blk_status_to_str(bio->bi_status), buf.buf);

        if (ca && bio->bi_status)
            bch_err_dev_ratelimited(ca,
                                    "btree read error %s for %s",
                                    bch2_blk_status_to_str(bio->bi_status), buf.buf);
        if (rb->have_ioref)
            percpu_ref_put(&ca->io_ref);
        rb->have_ioref = false;

        bch2_mark_io_failure(&failed, &rb->pick);
        bch2_mark_io_failure(&failed, &rb->pick, false);

        can_retry = bch2_bkey_pick_read_device(c,
                        bkey_i_to_s_c(&b->key),
                        &failed, &rb->pick) > 0;
                        &failed, &rb->pick, -1) > 0;

        if (!bio->bi_status &&
            !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
@@ -1343,13 +1382,18 @@ start:
                      rb->start_time);
    bio_put(&rb->bio);

    if (saw_error &&
    if ((saw_error ||
         btree_node_need_rewrite(b)) &&
        !btree_node_read_error(b) &&
        c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
        printbuf_reset(&buf);
        bch2_bpos_to_text(&buf, b->key.k.p);
        bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
                            __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
        if (saw_error) {
            printbuf_reset(&buf);
            bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
            prt_str(&buf, " ");
            bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
            bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s",
                                __func__, buf.buf);
        }

        bch2_btree_node_rewrite_async(c, b);
    }
@@ -1364,12 +1408,11 @@ static void btree_node_read_endio(struct bio *bio)
    struct btree_read_bio *rb =
        container_of(bio, struct btree_read_bio, bio);
    struct bch_fs *c = rb->c;
    struct bch_dev *ca = rb->have_ioref
        ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL;

    if (rb->have_ioref) {
        struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);

        bch2_latency_acct(ca, rb->start_time, READ);
    }
    bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
                               rb->start_time, !bio->bi_status);

    queue_work(c->btree_read_complete_wq, &rb->work);
}
@@ -1661,7 +1704,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
        return;

    ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
                                     NULL, &pick);
                                     NULL, &pick, -1);

    if (ret <= 0) {
        struct printbuf buf = PRINTBUF;
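The retry path marks the failing pointer with bch2_mark_io_failure() and asks bch2_bkey_pick_read_device() for another replica until none remain. A self-contained model of that failover loop (toy types, not the bcachefs API):

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_REPLICAS 3

    static bool read_replica(int dev) { return dev == 2; }  /* only dev 2 is healthy */

    int main(void)
    {
        bool failed[NR_REPLICAS] = { false };

        for (int tries = 0; tries < NR_REPLICAS; tries++) {
            int pick = -1;

            for (int d = 0; d < NR_REPLICAS; d++)  /* pick_read_device */
                if (!failed[d]) { pick = d; break; }
            if (pick < 0)
                break;                             /* no replicas left */
            if (read_replica(pick)) {
                printf("read ok from dev %d\n", pick);
                return 0;
            }
            failed[pick] = true;                   /* mark_io_failure */
        }
        printf("all replicas failed\n");
        return 1;
    }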
@@ -1775,6 +1818,190 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
    return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
}

struct btree_node_scrub {
    struct bch_fs *c;
    struct bch_dev *ca;
    void *buf;
    bool used_mempool;
    unsigned written;

    enum btree_id btree;
    unsigned level;
    struct bkey_buf key;
    __le64 seq;

    struct work_struct work;
    struct bio bio;
};

static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written,
                                   struct printbuf *err)
{
    unsigned written = 0;

    if (le64_to_cpu(data->magic) != bset_magic(c)) {
        prt_printf(err, "bad magic: want %llx, got %llx",
                   bset_magic(c), le64_to_cpu(data->magic));
        return false;
    }

    while (written < (ptr_written ?: btree_sectors(c))) {
        struct btree_node_entry *bne;
        struct bset *i;
        bool first = !written;

        if (first) {
            bne = NULL;
            i = &data->keys;
        } else {
            bne = (void *) data + (written << 9);
            i = &bne->keys;

            if (!ptr_written && i->seq != data->keys.seq)
                break;
        }

        struct nonce nonce = btree_nonce(i, written << 9);
        bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));

        if (first) {
            if (good_csum_type) {
                struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data);
                if (bch2_crc_cmp(data->csum, csum)) {
                    bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum);
                    return false;
                }
            }

            written += vstruct_sectors(data, c->block_bits);
        } else {
            if (good_csum_type) {
                struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
                if (bch2_crc_cmp(bne->csum, csum)) {
                    bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum);
                    return false;
                }
            }

            written += vstruct_sectors(bne, c->block_bits);
        }
    }

    return true;
}

static void btree_node_scrub_work(struct work_struct *work)
{
    struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
    struct bch_fs *c = scrub->c;
    struct printbuf err = PRINTBUF;

    __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
                             bkey_i_to_s_c(scrub->key.k));
    prt_newline(&err);

    if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
        struct btree_trans *trans = bch2_trans_get(c);

        struct btree_iter iter;
        bch2_trans_node_iter_init(trans, &iter, scrub->btree,
                                  scrub->key.k->k.p, 0, scrub->level - 1, 0);

        struct btree *b;
        int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
        if (ret)
            goto err;

        if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
            bch_err(c, "error validating btree node during scrub on %s at btree %s",
                    scrub->ca->name, err.buf);

            ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
        }
err:
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_begin(trans);
        bch2_trans_put(trans);
    }

    printbuf_exit(&err);
    bch2_bkey_buf_exit(&scrub->key, c);
    btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
    percpu_ref_put(&scrub->ca->io_ref);
    kfree(scrub);
    bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
}
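btree_node_scrub_check() walks the on-disk node the same way the read path does: the first bset lives in the node header, each following entry starts at the current sector offset, and the walk stops at an entry whose seq doesn't match the header. A standalone sketch of that layout walk over a toy buffer (toy structs, not the real on-disk format):

    #include <stdio.h>
    #include <string.h>

    #define SECTOR 512

    struct toy_bset { unsigned long long seq; unsigned sectors; };

    int main(void)
    {
        unsigned char node[8 * SECTOR] = { 0 };
        /* header bset at offset 0, two more entries, then stale garbage */
        struct toy_bset hdr  = { .seq = 42, .sectors = 2 };
        struct toy_bset e1   = { .seq = 42, .sectors = 3 };
        struct toy_bset e2   = { .seq = 42, .sectors = 1 };
        struct toy_bset junk = { .seq = 7,  .sectors = 1 };  /* wrong seq: end of node */

        memcpy(node,              &hdr,  sizeof(hdr));
        memcpy(node + 2 * SECTOR, &e1,   sizeof(e1));
        memcpy(node + 5 * SECTOR, &e2,   sizeof(e2));
        memcpy(node + 6 * SECTOR, &junk, sizeof(junk));

        struct toy_bset *first = (void *) node;
        unsigned written = 0;

        while (written < 8) {
            struct toy_bset *i = (void *) (node + written * SECTOR);

            if (written && i->seq != first->seq)
                break;  /* past the last valid bset */
            printf("bset at sector %u, %u sectors\n", written, i->sectors);
            written += i->sectors;
        }
        return 0;
    }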
static void btree_node_scrub_endio(struct bio *bio)
{
    struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio);

    queue_work(scrub->c->btree_read_complete_wq, &scrub->work);
}
int bch2_btree_node_scrub(struct btree_trans *trans,
                          enum btree_id btree, unsigned level,
                          struct bkey_s_c k, unsigned dev)
{
    if (k.k->type != KEY_TYPE_btree_ptr_v2)
        return 0;

    struct bch_fs *c = trans->c;

    if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
        return -BCH_ERR_erofs_no_writes;

    struct extent_ptr_decoded pick;
    int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
    if (ret <= 0)
        goto err;

    struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
    if (!ca) {
        ret = -BCH_ERR_device_offline;
        goto err;
    }

    bool used_mempool = false;
    void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool);

    unsigned vecs = buf_pages(buf, c->opts.btree_node_size);

    struct btree_node_scrub *scrub =
        kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL);
    if (!scrub) {
        ret = -ENOMEM;
        goto err_free;
    }

    scrub->c            = c;
    scrub->ca           = ca;
    scrub->buf          = buf;
    scrub->used_mempool = used_mempool;
    scrub->written      = btree_ptr_sectors_written(k);

    scrub->btree        = btree;
    scrub->level        = level;
    bch2_bkey_buf_init(&scrub->key);
    bch2_bkey_buf_reassemble(&scrub->key, c, k);
    scrub->seq          = bkey_s_c_to_btree_ptr_v2(k).v->seq;

    INIT_WORK(&scrub->work, btree_node_scrub_work);

    bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, vecs, REQ_OP_READ);
    bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size);
    scrub->bio.bi_iter.bi_sector = pick.ptr.offset;
    scrub->bio.bi_end_io = btree_node_scrub_endio;
    submit_bio(&scrub->bio);
    return 0;
err_free:
    btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
    percpu_ref_put(&ca->io_ref);
err:
    bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
    return ret;
}
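A caller invokes the new scrub entry point from transaction context, handing it the node key and the device index to read from (per the read-device change above, -1 would mean "any device"). A hedged sketch of a caller — the wrapper is illustrative, not the actual scrub ioctl:

    /* Sketch: scrub one btree node key from a specific device, assuming we
     * already hold a btree_trans and the node's key (illustrative only). */
    static int scrub_one_node(struct btree_trans *trans,
                              enum btree_id btree, unsigned level,
                              struct bkey_s_c k, unsigned dev)
    {
        /* 0 means the read was submitted; verification runs asynchronously
         * in btree_node_scrub_work(), which rewrites the node on mismatch */
        return bch2_btree_node_scrub(trans, btree, level, k, dev);
    }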
static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
                                      struct btree_write *w)
{
@@ -1795,7 +2022,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
    bch2_journal_pin_drop(&c->journal, &w->journal);
}

static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
{
    struct btree_write *w = btree_prev_write(b);
    unsigned long old, new;
@@ -1803,6 +2030,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)

    bch2_btree_complete_write(c, b, w);

    if (start_time)
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);

    old = READ_ONCE(b->flags);
    do {
        new = old;
@@ -1833,7 +2063,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
    wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}

static void btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
{
    struct btree_trans *trans = bch2_trans_get(c);

@@ -1841,7 +2071,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)

    /* we don't need transaction context anymore after we got the lock. */
    bch2_trans_put(trans);
    __btree_node_write_done(c, b);
    __btree_node_write_done(c, b, start_time);
    six_unlock_read(&b->c.lock);
}

@@ -1851,6 +2081,7 @@ static void btree_node_write_work(struct work_struct *work)
        container_of(work, struct btree_write_bio, work);
    struct bch_fs *c = wbio->wbio.c;
    struct btree *b = wbio->wbio.bio.bi_private;
    u64 start_time = wbio->start_time;
    int ret = 0;

    btree_bounce_free(c,
@@ -1871,7 +2102,7 @@ static void btree_node_write_work(struct work_struct *work)

        }
    } else {
        ret = bch2_trans_do(c, NULL, NULL, 0,
        ret = bch2_trans_do(c,
            bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
                    BCH_WATERMARK_interior_updates|
                    BCH_TRANS_COMMIT_journal_reclaim|
@@ -1883,12 +2114,18 @@ static void btree_node_write_work(struct work_struct *work)
    }
out:
    bio_put(&wbio->wbio.bio);
    btree_node_write_done(c, b);
    btree_node_write_done(c, b, start_time);
    return;
err:
    set_btree_node_noevict(b);
    bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
                         "writing btree node: %s", bch2_err_str(ret));

    if (!bch2_err_matches(ret, EROFS)) {
        struct printbuf buf = PRINTBUF;
        prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
        bch2_btree_pos_to_text(&buf, c, b);
        bch2_fs_fatal_error(c, "%s", buf.buf);
        printbuf_exit(&buf);
    }
    goto out;
}

@@ -1901,16 +2138,21 @@ static void btree_node_write_endio(struct bio *bio)
    struct bch_fs *c = wbio->c;
    struct btree *b = wbio->bio.bi_private;
    struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL;
    unsigned long flags;

    if (wbio->have_ioref)
        bch2_latency_acct(ca, wbio->submit_time, WRITE);
    bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
                               wbio->submit_time, !bio->bi_status);

    if (!ca ||
        bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
                           "btree write error: %s",
                           bch2_blk_status_to_str(bio->bi_status)) ||
        bch2_meta_write_fault("btree")) {
    if (ca && bio->bi_status) {
        struct printbuf buf = PRINTBUF;
        prt_printf(&buf, "btree write error: %s\n ",
                   bch2_blk_status_to_str(bio->bi_status));
        bch2_btree_pos_to_text(&buf, c, b);
        bch_err_dev_ratelimited(ca, "%s", buf.buf);
        printbuf_exit(&buf);
    }

    if (bio->bi_status) {
        unsigned long flags;
        spin_lock_irqsave(&c->btree_write_error_lock, flags);
        bch2_dev_list_add_dev(&orig->failed, wbio->dev);
        spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
@@ -1937,7 +2179,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
    bool saw_error;

    int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
                                 BKEY_TYPE_btree, WRITE);
                                 (struct bkey_validate_context) {
                                     .from  = BKEY_VALIDATE_btree_node,
                                     .level = b->c.level + 1,
                                     .btree = b->c.btree_id,
                                     .flags = BCH_VALIDATE_write,
                                 });
    if (ret) {
        bch2_fs_inconsistent(c, "invalid btree node key before write");
        return ret;
@@ -1982,6 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
    bool validate_before_checksum = false;
    enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
    void *data;
    u64 start_time = local_clock();
    int ret;

    if (flags & BTREE_WRITE_ALREADY_STARTED)
@@ -2190,6 +2438,7 @@ do_write:
    wbio->data = data;
    wbio->data_bytes = bytes;
    wbio->sector_offset = b->written;
    wbio->start_time = start_time;
    wbio->wbio.c = c;
    wbio->wbio.used_mempool = used_mempool;
    wbio->wbio.first_btree_write = !b->written;
@@ -2217,7 +2466,7 @@ err:
    b->written += sectors_to_write;
nowrite:
    btree_bounce_free(c, bytes, used_mempool, data);
    __btree_node_write_done(c, b);
    __btree_node_write_done(c, b, 0);
}

/*
@@ -2304,6 +2553,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
    }
}

void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b,
                                 enum six_lock_type lock_type_held,
                                 unsigned flags)
{
    struct bch_fs *c = trans->c;

    if (lock_type_held == SIX_LOCK_intent ||
        (lock_type_held == SIX_LOCK_read &&
         six_lock_tryupgrade(&b->c.lock))) {
        __bch2_btree_node_write(c, b, flags);

        /* don't cycle lock unnecessarily: */
        if (btree_node_just_written(b) &&
            six_trylock_write(&b->c.lock)) {
            bch2_btree_post_write_cleanup(c, b);
            __bch2_btree_node_unlock_write(trans, b);
        }

        if (lock_type_held == SIX_LOCK_read)
            six_lock_downgrade(&b->c.lock);
    } else {
        __bch2_btree_node_write(c, b, flags);
        if (lock_type_held == SIX_LOCK_write &&
            btree_node_just_written(b))
            bch2_btree_post_write_cleanup(c, b);
    }
}

static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
{
    struct bucket_table *tbl;
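The write path now samples a start time at submission and feeds the delta to the time-stats machinery on completion; passing 0 suppresses the update on the no-write path. The pattern, reduced to standalone C (clock_gettime stands in for local_clock()):

    #include <stdio.h>
    #include <time.h>

    static unsigned long long now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    static void time_stats_update(unsigned long long start)
    {
        if (start)  /* 0 == "don't account this completion" */
            printf("write took %llu ns\n", now_ns() - start);
    }

    int main(void)
    {
        unsigned long long start = now_ns();  /* sampled at submission */
        /* ... asynchronous work happens here ... */
        time_stats_update(start);             /* called from the completion */
        time_stats_update(0);                 /* error/no-write path: skipped */
        return 0;
    }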
libbcachefs/btree_io.h
@@ -52,6 +52,7 @@ struct btree_write_bio {
    void *data;
    unsigned data_bytes;
    unsigned sector_offset;
    u64 start_time;
    struct bch_write_bio wbio;
};

@@ -132,6 +133,9 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
                         const struct bkey_i *, unsigned);

int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
                          struct bkey_s_c, unsigned);

bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);

enum btree_write_flags {
@@ -144,11 +148,13 @@ enum btree_write_flags {
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
                           enum six_lock_type, unsigned);
void bch2_btree_node_write_trans(struct btree_trans *, struct btree *,
                                 enum six_lock_type, unsigned);

static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b,
                                            enum six_lock_type lock_held)
{
    bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
    bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
}

bool bch2_btree_flush_all_reads(struct bch_fs *);
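Since btree_node_write_if_need() now takes the transaction rather than the filesystem, callers that used to pass `c` must pass their `trans` instead — a representative before/after, not an actual hunk from this diff:

    /* before: */
    btree_node_write_if_need(c, b, SIX_LOCK_read);
    /* after: */
    btree_node_write_if_need(trans, b, SIX_LOCK_read);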
libbcachefs/btree_iter.c: diff suppressed (file too large)
libbcachefs/btree_iter.h
@@ -9,7 +9,6 @@
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);

static inline int __bkey_err(const struct bkey *k)
@@ -23,6 +22,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path
{
    unsigned idx = path - trans->paths;

    EBUG_ON(idx >= trans->nr_paths);
    EBUG_ON(!test_bit(idx, trans->paths_allocated));
    if (unlikely(path->ref == U8_MAX)) {
        bch2_dump_trans_paths_updates(trans);
@@ -36,6 +36,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path

static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
{
    EBUG_ON(path - trans->paths >= trans->nr_paths);
    EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated));
    EBUG_ON(!path->ref);
    EBUG_ON(!path->intent_ref && intent);
@@ -234,12 +235,12 @@ int __must_check bch2_btree_path_traverse_one(struct btree_trans *,
                                              btree_path_idx_t,
                                              unsigned, unsigned long);

static inline void bch2_trans_verify_not_unlocked(struct btree_trans *);
static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *);

static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
                                                        btree_path_idx_t path, unsigned flags)
{
    bch2_trans_verify_not_unlocked(trans);
    bch2_trans_verify_not_unlocked_or_in_restart(trans);

    if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK)
        return 0;
@@ -324,38 +325,52 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
        bch2_trans_restart_error(trans, restart_count);
}
void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
void __noreturn bch2_trans_unlocked_or_in_restart_error(struct btree_trans *);

static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_trans *trans)
{
    if (trans->restarted)
        bch2_trans_in_restart_error(trans);
}

void __noreturn bch2_trans_unlocked_error(struct btree_trans *);

static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans)
{
    if (!trans->locked)
        bch2_trans_unlocked_error(trans);
    if (trans->restarted || !trans->locked)
        bch2_trans_unlocked_or_in_restart_error(trans);
}

__always_inline
static int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
static int btree_trans_restart_foreign_task(struct btree_trans *trans, int err, unsigned long ip)
{
    BUG_ON(err <= 0);
    BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));

    trans->restarted = err;
    trans->last_restarted_ip = _THIS_IP_;
    trans->last_restarted_ip = ip;
    return -err;
}

__always_inline
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
{
    btree_trans_restart_foreign_task(trans, err, ip);
#ifdef CONFIG_BCACHEFS_DEBUG
    darray_exit(&trans->last_restarted_trace);
    bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT);
#endif
    return -err;
}

__always_inline
static int btree_trans_restart(struct btree_trans *trans, int err)
{
    btree_trans_restart_nounlock(trans, err);
    return -err;
    return btree_trans_restart_ip(trans, err, _THIS_IP_);
}

static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
{
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
    if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
        trace_and_count(trans->c, trans_restart_injected, trans, ip);
        return btree_trans_restart_ip(trans,
            BCH_ERR_transaction_restart_fault_inject, ip);
    }
#endif
    return 0;
}
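The injection gate `!(x & ~(~0ULL << n))` fires only when the low n bits of the timestamp are all zero — probability roughly 2^-n — and n grows with the per-transaction restart count, so repeated injections back off exponentially. A standalone check of that hit rate:

    #include <stdio.h>
    #include <stdlib.h>

    /* True when the low n bits of x are all zero: probability ~2^-n for a
     * uniformly random x, so raising n backs off exponentially. */
    static int inject(unsigned long long x, unsigned n)
    {
        return !(x & ~(~0ULL << n));
    }

    int main(void)
    {
        srand(1);
        for (unsigned n = 4; n <= 12; n += 4) {
            unsigned hits = 0, trials = 1 << 20;

            for (unsigned i = 0; i < trials; i++) {
                unsigned long long x =
                    ((unsigned long long) rand() << 32) ^ rand();
                hits += inject(x, n);
            }
            printf("n=%2u: hit rate %.5f (expect %.5f)\n",
                   n, (double) hits / trials, 1.0 / (1ULL << n));
        }
        return 0;
    }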
bool bch2_btree_node_upgrade(struct btree_trans *,
@@ -375,6 +390,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
void bch2_trans_downgrade(struct btree_trans *);

void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *);
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);

int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
@@ -384,15 +400,21 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);

static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
    return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
    return bch2_btree_iter_peek_max(iter, SPOS_MAX);
}

struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos);

static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
    return bch2_btree_iter_peek_prev_min(iter, POS_MIN);
}

struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
@@ -443,10 +465,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna

void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);

static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
                                               unsigned btree_id,
                                               unsigned flags)
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
                                             unsigned btree_id,
                                             unsigned level,
                                             unsigned flags)
{
    if (level || !btree_id_cached(trans->c, btree_id)) {
        flags &= ~BTREE_ITER_cached;
        flags &= ~BTREE_ITER_with_key_cache;
    } else if (!(flags & BTREE_ITER_cached))
        flags |= BTREE_ITER_with_key_cache;

    if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) &&
        btree_id_is_extents(btree_id))
        flags |= BTREE_ITER_is_extents;
@@ -465,19 +494,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
    return flags;
}

static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
                                             unsigned btree_id,
                                             unsigned flags)
{
    if (!btree_id_cached(trans->c, btree_id)) {
        flags &= ~BTREE_ITER_cached;
        flags &= ~BTREE_ITER_with_key_cache;
    } else if (!(flags & BTREE_ITER_cached))
        flags |= BTREE_ITER_with_key_cache;

    return __bch2_btree_iter_flags(trans, btree_id, flags);
}

static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
                                               struct btree_iter *iter,
                                               unsigned btree_id, struct bpos pos,
@@ -514,7 +530,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
    if (__builtin_constant_p(btree_id) &&
        __builtin_constant_p(flags))
        bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
                bch2_btree_iter_flags(trans, btree_id, flags),
                bch2_btree_iter_flags(trans, btree_id, 0, flags),
                _THIS_IP_);
    else
        bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);
@@ -593,13 +609,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
    bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
                _btree_id, _pos, _flags, KEY_TYPE_##_type))
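The _upto names become _max throughout; the semantics — iterate keys up to and including an end position — are unchanged. A hedged sketch of a caller after the rename (the btree id, positions, and callback body are illustrative):

    /* Sketch: count keys in [start, end] with the renamed iterator macro.
     * BTREE_ID_extents and `nr` are illustrative. */
    static int count_extents(struct btree_trans *trans, struct bpos start,
                             struct bpos end, u64 *nr)
    {
        struct btree_iter iter;

        /* the macro declares the bkey_s_c named by its _k argument */
        return for_each_btree_key_max(trans, iter, BTREE_ID_extents,
                                      start, end, 0, k, ({
            (*nr)++;
            0;
        }));
    }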
static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k)
{
    unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k));
    memcpy(dst_v, src_k.v, b);
    if (unlikely(b < dst_size))
        memset(dst_v + b, 0, dst_size - b);
}

#define bkey_val_copy(_dst_v, _src_k) \
do { \
    unsigned b = min_t(unsigned, sizeof(*_dst_v), \
                       bkey_val_bytes(_src_k.k)); \
    memcpy(_dst_v, _src_k.v, b); \
    if (b < sizeof(*_dst_v)) \
        memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \
    BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \
    __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \
} while (0)
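The helper copies as many value bytes as the key actually carries and zero-fills the rest of the destination, so a shorter on-disk value reads back with deterministic trailing zeroes. The same pattern in standalone C:

    #include <stdio.h>
    #include <string.h>

    /* Copy src (src_size bytes) into dst (dst_size bytes), zero-padding the
     * tail when the source is shorter than the destination. */
    static void val_copy(void *dst, size_t dst_size,
                         const void *src, size_t src_size)
    {
        size_t b = src_size < dst_size ? src_size : dst_size;

        memcpy(dst, src, b);
        if (b < dst_size)
            memset((char *) dst + b, 0, dst_size - b);
    }

    int main(void)
    {
        unsigned char on_disk[2] = { 0xab, 0xcd };  /* short, older-format value */
        unsigned char v[4] = { 0xff, 0xff, 0xff, 0xff };

        val_copy(v, sizeof(v), on_disk, sizeof(on_disk));
        for (size_t i = 0; i < sizeof(v); i++)
            printf("%02x ", v[i]);  /* ab cd 00 00 */
        printf("\n");
        return 0;
    }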
static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
@@ -608,17 +629,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
                unsigned val_size, void *val)
{
    struct btree_iter iter;
    struct bkey_s_c k;
    int ret;

    k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
    ret = bkey_err(k);
    struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
    int ret = bkey_err(k);
    if (!ret) {
        unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size);

        memcpy(val, k.v, b);
        if (unlikely(b < sizeof(*val)))
            memset((void *) val + b, 0, sizeof(*val) - b);
        __bkey_val_copy(val, val_size, k);
        bch2_trans_iter_exit(trans, &iter);
    }

@@ -677,12 +691,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
        bch2_btree_iter_peek(iter);
}

static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter,
                                                            struct bpos end,
                                                            unsigned flags)
{
    if (!(flags & BTREE_ITER_slots))
        return bch2_btree_iter_peek_upto(iter, end);
        return bch2_btree_iter_peek_max(iter, end);

    if (bkey_gt(iter->pos, end))
        return bkey_s_c_null;
@@ -743,10 +757,10 @@ transaction_restart: \
    if (!_ret2) \
        bch2_trans_verify_not_restarted(_trans, _restart_count);\
    \
    _ret2 ?: trans_was_restarted(_trans, _restart_count); \
    _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
})

#define for_each_btree_key_upto_continue(_trans, _iter, \
#define for_each_btree_key_max_continue(_trans, _iter, \
            _end, _flags, _k, _do) \
({ \
    struct bkey_s_c _k; \
@@ -754,7 +768,7 @@ transaction_restart: \
    \
    do { \
        _ret3 = lockrestart_do(_trans, ({ \
            (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \
            (_k) = bch2_btree_iter_peek_max_type(&(_iter), \
                            _end, (_flags)); \
            if (!(_k).k) \
                break; \
@@ -768,9 +782,9 @@ transaction_restart: \
})

#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _do) \
    for_each_btree_key_upto_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do)
    for_each_btree_key_max_continue(_trans, _iter, SPOS_MAX, _flags, _k, _do)

#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
#define for_each_btree_key_max(_trans, _iter, _btree_id, \
            _start, _end, _flags, _k, _do) \
({ \
    bch2_trans_begin(trans); \
@@ -779,12 +793,12 @@ transaction_restart: \
    bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
                         (_start), (_flags)); \
    \
    for_each_btree_key_upto_continue(_trans, _iter, _end, _flags, _k, _do);\
    for_each_btree_key_max_continue(_trans, _iter, _end, _flags, _k, _do);\
})

#define for_each_btree_key(_trans, _iter, _btree_id, \
            _start, _flags, _k, _do) \
    for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \
    for_each_btree_key_max(_trans, _iter, _btree_id, _start, \
                SPOS_MAX, _flags, _k, _do)

#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
@@ -828,33 +842,33 @@ transaction_restart: \
            (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
                        (_journal_seq), (_commit_flags)))

#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
#define for_each_btree_key_max_commit(_trans, _iter, _btree_id, \
            _start, _end, _iter_flags, _k, \
            _disk_res, _journal_seq, _commit_flags,\
            _do) \
    for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
    for_each_btree_key_max(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
            (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
                        (_journal_seq), (_commit_flags)))

struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);

#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \
            _start, _end, _flags, _k, _ret) \
    for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
                              (_start), (_flags)); \
         (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
         (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\
         !((_ret) = bkey_err(_k)) && (_k).k; \
         bch2_btree_iter_advance(&(_iter)))

#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\
#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\
    for (; \
         (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \
         (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \
         !((_ret) = bkey_err(_k)) && (_k).k; \
         bch2_btree_iter_advance(&(_iter)))

#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
            _start, _flags, _k, _ret) \
    for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\
    for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\
                SPOS_MAX, _flags, _k, _ret)

#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \
@@ -866,7 +880,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
         bch2_btree_iter_rewind(&(_iter)))

#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
    for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
    for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)

/*
 * This should not be used in a fastpath, without first trying _do in
@@ -912,6 +926,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
    _ret; \
})

#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))

struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);
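bch2_trans_do() collapses to two arguments: it runs the expression inside a fresh transaction with lockrestart_do() handling transaction restarts. A hedged sketch of the call-site change, with do_update() standing in for an arbitrary transactional expression:

    /* before: NULL disk reservation, NULL journal seq, no commit flags */
    ret = bch2_trans_do(c, NULL, NULL, 0, do_update(trans));
    /* after: just the filesystem and the expression to run */
    ret = bch2_trans_do(c, do_update(trans));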
libbcachefs/btree_journal_iter.c
@@ -16,6 +16,17 @@
 * operations for the regular btree iter code to use:
 */

static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos)
{
    size_t gap_size = keys->size - keys->nr;

    BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size);

    if (pos >= keys->gap)
        pos -= gap_size;
    return pos;
}

static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
{
    size_t gap_size = keys->size - keys->nr;
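journal_keys is a gap buffer: nr live entries in an array of `size` slots, with the free slots collected in one movable gap at `gap`. pos_to_idx()/idx_to_pos() translate between logical index (0..nr) and physical array position by skipping the gap. A standalone model:

    #include <assert.h>
    #include <stdio.h>

    struct gap_buf { size_t size, nr, gap; };  /* free slots at [gap, gap + size - nr) */

    static size_t pos_to_idx(struct gap_buf *b, size_t pos)
    {
        size_t gap_size = b->size - b->nr;

        assert(!(pos >= b->gap && pos < b->gap + gap_size));
        return pos >= b->gap ? pos - gap_size : pos;
    }

    static size_t idx_to_pos(struct gap_buf *b, size_t idx)
    {
        size_t gap_size = b->size - b->nr;

        return idx >= b->gap ? idx + gap_size : idx;
    }

    int main(void)
    {
        /* 8 slots, 5 live entries, 3 free slots starting at position 2 */
        struct gap_buf b = { .size = 8, .nr = 5, .gap = 2 };

        for (size_t idx = 0; idx < b.nr; idx++)
            printf("idx %zu -> pos %zu\n", idx, idx_to_pos(&b, idx));
        /* 0->0 1->1 2->5 3->6 4->7; pos_to_idx() inverts the mapping */
        assert(pos_to_idx(&b, 5) == 2);
        return 0;
    }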
@@ -61,7 +72,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
}

/* Returns first non-overwritten key >= search key: */
struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
                                           unsigned level, struct bpos pos,
                                           struct bpos end_pos, size_t *idx)
{
@@ -84,18 +95,54 @@ search:
        }
    }

    struct bkey_i *ret = NULL;
    rcu_read_lock(); /* for overwritten_ranges */

    while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
        if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
            return NULL;
            break;

        if (k->overwritten) {
            (*idx)++;
            if (k->overwritten_range)
                *idx = rcu_dereference(k->overwritten_range)->end;
            else
                *idx += 1;
            continue;
        }

        if (__journal_key_cmp(btree_id, level, pos, k) <= 0)
            return k->k;
        if (__journal_key_cmp(btree_id, level, pos, k) <= 0) {
            ret = k->k;
            break;
        }

        (*idx)++;
        iters++;
        if (iters == 10) {
            *idx = 0;
            rcu_read_unlock();
            goto search;
        }
    }

    rcu_read_unlock();
    return ret;
}

struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
                                               unsigned level, struct bpos pos,
                                               struct bpos end_pos, size_t *idx)
{
    struct journal_keys *keys = &c->journal_keys;
    unsigned iters = 0;
    struct journal_key *k;

    BUG_ON(*idx > keys->nr);
search:
    if (!*idx)
        *idx = __bch2_journal_key_search(keys, btree_id, level, pos);

    while (*idx &&
           __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
        (*idx)++;
        iters++;
        if (iters == 10) {
@@ -104,7 +151,36 @@ search:
        }
    }

    return NULL;
    struct bkey_i *ret = NULL;
    rcu_read_lock(); /* for overwritten_ranges */

    while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
        if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
            break;

        if (k->overwritten) {
            if (k->overwritten_range)
                *idx = rcu_dereference(k->overwritten_range)->start - 1;
            else
                *idx -= 1;
            continue;
        }

        if (__journal_key_cmp(btree_id, level, pos, k) >= 0) {
            ret = k->k;
            break;
        }

        --(*idx);
        iters++;
        if (iters == 10) {
            *idx = 0;
            goto search;
        }
    }

    rcu_read_unlock();
    return ret;
}
struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
@@ -112,11 +188,12 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
{
    size_t idx = 0;

    return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
    return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx);
}

static void journal_iter_verify(struct journal_iter *iter)
{
#ifdef CONFIG_BCACHEFS_DEBUG
    struct journal_keys *keys = iter->keys;
    size_t gap_size = keys->size - keys->nr;

@@ -126,10 +203,10 @@ static void journal_iter_verify(struct journal_iter *iter)
    if (iter->idx < keys->size) {
        struct journal_key *k = keys->data + iter->idx;

        int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
            cmp_int(k->level, iter->level);
        BUG_ON(cmp < 0);
        int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
        BUG_ON(cmp > 0);
    }
#endif
}

static void journal_iters_fix(struct bch_fs *c)
@@ -182,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
         * Ensure these keys are done last by journal replay, to unblock
         * journal reclaim:
         */
        .journal_seq = U32_MAX,
        .journal_seq = U64_MAX,
    };
    struct journal_keys *keys = &c->journal_keys;
    size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
@@ -290,6 +367,68 @@ bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
               bkey_deleted(&keys->data[idx].k->k));
}
static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos)
{
    struct journal_key *k = keys->data + pos;
    size_t idx = pos_to_idx(keys, pos);

    k->overwritten = true;

    struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL;
    struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL;

    bool prev_overwritten = prev && prev->overwritten;
    bool next_overwritten = next && next->overwritten;

    struct journal_key_range_overwritten *prev_range =
        prev_overwritten ? prev->overwritten_range : NULL;
    struct journal_key_range_overwritten *next_range =
        next_overwritten ? next->overwritten_range : NULL;

    BUG_ON(prev_range && prev_range->end != idx);
    BUG_ON(next_range && next_range->start != idx + 1);

    if (prev_range && next_range) {
        prev_range->end = next_range->end;

        keys->data[pos].overwritten_range = prev_range;
        for (size_t i = next_range->start; i < next_range->end; i++) {
            struct journal_key *ip = keys->data + idx_to_pos(keys, i);
            BUG_ON(ip->overwritten_range != next_range);
            ip->overwritten_range = prev_range;
        }

        kfree_rcu_mightsleep(next_range);
    } else if (prev_range) {
        prev_range->end++;
        k->overwritten_range = prev_range;
        if (next_overwritten) {
            prev_range->end++;
            next->overwritten_range = prev_range;
        }
    } else if (next_range) {
        next_range->start--;
        k->overwritten_range = next_range;
        if (prev_overwritten) {
            next_range->start--;
            prev->overwritten_range = next_range;
        }
    } else if (prev_overwritten || next_overwritten) {
        struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
        if (!r)
            return;

        r->start = idx - (size_t) prev_overwritten;
        r->end = idx + 1 + (size_t) next_overwritten;

        rcu_assign_pointer(k->overwritten_range, r);
        if (prev_overwritten)
            prev->overwritten_range = r;
        if (next_overwritten)
            next->overwritten_range = r;
    }
}
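The function above maintains the invariant that every maximal run of overwritten keys shares one [start, end) range record, merging with its neighbours as new keys are marked: bridge two runs, extend left, extend right, or start a new record. A standalone model of the merge cases (single array, no RCU):

    #include <stdio.h>
    #include <stdlib.h>

    #define N 8

    struct range { size_t start, end; };  /* [start, end) */

    static int overwritten[N];
    static struct range *ranges[N];  /* shared by every key in a run of length > 1 */

    static void mark_overwritten(size_t i)
    {
        overwritten[i] = 1;

        int prev = i > 0     && overwritten[i - 1];
        int next = i + 1 < N && overwritten[i + 1];
        struct range *pr = prev ? ranges[i - 1] : NULL;
        struct range *nr = next ? ranges[i + 1] : NULL;

        if (pr && nr) {              /* bridge two runs into one record */
            pr->end = nr->end;
            for (size_t j = nr->start; j < nr->end; j++)
                ranges[j] = pr;
            ranges[i] = pr;
            free(nr);
        } else if (pr) {             /* extend the run on the left */
            pr->end += 1 + next;
            ranges[i] = pr;
            if (next)
                ranges[i + 1] = pr;
        } else if (nr) {             /* extend the run on the right */
            nr->start -= 1 + prev;
            ranges[i] = nr;
            if (prev)
                ranges[i - 1] = nr;
        } else if (prev || next) {   /* start a new two-key run */
            struct range *r = malloc(sizeof(*r));

            r->start = i - prev;
            r->end   = i + 1 + next;
            ranges[i] = r;
            if (prev) ranges[i - 1] = r;
            if (next) ranges[i + 1] = r;
        }                            /* isolated key: no record needed */
    }

    int main(void)
    {
        mark_overwritten(2);
        mark_overwritten(4);
        mark_overwritten(3);  /* bridges keys 2 and 4 into one run */
        mark_overwritten(1);  /* extends that run on the left */
        printf("run: [%zu, %zu)\n", ranges[1]->start, ranges[1]->end);  /* [1, 5) */
        return 0;
    }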
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
||||
unsigned level, struct bpos pos)
|
||||
{
|
@ -299,8 +438,12 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,

	if (idx < keys->size &&
	    keys->data[idx].btree_id == btree &&
	    keys->data[idx].level == level &&
	    bpos_eq(keys->data[idx].k->k.p, pos))
		keys->data[idx].overwritten = true;
	    bpos_eq(keys->data[idx].k->k.p, pos) &&
	    !keys->data[idx].overwritten) {
		mutex_lock(&keys->overwrite_lock);
		__bch2_journal_key_overwritten(keys, idx);
		mutex_unlock(&keys->overwrite_lock);
	}
}

static void bch2_journal_iter_advance(struct journal_iter *iter)
@ -314,24 +457,32 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)

static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
	struct bkey_s_c ret = bkey_s_c_null;

	journal_iter_verify(iter);

	rcu_read_lock();
	while (iter->idx < iter->keys->size) {
		struct journal_key *k = iter->keys->data + iter->idx;

		int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
			  cmp_int(k->level, iter->level);
		if (cmp > 0)
		int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
		if (cmp < 0)
			break;
		BUG_ON(cmp);

		if (!k->overwritten)
			return bkey_i_to_s_c(k->k);
		if (!k->overwritten) {
			ret = bkey_i_to_s_c(k->k);
			break;
		}

		bch2_journal_iter_advance(iter);
		if (k->overwritten_range)
			iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
		else
			bch2_journal_iter_advance(iter);
	}
	rcu_read_unlock();

	return bkey_s_c_null;
	return ret;
}

static void bch2_journal_iter_exit(struct journal_iter *iter)
@ -382,6 +533,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
		: (level > 1 ? 1 : 16);

	iter.prefetch = false;
	iter.fail_if_too_many_whiteouts = true;
	bch2_bkey_buf_init(&tmp);

	while (nr--) {
@ -400,6 +552,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
{
	struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
	size_t iters = 0;

	if (iter->prefetch && iter->journal.level)
		btree_and_journal_iter_prefetch(iter);
@ -407,6 +560,11 @@ again:
	if (iter->at_end)
		return bkey_s_c_null;

	iters++;

	if (iters > 20 && iter->fail_if_too_many_whiteouts)
		return bkey_s_c_null;

	while ((btree_k = bch2_journal_iter_peek_btree(iter)).k &&
	       bpos_lt(btree_k.k->p, iter->pos))
		bch2_journal_iter_advance_btree(iter);
@ -481,21 +639,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,

/* sort and dedup all keys in the journal: */

void bch2_journal_entries_free(struct bch_fs *c)
{
	struct journal_replay **i;
	struct genradix_iter iter;

	genradix_for_each(&c->journal_entries, iter, i)
		kvfree(*i);
	genradix_free(&c->journal_entries);
}

/*
 * When keys compare equal, oldest compares first:
 */
static int journal_sort_key_cmp(const void *_l, const void *_r)
{
	cond_resched();

	const struct journal_key *l = _l;
	const struct journal_key *r = _r;

@ -515,15 +665,26 @@ void bch2_journal_keys_put(struct bch_fs *c)

	move_gap(keys, keys->nr);

	darray_for_each(*keys, i)
	darray_for_each(*keys, i) {
		if (i->overwritten_range &&
		    (i == &darray_last(*keys) ||
		     i->overwritten_range != i[1].overwritten_range))
			kfree(i->overwritten_range);

		if (i->allocated)
			kfree(i->k);
	}

	kvfree(keys->data);
	keys->data = NULL;
	keys->nr = keys->gap = keys->size = 0;

	bch2_journal_entries_free(c);
	struct journal_replay **i;
	struct genradix_iter iter;

	genradix_for_each(&c->journal_entries, iter, i)
		kvfree(*i);
	genradix_free(&c->journal_entries);
}

static void __journal_keys_sort(struct journal_keys *keys)
@ -628,8 +789,20 @@ void bch2_journal_keys_dump(struct bch_fs *c)

	darray_for_each(*keys, i) {
		printbuf_reset(&buf);
		prt_printf(&buf, "btree=");
		bch2_btree_id_to_text(&buf, i->btree_id);
		prt_printf(&buf, " l=%u ", i->level);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
		pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf);
		pr_err("%s", buf.buf);
	}
	printbuf_exit(&buf);
}

void bch2_fs_journal_keys_init(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	atomic_set(&keys->ref, 1);
	keys->initial_ref_held = true;
	mutex_init(&keys->overwrite_lock);
}
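Editor's note: journal_sort_key_cmp() sorts equal keys oldest-first, so a dedup pass can keep the newest version of each key simply by letting later entries overwrite earlier ones. A minimal userspace sketch of that idea (hypothetical struct, not the bcachefs code):

#include <stdlib.h>

struct key {
	unsigned id;		/* stands in for (btree_id, level, pos) */
	unsigned long long seq;	/* journal sequence number */
};

/* When keys compare equal, oldest (lowest seq) compares first: */
static int key_cmp(const void *_l, const void *_r)
{
	const struct key *l = _l, *r = _r;

	if (l->id != r->id)
		return l->id < r->id ? -1 : 1;
	return l->seq < r->seq ? -1 : l->seq > r->seq ? 1 : 0;
}

/* Sort, then keep only the newest version of each key: */
static size_t sort_and_dedup(struct key *keys, size_t nr)
{
	qsort(keys, nr, sizeof(*keys), key_cmp);

	size_t dst = 0;
	for (size_t src = 0; src < nr; src++) {
		/* a newer duplicate overwrites the slot it would follow: */
		if (dst && keys[dst - 1].id == keys[src].id)
			keys[dst - 1] = keys[src];
		else
			keys[dst++] = keys[src];
	}
	return dst;
}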
libbcachefs/btree_journal_iter.h
@ -26,16 +26,24 @@ struct btree_and_journal_iter {
	struct bpos pos;
	bool at_end;
	bool prefetch;
	bool fail_if_too_many_whiteouts;
};

static inline int __journal_key_btree_cmp(enum btree_id l_btree_id,
					  unsigned l_level,
					  const struct journal_key *r)
{
	return -cmp_int(l_level, r->level) ?:
		cmp_int(l_btree_id, r->btree_id);
}

static inline int __journal_key_cmp(enum btree_id l_btree_id,
				    unsigned l_level,
				    struct bpos l_pos,
				    const struct journal_key *r)
{
	return (cmp_int(l_btree_id, r->btree_id) ?:
		cmp_int(l_level, r->level) ?:
		bpos_cmp(l_pos, r->k->k.p));
	return __journal_key_btree_cmp(l_btree_id, l_level, r) ?:
		bpos_cmp(l_pos, r->k->k.p);
}

static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
@ -43,7 +51,9 @@ static inline int journal_key_cmp(const struct journal_key *l, const struct jour
	return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
}

struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id,
					  unsigned, struct bpos, struct bpos, size_t *);
struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id,
					  unsigned, struct bpos, struct bpos, size_t *);
struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
					   unsigned, struct bpos);
@ -79,8 +89,6 @@ static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
	c->journal_keys.initial_ref_held = false;
}

void bch2_journal_entries_free(struct bch_fs *);

int bch2_journal_keys_sort(struct bch_fs *);

void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
@ -89,4 +97,6 @@ void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,

void bch2_journal_keys_dump(struct bch_fs *);

void bch2_fs_journal_keys_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
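Editor's note: these helpers rely on the GNU `a ?: b` extension to chain comparators; each cmp_int() yields -1/0/1 and the first nonzero result wins, while negating a comparison reverses that field's sort order, as __journal_key_btree_cmp() does for the level. A standalone illustration of the idiom (the struct and fields are made up for the example):

#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))	/* -1, 0 or 1 */

struct pos { int btree; int level; long offset; };

static int pos_cmp(const struct pos *l, const struct pos *r)
{
	/*
	 * GNU ?: returns its left operand when nonzero, so the
	 * comparison falls through to the next field only on equality:
	 */
	return cmp_int(l->btree, r->btree) ?:
		-cmp_int(l->level, r->level) ?:	/* higher levels sort first */
		cmp_int(l->offset, r->offset);
}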
36	libbcachefs/btree_journal_iter_types.h	Normal file
@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H
#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H

struct journal_key_range_overwritten {
	size_t start, end;
};

struct journal_key {
	u64		journal_seq;
	u32		journal_offset;
	enum btree_id	btree_id:8;
	unsigned	level:8;
	bool		allocated;
	bool		overwritten;
	struct journal_key_range_overwritten __rcu *
			overwritten_range;
	struct bkey_i	*k;
};

struct journal_keys {
	/* must match layout in darray_types.h */
	size_t			nr, size;
	struct journal_key	*data;
	/*
	 * Gap buffer: instead of all the empty space in the array being at the
	 * end of the buffer - from @nr to @size - the empty space is at @gap.
	 * This means that sequential insertions are O(n) instead of O(n^2).
	 */
	size_t			gap;
	atomic_t		ref;
	bool			initial_ref_held;
	struct mutex		overwrite_lock;
};

#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */
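Editor's note: the gap-buffer comment above is the whole trick — keeping the hole at the insertion point instead of at the end makes a run of nearby insertions cheap, since each insert only shifts the elements between the old and new gap positions. A minimal sketch over a plain int array (illustrative only; the real code works on struct journal_key via move_gap()):

#include <string.h>

struct gap_buf {
	int	*data;
	size_t	nr, size;	/* nr elements used, size allocated */
	size_t	gap;		/* empty slots occupy data[gap..gap + (size - nr)) */
};

/* Move the gap so the next insertion at @pos is a single memmove: */
static void move_gap(struct gap_buf *b, size_t pos)
{
	size_t gap_len = b->size - b->nr;

	if (pos < b->gap)
		memmove(b->data + pos + gap_len, b->data + pos,
			(b->gap - pos) * sizeof(*b->data));
	else if (pos > b->gap)
		memmove(b->data + b->gap, b->data + b->gap + gap_len,
			(pos - b->gap) * sizeof(*b->data));
	b->gap = pos;
}

/* Insert assuming capacity; sequential inserts barely move the gap: */
static void gap_insert(struct gap_buf *b, size_t pos, int v)
{
	move_gap(b, pos);
	b->data[b->gap++] = v;
	b->nr++;
}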
libbcachefs/btree_key_cache.c
@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
	}

	if (ck) {
		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
		ck->c.cached = true;
		goto lock;
	}
@ -197,7 +197,9 @@ out:
	return ck;
}

static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
static int btree_key_cache_create(struct btree_trans *trans,
				  struct btree_path *path,
				  struct btree_path *ck_path,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
	key_u64s = min(256U, (key_u64s * 3) / 2);
	key_u64s = roundup_pow_of_two(key_u64s);

	struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
	struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s);
	int ret = PTR_ERR_OR_ZERO(ck);
	if (ret)
		return ret;
@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
		ck = bkey_cached_reuse(bc);
		if (unlikely(!ck)) {
			bch_err(c, "error allocating memory for key cache item, btree %s",
				bch2_btree_id_str(path->btree_id));
				bch2_btree_id_str(ck_path->btree_id));
			return -BCH_ERR_ENOMEM_btree_key_cache_create;
		}
	}

	ck->c.level		= 0;
	ck->c.btree_id		= path->btree_id;
	ck->key.btree_id	= path->btree_id;
	ck->key.pos		= path->pos;
	ck->c.btree_id		= ck_path->btree_id;
	ck->key.btree_id	= ck_path->btree_id;
	ck->key.pos		= ck_path->pos;
	ck->flags		= 1U << BKEY_CACHED_ACCESSED;

	if (unlikely(key_u64s > ck->u64s)) {
		mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
		mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);

		struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
				kmalloc(key_u64s * sizeof(u64), _gfp));
@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *

	bkey_reassemble(ck->k, k);

	ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c);
	if (unlikely(ret))
		goto err;

	ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);

	bch2_btree_node_unlock_write(trans, path, path_l(path)->b);

	if (unlikely(ret)) /* raced with another fill? */
		goto err;

	atomic_long_inc(&bc->nr_keys);
	six_unlock_write(&ck->c.lock);

	enum six_lock_type lock_want = __btree_lock_want(path, 0);
	enum six_lock_type lock_want = __btree_lock_want(ck_path, 0);
	if (lock_want == SIX_LOCK_read)
		six_lock_downgrade(&ck->c.lock);
	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
	path->uptodate = BTREE_ITER_UPTODATE;
	btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want);
	ck_path->uptodate = BTREE_ITER_UPTODATE;
	return 0;
err:
	bkey_cached_free(bc, ck);
	mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
	mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);

	return ret;
}
@ -283,7 +292,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
					 unsigned flags)
{
	if (flags & BTREE_ITER_cached_nofill) {
		ck_path->uptodate = BTREE_ITER_UPTODATE;
		ck_path->l[0].b = NULL;
		return 0;
	}

@ -293,6 +302,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
	int ret;

	bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
			     BTREE_ITER_intent|
			     BTREE_ITER_key_cache_fill|
			     BTREE_ITER_cached_nofill);
	iter.flags &= ~BTREE_ITER_with_journal;
@ -306,9 +316,19 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
	if (unlikely(ret))
		goto out;

	ret = btree_key_cache_create(trans, ck_path, k);
	ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k);
	if (ret)
		goto err;

	if (trace_key_cache_fill_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bpos_to_text(&buf, ck_path->pos);
		prt_char(&buf, ' ');
		bch2_bkey_val_to_text(&buf, trans->c, k);
		trace_key_cache_fill(trans, buf.buf);
		printbuf_exit(&buf);
	}
out:
	/* We're not likely to need this iterator again: */
	bch2_set_btree_iter_dontneed(&iter);
@ -424,8 +444,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
	    !test_bit(JOURNAL_space_low, &c->journal.flags))
		commit_flags |= BCH_TRANS_COMMIT_no_journal_res;

	ret = bch2_btree_iter_traverse(&b_iter) ?:
		bch2_trans_update(trans, &b_iter, ck->k,
	struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter);
	ret = bkey_err(btree_k);
	if (ret)
		goto err;

	/* Check that we're not violating cache coherency rules: */
	BUG_ON(bkey_deleted(btree_k.k));

	ret = bch2_trans_update(trans, &b_iter, ck->k,
				BTREE_UPDATE_key_cache_reclaim|
				BTREE_UPDATE_internal_snapshot_node|
				BTREE_TRIGGER_norun) ?:
@ -433,7 +460,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
			BCH_TRANS_COMMIT_no_check_rw|
			BCH_TRANS_COMMIT_no_enospc|
			commit_flags);

err:
	bch2_fs_fatal_err_on(ret &&
			     !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
			     !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
@ -586,8 +613,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
	bkey_cached_free(bc, ck);

	mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
	path->should_be_locked = false;

	struct btree_path *path2;
	unsigned i;
	trans_for_each_path(trans, path2, i)
		if (path2->l[0].b == (void *) ck) {
			__bch2_btree_path_unlock(trans, path2);
			path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
			path2->should_be_locked = false;
			btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE);
		}

	bch2_trans_verify_locks(trans);
}

static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
@ -711,7 +748,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
			rcu_read_unlock();
			mutex_lock(&bc->table.mutex);
			mutex_unlock(&bc->table.mutex);
			rcu_read_lock();
			continue;
		}
		for (i = 0; i < tbl->size; i++)
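Editor's note: the "raced with another fill?" branch above is a common lock-free-create idiom — build the object first, then let one atomic hash-table insert decide the winner, and have losers free their copy. Sketched here with a hypothetical table API (not the kernel rhashtable):

#include <errno.h>
#include <stdlib.h>

struct cache_entry { long key; /* ... cached state ... */ };

/* Hypothetical: insert fails with -EEXIST if @key is already present. */
static int table_insert(long key, struct cache_entry *e);

static int cache_create(long key)
{
	struct cache_entry *e = malloc(sizeof(*e));
	if (!e)
		return -ENOMEM;
	e->key = key;

	int ret = table_insert(key, e);
	if (ret) {
		/* raced with another fill: another thread won, use theirs */
		free(e);
		return ret == -EEXIST ? 0 : ret;
	}
	return 0;
}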
libbcachefs/btree_locking.c
@ -7,9 +7,10 @@
static struct lock_class_key bch2_btree_node_lock_key;

void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
			  enum six_lock_init_flags flags)
			  enum six_lock_init_flags flags,
			  gfp_t gfp)
{
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp);
	lockdep_set_notrack_class(&b->lock);
}

@ -90,10 +91,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
	struct trans_waiting_for_lock *i;

	for (i = g->g; i != g->g + g->nr; i++) {
		struct task_struct *task = i->trans->locking_wait.task;
		struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
		if (i != g->g)
			prt_str(out, "<- ");
		prt_printf(out, "%u ", task ?task->pid : 0);
		prt_printf(out, "%u ", task ? task->pid : 0);
	}
	prt_newline(out);
}
@ -109,6 +110,12 @@ static noinline void lock_graph_pop_all(struct lock_graph *g)
		lock_graph_up(g);
}

static noinline void lock_graph_pop_from(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
	while (g->g + g->nr > i)
		lock_graph_up(g);
}

static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
{
	g->g[g->nr++] = (struct trans_waiting_for_lock) {
@ -124,15 +131,20 @@ static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
	__lock_graph_down(g, trans);
}

static bool lock_graph_remove_non_waiters(struct lock_graph *g)
static bool lock_graph_remove_non_waiters(struct lock_graph *g,
					  struct trans_waiting_for_lock *from)
{
	struct trans_waiting_for_lock *i;

	for (i = g->g + 1; i < g->g + g->nr; i++)
	if (from->trans->locking != from->node_want) {
		lock_graph_pop_from(g, from);
		return true;
	}

	for (i = from + 1; i < g->g + g->nr; i++)
		if (i->trans->locking != i->node_want ||
		    i->trans->locking_wait.start_time != i[-1].lock_start_time) {
			while (g->g + g->nr > i)
				lock_graph_up(g);
			lock_graph_pop_from(g, i);
			return true;
		}

@ -160,7 +172,9 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
	if (i == g->g) {
		trace_would_deadlock(g, i->trans);
		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
		return btree_trans_restart_foreign_task(i->trans,
					BCH_ERR_transaction_restart_would_deadlock,
					_THIS_IP_);
	} else {
		i->trans->lock_must_abort = true;
		wake_up_process(i->trans->locking_wait.task);
@ -179,13 +193,14 @@ static int btree_trans_abort_preference(struct btree_trans *trans)
	return 3;
}

static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
				struct trans_waiting_for_lock *from)
{
	struct trans_waiting_for_lock *i, *abort = NULL;
	unsigned best = 0, pref;
	int ret;

	if (lock_graph_remove_non_waiters(g))
	if (lock_graph_remove_non_waiters(g, from))
		return 0;

	/* Only checking, for debugfs: */
@ -195,7 +210,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
		goto out;
	}

	for (i = g->g; i < g->g + g->nr; i++) {
	for (i = from; i < g->g + g->nr; i++) {
		pref = btree_trans_abort_preference(i->trans);
		if (pref > best) {
			abort = i;
@ -229,8 +244,9 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
	ret = abort_lock(g, abort);
out:
	if (ret)
		while (g->nr)
			lock_graph_up(g);
		lock_graph_pop_all(g);
	else
		lock_graph_pop_from(g, abort);
	return ret;
}

@ -243,7 +259,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
	for (i = g->g; i < g->g + g->nr; i++)
		if (i->trans == trans) {
			closure_put(&trans->ref);
			return break_cycle(g, cycle);
			return break_cycle(g, cycle, i);
		}

	if (g->nr == ARRAY_SIZE(g->g)) {
@ -252,8 +268,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
		if (orig_trans->lock_may_not_fail)
			return 0;

		while (g->nr)
			lock_graph_up(g);
		lock_graph_pop_all(g);

		if (cycle)
			return 0;
@ -281,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)

	g.nr = 0;

	if (trans->lock_must_abort) {
	if (trans->lock_must_abort && !trans->lock_may_not_fail) {
		if (cycle)
			return -1;

@ -336,7 +351,7 @@ next:
			 * structures - which means it can't be blocked
			 * waiting on a lock:
			 */
			if (!lock_graph_remove_non_waiters(&g)) {
			if (!lock_graph_remove_non_waiters(&g, g.g)) {
				/*
				 * If lock_graph_remove_non_waiters()
				 * didn't do anything, it must be
@ -512,7 +527,6 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;
	struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);

	if (!is_btree_node(path, level))
		return false;
@ -536,24 +550,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
	if (race_fault())
		return false;

	if (btree_node_locked(path, level)) {
		bool ret;
	if (btree_node_locked(path, level)
	    ? six_lock_tryupgrade(&b->c.lock)
	    : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
		goto success;

		six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
		ret = six_lock_tryupgrade(&b->c.lock);
		six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);

		if (ret)
			goto success;
	} else {
		if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
			goto success;
	}

	/*
	 * Do we already have an intent lock via another path? If so, just bump
	 * lock count:
	 */
	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(trans, path, level);
@ -782,7 +783,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
			return bch2_trans_relock_fail(trans, path, &f, trace);
	}

	trans_set_locked(trans);
	trans_set_locked(trans, true);
out:
	bch2_trans_verify_locks(trans);
	return 0;
@ -818,6 +819,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans)
	bch2_trans_srcu_unlock(trans);
}

void bch2_trans_unlock_write(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++)
			if (btree_node_write_locked(path, l))
				bch2_btree_node_unlock_write(trans, path, path->l[l].b);
}

int __bch2_trans_mutex_lock(struct btree_trans *trans,
			    struct mutex *lock)
{
@ -856,6 +868,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path)
		       (want == BTREE_NODE_UNLOCKED ||
			have != BTREE_NODE_WRITE_LOCKED) &&
		       want != have);

		BUG_ON(btree_node_locked(path, l) &&
		       path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock));
	}
}
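Editor's note: the lock_graph machinery above is a DFS over the waits-for graph — each node is a transaction, each edge is "blocked on a lock held by". When the walk revisits a transaction already on the path, there is a cycle and one participant must abort. A toy version over a single waits-for chain (illustrative only; the real graph can branch, since a lock can have several holders):

#include <stdbool.h>

#define MAX_TRANS 8

/* waits_for[i] == j means transaction i is blocked on j; -1 means running */
static int waits_for[MAX_TRANS];

/* Follow the chain from @start; return a cycle participant, or -1: */
static int find_deadlock(int start)
{
	bool on_path[MAX_TRANS] = { false };
	int i = start;

	while (i >= 0) {
		if (on_path[i])
			return i;	/* cycle: pick a victim here */
		on_path[i] = true;
		i = waits_for[i];	/* descend to what i waits on */
	}
	return -1;			/* chain ends at a running trans */
}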
libbcachefs/btree_locking.h
@ -13,9 +13,10 @@
#include "btree_iter.h"
#include "six.h"

void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp);

void bch2_trans_unlock_noassert(struct btree_trans *);
void bch2_trans_unlock_write(struct btree_trans *);

static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
@ -75,13 +76,6 @@ static inline void mark_btree_node_locked_noreset(struct btree_path *path,
	path->nodes_locked |= (type + 1) << (level << 1);
}

static inline void mark_btree_node_unlocked(struct btree_path *path,
					    unsigned level)
{
	EBUG_ON(btree_node_write_locked(path, level));
	mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
}

static inline void mark_btree_node_locked(struct btree_trans *trans,
					  struct btree_path *path,
					  unsigned level,
@ -124,19 +118,25 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,

/* unlock: */

void bch2_btree_node_unlock_write(struct btree_trans *,
			struct btree_path *, struct btree *);

static inline void btree_node_unlock(struct btree_trans *trans,
				     struct btree_path *path, unsigned level)
{
	int lock_type = btree_node_locked_type(path, level);

	EBUG_ON(level >= BTREE_MAX_DEPTH);
	EBUG_ON(lock_type == BTREE_NODE_WRITE_LOCKED);

	if (lock_type != BTREE_NODE_UNLOCKED) {
		if (unlikely(lock_type == BTREE_NODE_WRITE_LOCKED)) {
			bch2_btree_node_unlock_write(trans, path, path->l[level].b);
			lock_type = BTREE_NODE_INTENT_LOCKED;
		}
		six_unlock_type(&path->l[level].b->c.lock, lock_type);
		btree_trans_lock_hold_time_update(trans, path, level);
		mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
	}
	mark_btree_node_unlocked(path, level);
}

static inline int btree_path_lowest_level_locked(struct btree_path *path)
@ -162,36 +162,40 @@ static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
 * Updates the saved lock sequence number, so that bch2_btree_node_relock() will
 * succeed:
 */
static inline void
__bch2_btree_node_unlock_write(struct btree_trans *trans, struct btree *b)
{
	if (!b->c.lock.write_lock_recurse) {
		struct btree_path *linked;
		unsigned i;

		trans_for_each_path_with_node(trans, b, linked, i)
			linked->l[b->c.level].lock_seq++;
	}

	six_unlock_write(&b->c.lock);
}

static inline void
bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
				     struct btree *b)
{
	struct btree_path *linked;
	unsigned i;

	EBUG_ON(path->l[b->c.level].b != b);
	EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
	EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);

	mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);

	trans_for_each_path_with_node(trans, b, linked, i)
		linked->l[b->c.level].lock_seq++;

	six_unlock_write(&b->c.lock);
	__bch2_btree_node_unlock_write(trans, b);
}

void bch2_btree_node_unlock_write(struct btree_trans *,
			struct btree_path *, struct btree *);

int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);

/* lock: */

static inline void trans_set_locked(struct btree_trans *trans)
static inline void trans_set_locked(struct btree_trans *trans, bool try)
{
	if (!trans->locked) {
		lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_);
		lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_);
		trans->locked = true;
		trans->last_unlock_ip = 0;

@ -282,7 +286,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
	int ret = 0;

	EBUG_ON(level >= BTREE_MAX_DEPTH);
	bch2_trans_verify_not_unlocked(trans);
	bch2_trans_verify_not_unlocked_or_in_restart(trans);

	if (likely(six_trylock_type(&b->lock, type)) ||
	    btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) ||
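Editor's note: mark_btree_node_locked_noreset() packs the lock state of every btree level into one word, two bits per level — 0 means unlocked, otherwise `type + 1`. A sketch of the encode/decode arithmetic (names invented for the example):

enum lock_state { UNLOCKED = -1, READ = 0, INTENT = 1, WRITE = 2 };

/* two bits per level: 0 = unlocked, else state + 1 */
static inline void set_level_state(unsigned *word, unsigned level,
				   enum lock_state s)
{
	*word &= ~(3U << (level << 1));		/* clear the old bits */
	*word |= (unsigned) (s + 1) << (level << 1);
}

static inline enum lock_state get_level_state(unsigned word, unsigned level)
{
	return (enum lock_state) (((word >> (level << 1)) & 3) - 1);
}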
libbcachefs/btree_node_scan.c
@ -12,6 +12,8 @@
#include "recovery_passes.h"

#include <linux/kthread.h>
#include <linux/min_heap.h>
#include <linux/sched/sysctl.h>
#include <linux/sort.h>

struct find_btree_nodes_worker {
@ -22,17 +24,15 @@ struct find_btree_nodes_worker {

static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
{
	prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ",
		   bch2_btree_id_str(n->btree_id), n->level, n->seq,
		   n->journal_seq, n->cookie);
	bch2_btree_id_level_to_text(out, n->btree_id, n->level);
	prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ",
		   n->seq, n->journal_seq, n->cookie);
	bch2_bpos_to_text(out, n->min_key);
	prt_str(out, "-");
	bch2_bpos_to_text(out, n->max_key);

	if (n->range_updated)
		prt_str(out, " range updated");
	if (n->overwritten)
		prt_str(out, " overwritten");

	for (unsigned i = 0; i < n->nr_ptrs; i++) {
		prt_char(out, ' ');
@ -140,6 +140,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r)
		-found_btree_node_cmp_time(l, r);
}

static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg)
{
	return found_btree_node_cmp_pos(l, r) < 0;
}

static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
{
	struct found_btree_node *l = _l;
	struct found_btree_node *r = _r;

	swap(*l, *r);
}

static const struct min_heap_callbacks found_btree_node_heap_cbs = {
	.less = found_btree_node_cmp_pos_less,
	.swp = found_btree_node_swap,
};

static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
				struct bio *bio, struct btree_node *bn, u64 offset)
{
@ -149,16 +167,25 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
	bio->bi_iter.bi_sector	= offset;
	bch2_bio_map(bio, bn, PAGE_SIZE);

	u64 submit_time = local_clock();
	submit_bio_wait(bio);
	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
			       "IO error in try_read_btree_node() at %llu: %s",
			       offset, bch2_blk_status_to_str(bio->bi_status)))

	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);

	if (bio->bi_status) {
		bch_err_dev_ratelimited(ca,
				"IO error in try_read_btree_node() at %llu: %s",
				offset, bch2_blk_status_to_str(bio->bi_status));
		return;
	}

	if (le64_to_cpu(bn->magic) != bset_magic(c))
		return;

	if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
		if (!c->chacha20)
			return;

		struct nonce nonce = btree_nonce(&bn->keys, 0);
		unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;

@ -186,7 +213,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
			.ptrs[0].type	= 1 << BCH_EXTENT_ENTRY_ptr,
			.ptrs[0].offset	= offset,
			.ptrs[0].dev	= ca->dev_idx,
			.ptrs[0].gen	= *bucket_gen(ca, sector_to_bucket(ca, offset)),
			.ptrs[0].gen	= bucket_gen_get(ca, sector_to_bucket(ca, offset)),
		};
		rcu_read_unlock();

@ -244,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
	bio_put(bio);
	free_page((unsigned long) buf);
	percpu_ref_get(&ca->io_ref);
	percpu_ref_put(&ca->io_ref);
	closure_put(w->cl);
	kfree(w);
	return 0;
@ -263,84 +290,77 @@ static int read_btree_nodes(struct find_btree_nodes *f)
			continue;

		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
		struct task_struct *t;

		if (!w) {
			percpu_ref_put(&ca->io_ref);
			ret = -ENOMEM;
			goto err;
		}

		percpu_ref_get(&ca->io_ref);
		closure_get(&cl);
		w->cl		= &cl;
		w->f		= f;
		w->ca		= ca;

		t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		ret = PTR_ERR_OR_ZERO(t);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			closure_put(&cl);
			f->ret = ret;
			bch_err(c, "error starting kthread: %i", ret);
			kfree(w);
			bch_err_msg(c, ret, "starting kthread");
			break;
		}

		closure_get(&cl);
		percpu_ref_get(&ca->io_ref);
		wake_up_process(t);
	}
err:
	closure_sync(&cl);
	while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2))
		;
	return f->ret ?: ret;
}

static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
static bool nodes_overlap(const struct found_btree_node *l,
			  const struct found_btree_node *r)
{
	while (n + 1 < end &&
	       found_btree_node_cmp_pos(n, n + 1) > 0) {
		swap(n[0], n[1]);
		n++;
	}
	return (l->btree_id	== r->btree_id &&
		l->level	== r->level &&
		bpos_gt(l->max_key, r->min_key));
}

static int handle_overwrites(struct bch_fs *c,
			     struct found_btree_node *start,
			     struct found_btree_node *end)
			     struct found_btree_node *l,
			     found_btree_nodes *nodes_heap)
{
	struct found_btree_node *n;
again:
	for (n = start + 1;
	     n < end &&
	     n->btree_id	== start->btree_id &&
	     n->level		== start->level &&
	     bpos_lt(n->min_key, start->max_key);
	     n++) {
		int cmp = found_btree_node_cmp_time(start, n);
	struct found_btree_node *r;

	while ((r = min_heap_peek(nodes_heap)) &&
	       nodes_overlap(l, r)) {
		int cmp = found_btree_node_cmp_time(l, r);

		if (cmp > 0) {
			if (bpos_cmp(start->max_key, n->max_key) >= 0)
				n->overwritten = true;
			if (bpos_cmp(l->max_key, r->max_key) >= 0)
				min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
			else {
				n->range_updated = true;
				n->min_key = bpos_successor(start->max_key);
				n->range_updated = true;
				bubble_up(n, end);
				goto again;
				r->range_updated = true;
				r->min_key = bpos_successor(l->max_key);
				r->range_updated = true;
				min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
			}
		} else if (cmp < 0) {
			BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
			BUG_ON(bpos_eq(l->min_key, r->min_key));

			start->max_key = bpos_predecessor(n->min_key);
			start->range_updated = true;
		} else if (n->level) {
			n->overwritten = true;
			l->max_key = bpos_predecessor(r->min_key);
			l->range_updated = true;
		} else if (r->level) {
			min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
		} else {
			if (bpos_cmp(start->max_key, n->max_key) >= 0)
				n->overwritten = true;
			if (bpos_cmp(l->max_key, r->max_key) >= 0)
				min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
			else {
				n->range_updated = true;
				n->min_key = bpos_successor(start->max_key);
				n->range_updated = true;
				bubble_up(n, end);
				goto again;
				r->range_updated = true;
				r->min_key = bpos_successor(l->max_key);
				r->range_updated = true;
				min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
			}
		}
	}
@ -352,6 +372,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
{
	struct find_btree_nodes *f = &c->found_btree_nodes;
	struct printbuf buf = PRINTBUF;
	found_btree_nodes nodes_heap = {};
	size_t dst;
	int ret = 0;

@ -406,29 +427,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
	}

	dst = 0;
	darray_for_each(f->nodes, i) {
		if (i->overwritten)
			continue;
	swap(nodes_heap, f->nodes);

		ret = handle_overwrites(c, i, &darray_top(f->nodes));
	{
		/* darray must have same layout as a heap */
		min_heap_char real_heap;
		BUILD_BUG_ON(sizeof(nodes_heap.nr)   != sizeof(real_heap.nr));
		BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size));
		BUILD_BUG_ON(offsetof(found_btree_nodes, nr)   != offsetof(min_heap_char, nr));
		BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size));
	}

	min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL);

	if (nodes_heap.nr) {
		ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
		if (ret)
			goto err;

		BUG_ON(i->overwritten);
		f->nodes.data[dst++] = *i;
		min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
	}
	f->nodes.nr = dst;

	if (c->opts.verbose) {
	while (true) {
		ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap);
		if (ret)
			goto err;

		if (!nodes_heap.nr)
			break;

		ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
		if (ret)
			goto err;

		min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
	}

	for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++)
		BUG_ON(nodes_overlap(n, n + 1));

	if (0 && c->opts.verbose) {
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
	} else {
		bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
	}

	eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
err:
	darray_exit(&nodes_heap);
	printbuf_exit(&buf);
	return ret;
}
@ -499,7 +548,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
	if (c->opts.verbose) {
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
		prt_str(&buf, "recovery ");
		bch2_btree_id_level_to_text(&buf, btree, level);
		prt_str(&buf, " ");
		bch2_bpos_to_text(&buf, node_min);
		prt_str(&buf, " - ");
		bch2_bpos_to_text(&buf, node_max);
@ -528,12 +579,19 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,

		found_btree_node_to_key(&tmp.k, &n);

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
		bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
		printbuf_exit(&buf);
		if (c->opts.verbose) {
			struct printbuf buf = PRINTBUF;
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
			bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
			printbuf_exit(&buf);
		}

		BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0));
		BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
					  (struct bkey_validate_context) {
						.from	= BKEY_VALIDATE_btree_node,
						.level	= level + 1,
						.btree	= btree,
					  }));

		ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
		if (ret)
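Editor's note: the rewritten handle_overwrites() replaces the old O(n²) bubble-up pass with a heap-driven sweep — accept nodes in position order, and whenever the next candidate overlaps the last accepted node, either drop it or clip its start, keeping whichever is newer. The same clip-or-drop logic over integer ranges already sorted by start (a simplification: the real code also special-cases interior nodes and equal start positions):

#include <stddef.h>

struct range { int start, end; int time; };	/* [start, end), newer = bigger time */

/* Resolve overlaps in favor of the newer range; output is non-overlapping. */
static size_t resolve_overwrites(struct range *r, size_t nr)
{
	size_t dst = 0;

	for (size_t i = 0; i < nr; i++) {
		struct range cur = r[i];

		if (dst && r[dst - 1].end > cur.start) {	/* overlap */
			if (r[dst - 1].time >= cur.time) {
				/* previous is newer: drop or clip current */
				if (r[dst - 1].end >= cur.end)
					continue;
				cur.start = r[dst - 1].end;
			} else {
				/* current is newer: clip previous */
				r[dst - 1].end = cur.start;
				if (r[dst - 1].start == r[dst - 1].end)
					dst--;
			}
		}
		r[dst++] = cur;
	}
	return dst;
}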
libbcachefs/btree_node_scan_types.h
@ -6,7 +6,6 @@

struct found_btree_node {
	bool			range_updated:1;
	bool			overwritten:1;
	u8			btree_id;
	u8			level;
	unsigned		sectors_written;
libbcachefs/btree_trans_commit.c
@ -133,7 +133,7 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans)
	return 0;
}

static inline void bch2_trans_unlock_write(struct btree_trans *trans)
static inline void bch2_trans_unlock_updates_write(struct btree_trans *trans)
{
	if (likely(trans->write_locked)) {
		trans_for_each_update(trans, i)
@ -164,6 +164,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
	EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
	kmsan_check_memory(insert, bkey_bytes(&insert->k));

	k = bch2_btree_node_iter_peek_all(node_iter, b);
	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
@ -249,7 +250,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
		new |= 1 << BTREE_NODE_need_write;
	} while (!try_cmpxchg(&b->flags, &old, new));

	btree_node_write_if_need(c, b, SIX_LOCK_read);
	btree_node_write_if_need(trans, b, SIX_LOCK_read);
	six_unlock_read(&b->c.lock);

	bch2_trans_put(trans);
@ -336,6 +337,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
	BUG_ON(i->cached	!= path->cached);
	BUG_ON(i->level		!= path->level);
	BUG_ON(i->btree_id	!= path->btree_id);
	BUG_ON(i->bkey_type	!= __btree_node_type(path->level, path->btree_id));
	EBUG_ON(!i->level &&
		btree_type_has_snapshots(i->btree_id) &&
		!(i->flags & BTREE_UPDATE_internal_snapshot_node) &&
@ -348,7 +350,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
						      unsigned flags)
{
	return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
				    trans->journal_u64s, flags);
				    trans->journal_u64s, flags, trans);
}

#define JSET_ENTRY_LOG_U64s		4
@ -384,7 +386,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
	struct bkey_i *new_k;
	int ret;

	bch2_trans_unlock_write(trans);
	bch2_trans_unlock_updates_write(trans);
	bch2_trans_unlock(trans);

	new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
@ -479,8 +481,7 @@ static int run_one_mem_trigger(struct btree_trans *trans,
			old, flags);
}

static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
				 bool overwrite)
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i)
{
	verify_update_old_key(trans, i);

@ -507,10 +508,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
		return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
					BTREE_TRIGGER_insert|
					BTREE_TRIGGER_overwrite|flags) ?: 1;
	} else if (overwrite && !i->overwrite_trigger_run) {
	} else if (!i->overwrite_trigger_run) {
		i->overwrite_trigger_run = true;
		return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
	} else if (!overwrite && !i->insert_trigger_run) {
	} else if (!i->insert_trigger_run) {
		i->insert_trigger_run = true;
		return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
	} else {
@ -518,73 +519,43 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
	}
}

static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
			      unsigned btree_id_start)
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
	for (int overwrite = 1; overwrite >= 0; --overwrite) {
	unsigned sort_id_start = 0;

	while (sort_id_start < trans->nr_updates) {
		unsigned i, sort_id = trans->updates[sort_id_start].sort_order;
		bool trans_trigger_run;

		/*
		 * Running triggers will append more updates to the list of updates as
		 * we're walking it:
		 * For a given btree, this algorithm runs insert triggers before
		 * overwrite triggers: this is so that when extents are being
		 * moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop
		 * references before they are re-added.
		 *
		 * Running triggers will append more updates to the list of
		 * updates as we're walking it:
		 */
		do {
			trans_trigger_run = false;

			for (unsigned i = btree_id_start;
			     i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
			for (i = sort_id_start;
			     i < trans->nr_updates && trans->updates[i].sort_order <= sort_id;
			     i++) {
				if (trans->updates[i].btree_id != btree_id)
				if (trans->updates[i].sort_order < sort_id) {
					sort_id_start = i;
					continue;
				}

				int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
				int ret = run_one_trans_trigger(trans, trans->updates + i);
				if (ret < 0)
					return ret;
				if (ret)
					trans_trigger_run = true;
			}
		} while (trans_trigger_run);
	}

	return 0;
}

static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
	unsigned btree_id = 0, btree_id_start = 0;
	int ret = 0;

	/*
	 *
	 * For a given btree, this algorithm runs insert triggers before
	 * overwrite triggers: this is so that when extents are being moved
	 * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
	 * they are re-added.
	 */
	for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
		if (btree_id == BTREE_ID_alloc)
			continue;

		while (btree_id_start < trans->nr_updates &&
		       trans->updates[btree_id_start].btree_id < btree_id)
			btree_id_start++;

		ret = run_btree_triggers(trans, btree_id, btree_id_start);
		if (ret)
			return ret;
	}

	for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
		struct btree_insert_entry *i = trans->updates + idx;

		if (i->btree_id > BTREE_ID_alloc)
			break;
		if (i->btree_id == BTREE_ID_alloc) {
			ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
			if (ret)
				return ret;
			break;
		}
		sort_id_start = i;
	}

#ifdef CONFIG_BCACHEFS_DEBUG
@ -609,14 +580,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
	return 0;
}

static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
{
	return (struct bversion) {
		.hi = res->seq >> 32,
		.lo = (res->seq << 32) | (res->offset + offset),
	};
}

static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
			       struct btree_insert_entry **stopped_at,
@ -627,12 +590,11 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
	unsigned u64s = 0;
	int ret = 0;

	bch2_trans_verify_not_unlocked(trans);
	bch2_trans_verify_not_in_restart(trans);
	bch2_trans_verify_not_unlocked_or_in_restart(trans);

	if (race_fault()) {
		trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
		return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
	}

	/*
@ -701,25 +663,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
		struct jset_entry *entry = trans->journal_entries;

		percpu_down_read(&c->mark_lock);

		for (entry = trans->journal_entries;
		     entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
		     entry = vstruct_next(entry))
			if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
			    entry->start->k.type == KEY_TYPE_accounting) {
				BUG_ON(!trans->journal_res.ref);

				struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);

				a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
									(u64 *) entry - (u64 *) trans->journal_entries);
				BUG_ON(bversion_zero(a->k.bversion));

				if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
					ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal);
					if (ret)
						goto revert_fs_usage;
				}
				ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags);
				if (ret)
					goto revert_fs_usage;
			}
		percpu_up_read(&c->mark_lock);

@ -739,14 +690,29 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
			goto fatal_err;
	}

	struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit };

	if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
		validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit;

	for (struct jset_entry *i = trans->journal_entries;
	     i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
	     i = vstruct_next(i)) {
		ret = bch2_journal_entry_validate(c, NULL, i,
						  bcachefs_metadata_version_current,
						  CPU_BIG_ENDIAN, validate_context);
		if (unlikely(ret)) {
			bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
						trans->fn);
			goto fatal_err;
		}
	}

	trans_for_each_update(trans, i) {
		enum bch_validate_flags invalid_flags = 0;
		validate_context.level	= i->level;
		validate_context.btree	= i->btree_id;

		if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
			invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;

		ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
					 i->bkey_type, invalid_flags);
		ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), validate_context);
		if (unlikely(ret)){
			bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
						trans->fn, (void *) i->ip_allocated);
@ -755,24 +721,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
		btree_insert_entry_checks(trans, i);
	}

	for (struct jset_entry *i = trans->journal_entries;
	     i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
	     i = vstruct_next(i)) {
		enum bch_validate_flags invalid_flags = 0;

		if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
			invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;

		ret = bch2_journal_entry_validate(c, NULL, i,
						  bcachefs_metadata_version_current,
						  CPU_BIG_ENDIAN, invalid_flags);
		if (unlikely(ret)) {
			bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
						trans->fn);
			goto fatal_err;
		}
	}

	if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
		struct journal *j = &c->journal;
		struct jset_entry *entry;
@ -833,13 +781,9 @@ revert_fs_usage:
	     entry2 != entry;
	     entry2 = vstruct_next(entry2))
		if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys &&
		    entry2->start->k.type == KEY_TYPE_accounting) {
			struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);

			bch2_accounting_neg(a);
			bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
			bch2_accounting_neg(a);
		}
		    entry2->start->k.type == KEY_TYPE_accounting)
			bch2_accounting_trans_commit_revert(trans,
					bkey_i_to_accounting(entry2->start), flags);
	percpu_up_read(&c->mark_lock);
	return ret;
}
@ -902,7 +846,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
	if (!ret && unlikely(trans->journal_replay_not_finished))
		bch2_drop_overwrites_from_journal(trans);

	bch2_trans_unlock_write(trans);
	bch2_trans_unlock_updates_write(trans);

	if (!ret && trans->journal_pin)
		bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@ -937,6 +881,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
	struct bch_fs *c = trans->c;
	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;

	if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) {
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
		    watermark < BCH_WATERMARK_reclaim) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			goto out;
		}

		ret = drop_locks_do(trans,
			bch2_trans_journal_res_get(trans,
					(flags & BCH_WATERMARK_MASK)|
					JOURNAL_RES_GET_CHECK));
		goto out;
	}

	switch (ret) {
	case -BCH_ERR_btree_insert_btree_node_full:
		ret = bch2_btree_split_leaf(trans, i->path, flags);
@ -948,22 +910,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
		ret = drop_locks_do(trans,
			bch2_accounting_update_sb(trans));
		break;
	case -BCH_ERR_journal_res_get_blocked:
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
		    watermark < BCH_WATERMARK_reclaim) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			break;
		}

		ret = drop_locks_do(trans,
			bch2_trans_journal_res_get(trans,
					(flags & BCH_WATERMARK_MASK)|
					JOURNAL_RES_GET_CHECK));
		break;
	case -BCH_ERR_btree_insert_need_journal_reclaim:
		bch2_trans_unlock(trans);

@ -984,7 +930,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
		BUG_ON(ret >= 0);
		break;
	}

out:
	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);

	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
@ -994,24 +940,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
	return ret;
}

static noinline int
bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
{
	struct bch_fs *c = trans->c;
	int ret;

	if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) ||
	    test_bit(BCH_FS_started, &c->flags))
		return -BCH_ERR_erofs_trans_commit;

	ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
	if (ret)
		return ret;

	bch2_write_ref_get(c, BCH_WRITE_REF_trans);
	return 0;
}

/*
 * This is for updates done in the early part of fsck - btree_gc - before we've
 * gone RW. we only add the new key to the list of keys for journal replay to
@ -1022,6 +950,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;

	BUG_ON(current != c->recovery_task);

	trans_for_each_update(trans, i) {
		int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
		if (ret)
@ -1047,8 +977,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
	struct bch_fs *c = trans->c;
	int ret = 0;

	bch2_trans_verify_not_unlocked(trans);
	bch2_trans_verify_not_in_restart(trans);
	bch2_trans_verify_not_unlocked_or_in_restart(trans);

	ret = trans_maybe_inject_restart(trans, _RET_IP_);
	if (unlikely(ret))
		goto out_reset;

	if (!trans->nr_updates &&
	    !trans->journal_entries_u64s)
@ -1058,16 +991,13 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
	if (ret)
		goto out_reset;

	if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
		ret = do_bch2_trans_commit_to_journal_replay(trans);
		goto out_reset;
	}

	if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
	    unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
		ret = bch2_trans_commit_get_rw_cold(trans, flags);
		if (ret)
			goto out_reset;
		if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
			ret = do_bch2_trans_commit_to_journal_replay(trans);
		else
			ret = -BCH_ERR_erofs_trans_commit;
		goto out_reset;
	}

	EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
@ -1112,8 +1042,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
	}
retry:
	errored_at = NULL;
	bch2_trans_verify_not_unlocked(trans);
	bch2_trans_verify_not_in_restart(trans);
	bch2_trans_verify_not_unlocked_or_in_restart(trans);
	if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
		memset(&trans->journal_res, 0, sizeof(trans->journal_res));
	memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
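Editor's note: the rewritten bch2_trans_commit_run_triggers() no longer special-cases btree IDs — updates are pre-sorted by a sort_order key (alloc and stripes pushed to the end by btree_trigger_order() in the btree_types.h hunk below), and the loop runs each equal-sort_order band to a fixed point before moving on, because triggers may append new updates. A skeleton of that band loop (details elided, run_one passed in to keep the sketch self-contained):

struct update { unsigned char sort_order; /* ... */ };

static int run_triggers(struct update *u, unsigned *nr,
			int (*run_one)(struct update *)) /* 1 = did work */
{
	unsigned band_start = 0;

	while (band_start < *nr) {
		unsigned band = u[band_start].sort_order;
		int did_work;

		/* run this band to a fixed point; *nr may grow as we go: */
		do {
			did_work = 0;
			for (unsigned i = band_start;
			     i < *nr && u[i].sort_order <= band;
			     i++) {
				int ret = run_one(&u[i]);
				if (ret < 0)
					return ret;
				did_work |= ret;
			}
		} while (did_work);

		while (band_start < *nr && u[band_start].sort_order <= band)
			band_start++;
	}
	return 0;
}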
@ -423,6 +423,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
|
||||
|
||||
struct btree_insert_entry {
|
||||
unsigned flags;
|
||||
u8 sort_order;
|
||||
u8 bkey_type;
|
||||
enum btree_id btree_id:8;
|
||||
u8 level:4;
|
||||
@ -509,10 +510,16 @@ struct btree_trans {
|
||||
bool notrace_relock_fail:1;
|
||||
enum bch_errcode restarted:16;
|
||||
u32 restart_count;
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
u32 restart_count_this_trans;
|
||||
#endif
|
||||
|
||||
u64 last_begin_time;
|
||||
unsigned long last_begin_ip;
|
||||
unsigned long last_restarted_ip;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
bch_stacktrace last_restarted_trace;
|
||||
#endif
|
||||
unsigned long last_unlock_ip;
|
||||
unsigned long srcu_lock_time;
|
||||
|
||||
@@ -787,53 +794,76 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type)
 	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
 }
 
-static inline bool btree_node_type_is_extents(enum btree_node_type type)
-{
-	const u64 mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
-	BCH_BTREE_IDS()
-#undef x
-	;
-
-	return BIT_ULL(type) & mask;
-}
-
 static inline bool btree_id_is_extents(enum btree_id btree)
 {
-	return btree_node_type_is_extents(__btree_node_type(0, btree));
-}
-
-static inline bool btree_type_has_snapshots(enum btree_id id)
-{
 	const u64 mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_extents)) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return BIT_ULL(id) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
-static inline bool btree_type_has_snapshot_field(enum btree_id id)
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
 {
+	return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
+}
+
+static inline bool btree_type_has_snapshots(enum btree_id btree)
+{
 	const u64 mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_snapshots)) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return BIT_ULL(id) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
-static inline bool btree_type_has_ptrs(enum btree_id id)
+static inline bool btree_type_has_snapshot_field(enum btree_id btree)
 {
 	const u64 mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_DATA)) << nr)
+#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return BIT_ULL(id) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
+static inline bool btree_type_has_ptrs(enum btree_id btree)
+{
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_data)) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return BIT_ULL(btree) & mask;
+}
+
+static inline bool btree_type_uses_write_buffer(enum btree_id btree)
+{
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_write_buffer)) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return BIT_ULL(btree) & mask;
+}
+
+static inline u8 btree_trigger_order(enum btree_id btree)
+{
+	switch (btree) {
+	case BTREE_ID_alloc:
+		return U8_MAX;
+	case BTREE_ID_stripes:
+		return U8_MAX - 1;
+	default:
+		return btree;
+	}
+}
 
 struct btree_root {
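This hunk renames the per-btree flag macros (BTREE_ID_EXTENTS becomes BTREE_IS_extents, and so on), makes btree_id_is_extents() compute the bitmask directly with btree_node_type_is_extents() derived from it (dropping the old `nr + 1` shift), adds btree_type_uses_write_buffer(), and introduces btree_trigger_order(), which pushes the alloc and stripes btrees to the end of the trigger ordering used below. The compile-time x-macro mask idiom is easiest to see standalone; the btree list, flag bits, and names in this sketch are invented:

#include <stdint.h>
#include <stdio.h>

#define MY_IS_extents	(1U << 0)
#define MY_IS_snapshots	(1U << 1)

/* invented three-btree list: x(name, nr, flags) */
#define MY_BTREE_IDS()				\
	x(extents,	0, MY_IS_extents)	\
	x(inodes,	1, MY_IS_snapshots)	\
	x(alloc,	2, 0)

enum my_btree_id {
#define x(name, nr, flags)	MY_BTREE_ID_##name = nr,
	MY_BTREE_IDS()
#undef x
};

static inline int my_btree_id_is_extents(enum my_btree_id btree)
{
	/* each list entry contributes one bit; the whole OR folds to a constant */
	const uint64_t mask = 0
#define x(name, nr, flags)	|((uint64_t)!!((flags) & MY_IS_extents) << nr)
	MY_BTREE_IDS()
#undef x
	;

	return !!((1ULL << btree) & mask);
}

int main(void)
{
	for (int id = MY_BTREE_ID_extents; id <= MY_BTREE_ID_alloc; id++)
		printf("btree %d is_extents %d\n", id, my_btree_id_is_extents(id));
	return 0;
}

With optimization enabled the mask is a compile-time constant, so each predicate reduces to a single AND against an immediate.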
libbcachefs/btree_update.c:
@@ -17,7 +17,7 @@
 static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					  const struct btree_insert_entry *r)
 {
-	return   cmp_int(l->btree_id,	r->btree_id) ?:
+	return   cmp_int(l->sort_order,	r->sort_order) ?:
		 cmp_int(l->cached,	r->cached) ?:
		 -cmp_int(l->level,	r->level) ?:
		 bpos_cmp(l->k->k.p,	r->k->k.p);
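With the new sort_order field, insert entries no longer sort by raw btree_id, so trigger ordering is decoupled from the enum order. A standalone model using qsort() (the ids are invented; the `?:` chain is the GNU extension the kernel code also relies on, so build with GCC or Clang):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

/* invented btree ids; alloc/stripes get pushed last, like btree_trigger_order() */
enum { ID_extents, ID_inodes, ID_alloc, ID_stripes };

static uint8_t trigger_order(int btree)
{
	switch (btree) {
	case ID_alloc:		return UINT8_MAX;
	case ID_stripes:	return UINT8_MAX - 1;
	default:		return (uint8_t)btree;
	}
}

struct entry { uint8_t sort_order; int btree_id; };

static int entry_cmp(const void *l_, const void *r_)
{
	const struct entry *l = l_, *r = r_;

	/* GNU ?: chaining, as in btree_insert_entry_cmp() */
	return cmp_int(l->sort_order, r->sort_order) ?:
	       cmp_int(l->btree_id, r->btree_id);
}

int main(void)
{
	struct entry e[] = {
		{ trigger_order(ID_alloc),   ID_alloc   },
		{ trigger_order(ID_inodes),  ID_inodes  },
		{ trigger_order(ID_stripes), ID_stripes },
		{ trigger_order(ID_extents), ID_extents },
	};

	qsort(e, sizeof(e) / sizeof(e[0]), sizeof(e[0]), entry_cmp);
	for (unsigned i = 0; i < 4; i++)
		printf("btree %d (sort_order %u)\n", e[i].btree_id, e[i].sort_order);
	return 0;
}

The output lists extents and inodes first and stripes/alloc last, which is the point: alloc and stripes triggers run after everything else regardless of their enum values.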
@@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
 	       !(ret = bkey_err(old_k)) &&
 	       bkey_eq(old_pos, old_k.k->p)) {
 		struct bpos whiteout_pos =
-			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);;
+			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
 
 		if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
 		    snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
@@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 			     BTREE_ITER_intent|
 			     BTREE_ITER_with_updates|
 			     BTREE_ITER_not_extents);
-	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
+	k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
 	if ((ret = bkey_err(k)))
 		goto err;
 	if (!k.k)
@@ -323,7 +323,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 		goto out;
 next:
 	bch2_btree_iter_advance(&iter);
-	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
+	k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
 	if ((ret = bkey_err(k)))
 		goto err;
 	if (!k.k)
@@ -397,6 +397,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
 
 	n = (struct btree_insert_entry) {
 		.flags		= flags,
+		.sort_order	= btree_trigger_order(path->btree_id),
 		.bkey_type	= __btree_node_type(path->level, path->btree_id),
 		.btree_id	= path->btree_id,
 		.level		= path->level,
@@ -511,6 +512,8 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
 int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 				   struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
 {
+	kmsan_check_memory(k, bkey_bytes(&k->k));
+
 	btree_path_idx_t path_idx = iter->update_path ?: iter->path;
 	int ret;
@@ -588,12 +591,9 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi
 int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
 			     enum btree_id btree, struct bpos end)
 {
-	struct bkey_s_c k;
-	int ret = 0;
-
-	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent);
-	k = bch2_btree_iter_prev(iter);
-	ret = bkey_err(k);
+	bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
+	struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
+	int ret = bkey_err(k);
 	if (ret)
 		goto err;
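bch2_bkey_get_empty_slot() previously walked back from POS_MAX; it now starts the iterator at the caller's end bound and uses peek_prev, so the slot it finds respects that bound. Conceptually it returns the position just past the last used key at or below end; a standalone model over a plain array (names and types invented):

#include <stdio.h>

/* model of "peek_prev from end, then take the next slot" */
static long empty_slot(const long *used, int nr, long end)
{
	long last = -1;		/* greatest used key <= end, like peek_prev */

	for (int i = 0; i < nr; i++)
		if (used[i] <= end && used[i] > last)
			last = used[i];
	return last + 1;	/* first free slot after the last used key */
}

int main(void)
{
	long used[] = { 0, 1, 2, 7 };

	printf("%ld\n", empty_slot(used, 4, 100));	/* -> 8 */
	printf("%ld\n", empty_slot(used, 4, 5));	/* -> 3 */
	return 0;
}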
@@ -668,29 +668,21 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
 		      struct disk_reservation *disk_res, int flags,
 		      enum btree_iter_update_trigger_flags iter_flags)
 {
-	return bch2_trans_do(c, disk_res, NULL, flags,
+	return bch2_trans_commit_do(c, disk_res, NULL, flags,
 			     bch2_btree_insert_trans(trans, id, k, iter_flags));
 }
 
-int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
-				unsigned len, unsigned update_flags)
-{
-	struct bkey_i *k;
-
-	k = bch2_trans_kmalloc(trans, sizeof(*k));
-	if (IS_ERR(k))
-		return PTR_ERR(k);
-
-	bkey_init(&k->k);
-	k->k.p = iter->pos;
-	bch2_key_resize(&k->k, len);
-	return bch2_trans_update(trans, iter, k, update_flags);
-}
-
 int bch2_btree_delete_at(struct btree_trans *trans,
 			 struct btree_iter *iter, unsigned update_flags)
 {
-	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
+	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+	int ret = PTR_ERR_OR_ZERO(k);
+	if (ret)
+		return ret;
+
+	bkey_init(&k->k);
+	k->k.p = iter->pos;
+	return bch2_trans_update(trans, iter, k, update_flags);
 }
 
 int bch2_btree_delete(struct btree_trans *trans,
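Besides switching to bch2_trans_commit_do(), this hunk deletes bch2_btree_delete_extent_at() and open-codes the zero-size whiteout in bch2_btree_delete_at(), collapsing the IS_ERR()/PTR_ERR() pair into PTR_ERR_OR_ZERO(). A standalone model of that kernel pointer-error convention (same MAX_ERRNO trick as the kernel's err.h):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* error codes travel inside the pointer's top address range */
static inline void *ERR_PTR(long err)		{ return (void *)err; }
static inline long PTR_ERR(const void *p)	{ return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}
static inline int PTR_ERR_OR_ZERO(const void *p)
{
	return IS_ERR(p) ? (int)PTR_ERR(p) : 0;
}

int main(void)
{
	void *ok  = &(int){ 0 };
	void *bad = ERR_PTR(-ENOMEM);

	/* one expression covers both the success and the error path */
	printf("%d %d\n", PTR_ERR_OR_ZERO(ok), PTR_ERR_OR_ZERO(bad));
	return 0;
}

The payoff in the rewritten function is a single `int ret = PTR_ERR_OR_ZERO(k); if (ret) return ret;` instead of a separate IS_ERR() branch.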
@@ -721,7 +713,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
 	int ret = 0;
 
 	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
-	while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
+	while ((k = bch2_btree_iter_peek_max(&iter, end)).k) {
 		struct disk_reservation disk_res =
 			bch2_disk_reservation_init(trans->c, 0);
 		struct bkey_i delete;
@@ -794,8 +786,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 	return ret;
 }
 
-int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
-		       struct bpos pos, bool set)
+int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set)
 {
 	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
 	int ret = PTR_ERR_OR_ZERO(k);
@@ -804,13 +795,21 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
 
 	bkey_init(&k->k);
 	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
-	k->k.p = pos;
+	k->k.p = iter->pos;
+	if (iter->flags & BTREE_ITER_is_extents)
+		bch2_key_resize(&k->k, 1);
+
+	return bch2_trans_update(trans, iter, k, 0);
+}
+
+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
+		       struct bpos pos, bool set)
+{
 	struct btree_iter iter;
 	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);
 
-	ret = bch2_btree_iter_traverse(&iter) ?:
-		bch2_trans_update(trans, &iter, k, 0);
+	int ret = bch2_btree_iter_traverse(&iter) ?:
+		bch2_btree_bit_mod_iter(trans, &iter, set);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
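The extracted bch2_btree_bit_mod_iter() has to produce a valid key on extent btrees as well, and extent keys carry a size with their position denoting the extent's end, hence the resize to a single sector when BTREE_ITER_is_extents is set. A toy model of that invariant (the struct below is not struct bkey, and the field layout is invented):

#include <stdint.h>
#include <stdio.h>

/* toy extent key: 'offset' is the END position, 'size' the extent length */
struct key { uint64_t offset; uint32_t size; };

static void key_resize(struct key *k, unsigned new_size)
{
	k->size = new_size;	/* end stays fixed, like bch2_key_resize() */
}

int main(void)
{
	struct key k = { .offset = 4096, .size = 0 };

	key_resize(&k, 1);	/* zero-size keys aren't valid on extent btrees */
	printf("start %llu end %llu size %u\n",
	       (unsigned long long)(k.offset - k.size),
	       (unsigned long long)k.offset, k.size);
	return 0;
}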
@@ -827,10 +826,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
 	return bch2_trans_update_buffered(trans, btree, &k);
 }
 
-static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
+int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
 {
+	unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
+	prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);
+
+	int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
+	if (ret)
+		return ret;
+
 	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
-	int ret = PTR_ERR_OR_ZERO(e);
+	ret = PTR_ERR_OR_ZERO(e);
 	if (ret)
 		return ret;
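bch2_trans_log_msg() now absorbs the padding its callers used to do: the printbuf is rounded up to a whole number of u64s and NUL-filled before being copied into a journal entry. The arithmetic in isolation, as a standalone sketch:

#include <stdio.h>
#include <string.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	char buf[64] = "hello";				/* stands in for printbuf contents */
	unsigned pos  = (unsigned)strlen(buf);		/* buf->pos */
	unsigned u64s = DIV_ROUND_UP(pos, sizeof(unsigned long long));

	/* zero-fill up to the u64 boundary, like prt_chars(buf, '\0', ...) */
	memset(buf + pos, 0, u64s * sizeof(unsigned long long) - pos);

	printf("%u bytes -> %u u64s (%zu bytes)\n",
	       pos, u64s, u64s * sizeof(unsigned long long));
	return 0;
}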
@@ -840,6 +846,19 @@ static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf,
 	return 0;
 }
 
+int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
+			unsigned level, struct bkey_i *k)
+{
+	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
+	int ret = PTR_ERR_OR_ZERO(e);
+	if (ret)
+		return ret;
+
+	journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s);
+	bkey_copy(e->start, k);
+	return 0;
+}
+
 __printf(3, 0)
 static int
 __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
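The new bch2_trans_log_bkey() reserves jset_u64s(k->k.u64s), i.e. the key's own u64s plus room for the jset_entry header. A standalone model of the sizing; treating the header as exactly one u64 is an assumption here, valid only if sizeof(struct jset_entry) is 8 bytes:

#include <stdio.h>

#define HDR_U64S 1u	/* assumption: sizeof(struct jset_entry) / sizeof(u64) */

/* model of jset_u64s(): payload u64s plus the entry header */
static unsigned jset_u64s(unsigned u64s)
{
	return u64s + HDR_U64S;
}

int main(void)
{
	unsigned key_u64s = 5;	/* e.g. k->k.u64s for some bkey */

	printf("reserve %u u64s in the journal for a %u-u64 key\n",
	       jset_u64s(key_u64s), key_u64s);
	return 0;
}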
@@ -865,9 +884,8 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
 		memcpy(l->d, buf.buf, buf.pos);
 		c->journal.early_journal_entries.nr += jset_u64s(u64s);
 	} else {
-		ret = bch2_trans_do(c, NULL, NULL,
-				    BCH_TRANS_COMMIT_lazy_rw|commit_flags,
-			__bch2_trans_log_msg(trans, &buf, u64s));
+		ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
+					   bch2_trans_log_msg(trans, &buf));
 	}
 err:
 	printbuf_exit(&buf);