Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-04-21 00:00:04 +03:00
Compare commits (77 commits, SHA1s only):

3b819fd0d7, d681613129, 5551e45735, dd2c7b9b89, 05c6cb61f9, 0f59cc6a9e, 28039ec698,
642bc34abc, 0335994507, ae29e3069f, bb0107dce7, 96e82871a4, c9ee646718, 844721635c,
ad3875d1d9, e99da4dddb, 7b35cb1e4a, 4f5dba7a87, 007d6654bc, bd3e0742af, de5af03b9d,
3af7cf7550, 32d4213924, 24f9bd29a6, bf44ffca50, 4812b3e3d9, 4a5c50774e, e47a5ce2bc,
3aa2a4c47c, 33483738c6, ea5bdadabd, 66c943be6c, 3d968816a4, 76b3ec9d66, 3e2d5b2b9a,
7c47145f6c, 396545c2ea, 3bfe433d22, 19d3a1778c, e54f288992, 7c66e9fac5, 2160e9a7bd,
941dfd7a29, 18b4914587, 8af5c93e48, 6657ce2de3, 5504533986, a55c655158, b8435c5693,
80d0a21ed3, a1080f2f6a, 3bf9a19571, 00dd889d33, 1a8e684ae0, 3d972489a8, 197437be12,
0a23a5ab1a, 017c3cc66e, 8b7810403b, ce0c2241a4, 846f3398a2, e6aecdd3b8, 62ea232b09,
64ce740ac6, c0836924b1, f42ee45c6e, 6cbadc946d, 86cbeaf1c2, dd1a882d17, 3e15e96cb9,
d7f02b4ed5, 1606364799, 553e605886, 55538d928b, 7d30f895c6, 8cc5bdede9, 67c9b378c7
Repository tree:

.bcachefs_revision
.github/workflows
Cargo.lock, Cargo.toml, INSTALL.md, Makefile, Makefile.compiler, bcachefs.8, bch_bindgen
c_src: bcachefs.c, cmd_attr.c, cmd_data.c, cmd_device.c, cmd_dump.c, cmd_format.c,
  cmd_fs.c, cmd_fsck.c, cmd_fusemount.c, cmd_list_journal.c, cmd_migrate.c, cmd_option.c,
  cmds.h, crypto.c, libbcachefs.c, libbcachefs.h, posix_to_bcachefs.c, posix_to_bcachefs.h,
  tools-util.c, tools-util.h
debian
flake.lock, flake.nix
include: crypto, linux
libbcachefs: acl.c, alloc_background.c, alloc_background.h, alloc_foreground.c,
  alloc_foreground.h, alloc_types.h, backpointers.c, backpointers.h, bcachefs.h,
  bcachefs_format.h, bcachefs_ioctl.h, bkey.h, bkey_methods.c, btree_cache.c, btree_gc.c,
  btree_gc.h, btree_io.c, btree_io.h, btree_iter.c, btree_iter.h, btree_journal_iter.c,
  btree_key_cache.c, btree_locking.c, btree_locking.h, btree_node_scan.c,
  btree_trans_commit.c, btree_types.h, btree_update.c, btree_update.h,
  btree_update_interior.c, btree_update_interior.h, btree_write_buffer.c,
  btree_write_buffer.h, buckets.c, buckets.h, buckets_types.h, chardev.c
.bcachefs_revision:
@@ -1 +1 @@
-63bbe0ca416791095c994aba7bea388e947dd60a
+7307b739bbe5f1d9415f5c1da070723b3fb5abbd
.github/workflows/nix-flake.yml (vendored): 39 lines changed
@@ -1,22 +1,31 @@
-name: "Nix-Tests"
+name: Nix Flake actions
 
 on:
   pull_request:
   push:
 
 jobs:
-  nix-flake-check:
+  nix-matrix:
     runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
       - uses: actions/checkout@v4
-      - uses: cachix/install-nix-action@v27
-        with:
-          extra_nix_config: |
-            experimental-features = nix-command flakes
-            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
-      - uses: cachix/cachix-action@v15
-        with:
-          name: bcachefs-tools
-          # If you chose API tokens for write access OR if you have a private cache
-          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-      - run: nix flake show
-      - run: nix flake check --print-build-logs
-      - run: nix build --print-build-logs
+      - uses: cachix/install-nix-action@v30
+      - id: set-matrix
+        name: Generate Nix Matrix
+        run: |
+          set -Eeu
+          matrix="$(nix eval --json '.#githubActions.matrix')"
+          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  nix-build:
+    name: ${{ matrix.name }} (${{ matrix.system }})
+    needs: nix-matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{fromJSON(needs.nix-matrix.outputs.matrix)}}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: cachix/install-nix-action@v30
+      - run: nix build -L '.#${{ matrix.attr }}'
Cargo.lock (generated): 2 lines changed
@@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
 
 [[package]]
 name = "bcachefs-tools"
-version = "1.12.0"
+version = "1.25.1"
 dependencies = [
  "anyhow",
  "bch_bindgen",
Cargo.toml:
@@ -1,9 +1,13 @@
+[workspace]
+resolver = "2"
+default-members = [".", "bch_bindgen"]
+
 [package]
 name = "bcachefs-tools"
-version = "1.20.0"
+version = "1.25.1"
 authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ]
 edition = "2021"
-rust-version = "1.77"
+rust-version = "1.77.0"
 
 [[bin]]
 name = "bcachefs"
INSTALL.md: 18 lines changed
@@ -39,20 +39,19 @@ Starting from Debian Trixie and Ubuntu 23.10, you will additionally need:
 apt install -y systemd-dev
 ```
 
-Fedora: install the "Development tools" group along with:
+Fedora: install build dependencies either with `dnf builddep bcachefs-tools` or with:
 ```shell
-dnf install -y libaio-devel libsodium-devel \
+dnf install -y @c-development libaio-devel libsodium-devel \
     libblkid-devel libzstd-devel zlib-devel userspace-rcu-devel \
     lz4-devel libuuid-devel valgrind-devel keyutils-libs-devel \
-    findutils udev systemd-devel llvm-devel
+    findutils systemd-devel clang-devel llvm-devel rust cargo
 ```
 
 openSUSE: install build dependencies with:
 ```shell
-zypper install -y libaio-devel libsodium-devel \
-    libblkid-devel liburcu-devel libzstd-devel zlib-devel \
-    liblz4-devel libuuid-devel valgrind-devel keyutils-devel \
-    findutils udev systemd-devel llvm-devel
+zypper in -y libaio-devel libsodium-devel libblkid-devel liburcu-devel \
+    libzstd-devel zlib-devel liblz4-devel libuuid-devel valgrind-devel \
+    keyutils-devel findutils udev systemd-devel llvm-devel
 ```
 
 Arch: install bcachefs-tools-git from the AUR.
@@ -88,6 +87,11 @@ Arch:
 pacman -S fuse3
 ```
 
+openSUSE:
+```shell
+zypper in -y fuse3-devel
+```
+
 Then, make using the `BCACHEFS_FUSE` environment variable (make clean first if
 previously built without fuse support):
Makefile: 13 lines changed
@@ -1,4 +1,4 @@
-VERSION=1.20.0
+VERSION=1.25.1
 
 PREFIX?=/usr/local
 LIBEXECDIR?=$(PREFIX)/libexec
@@ -22,6 +22,13 @@ else
         CARGO_CLEAN_ARGS = --quiet
 endif
 
+# when cross compiling, cargo places the built binary in a different location
+ifdef CARGO_BUILD_TARGET
+        BUILT_BIN = target/$(CARGO_BUILD_TARGET)/release/bcachefs
+else
+        BUILT_BIN = target/release/bcachefs
+endif
+
 # Prevent recursive expansions of $(CFLAGS) to avoid repeatedly performing
 # compile tests
 CFLAGS:=$(CFLAGS)
@@ -73,7 +80,7 @@ CFLAGS+=$(call cc-disable-warning, zero-length-array)
 CFLAGS+=$(call cc-disable-warning, shift-overflow)
 CFLAGS+=$(call cc-disable-warning, enum-conversion)
 CFLAGS+=$(call cc-disable-warning, gnu-variable-sized-type-not-at-end)
-export RUSTFLAGS=-C default-linker-libraries
+export RUSTFLAGS:=$(RUSTFLAGS) -C default-linker-libraries
 
 PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib liblz4 libzstd libudev libkeyutils"
 ifdef BCACHEFS_FUSE
@@ -195,7 +202,7 @@ cmd_version.o : .version
 install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
 install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
 install: bcachefs $(optional_install)
-	$(INSTALL) -m0755 -D target/release/bcachefs -t $(DESTDIR)$(ROOT_SBINDIR)
+	$(INSTALL) -m0755 -D $(BUILT_BIN) -t $(DESTDIR)$(ROOT_SBINDIR)
	$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
	$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
	$(INSTALL) -m0755 -D initramfs/hook $(DESTDIR)$(INITRAMFS_HOOK)
Makefile.compiler:
@@ -67,6 +67,10 @@ gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1)
 # Usage: cflags-$(call clang-min-version, 110000) += -foo
 clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
 
+# rustc-min-version
+# Usage: rustc-$(call rustc-min-version, 108500) += -Cfoo
+rustc-min-version = $(call test-ge, $(CONFIG_RUSTC_VERSION), $1)
+
 # ld-option
 # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
 ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
bcachefs.8:
@@ -37,6 +37,8 @@ Check an existing filesystem for errors.
 .Bl -tag -width 18n -compact
 .It Ic fs usage
 Show disk usage
+.It Ic fs top
+Show runtime performance information
 .El
 .Ss Commands for managing devices within a running filesystem
 .Bl -tag -width 22n -compact
bch_bindgen/build.rs:
@@ -61,6 +61,7 @@ fn main() {
         .allowlist_function("keyctl_search")
         .allowlist_function("match_string")
         .allowlist_function("printbuf.*")
+        .blocklist_function("bch2_prt_vprintf")
         .blocklist_type("rhash_lock_head")
         .blocklist_type("srcu_struct")
         .allowlist_var("BCH_.*")
bch_bindgen crate attributes:
@@ -1,3 +1,8 @@
+#![allow(clippy::missing_safety_doc)]
+#![allow(clippy::too_many_arguments)]
+#![allow(clippy::transmute_int_to_bool)]
+#![allow(clippy::unnecessary_cast)]
+#![allow(clippy::useless_transmute)]
 #![allow(non_upper_case_globals)]
 #![allow(non_camel_case_types)]
 #![allow(non_snake_case)]
bch_bindgen/src/bkey.rs:
@@ -65,6 +65,7 @@ impl<'a, 'b> BkeySC<'a> {
         BkeySCToText { k: self, fs }
     }
 
+    #[allow(clippy::missing_transmute_annotations)]
     pub fn v(&'a self) -> BkeyValC<'a> {
         unsafe {
             let ty: c::bch_bkey_type = transmute(self.k.type_ as u32);
@@ -129,7 +130,7 @@ pub struct BkeySCToText<'a, 'b> {
     fs: &'b Fs,
 }
 
-impl<'a, 'b> fmt::Display for BkeySCToText<'a, 'b> {
+impl fmt::Display for BkeySCToText<'_, '_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         unsafe {
             printbuf_to_formatter(f, |buf| {
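A pattern worth calling out, since it repeats through the bch_bindgen diffs above and below: `impl<'a, 'b> fmt::Display for BkeySCToText<'a, 'b>` becomes `impl fmt::Display for BkeySCToText<'_, '_>`. When an impl never refers to its lifetime parameters by name, clippy's needless_lifetimes lint suggests eliding them with `'_`. A minimal sketch of the same transformation on a hypothetical wrapper type (not from bch_bindgen):

```rust
use std::fmt;

// A borrowing wrapper, standing in for BkeySCToText / BtreeNodeToText.
struct Wrapper<'a>(&'a str);

// Before: impl<'a> fmt::Display for Wrapper<'a> { ... }
// After: the lifetime is never used by name, so it can be elided:
impl fmt::Display for Wrapper<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

fn main() {
    println!("{}", Wrapper("hello")); // prints "hello"
}
```

The behavior is identical; the elided form only drops declaration noise.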
bch_bindgen/src/btree.rs:
@@ -25,7 +25,7 @@ impl<'f> BtreeTrans<'f> {
     }
 }
 
-impl<'f> Drop for BtreeTrans<'f> {
+impl Drop for BtreeTrans<'_> {
     fn drop(&mut self) {
         unsafe { c::bch2_trans_put(&mut *self.raw) }
     }
@@ -53,7 +53,7 @@ bitflags! {
 
 pub struct BtreeIter<'t> {
     raw: c::btree_iter,
-    trans: PhantomData<&'t BtreeTrans<'t>>,
+    trans: &'t BtreeTrans<'t>,
 }
 
 impl<'t> BtreeIter<'t> {
@@ -74,16 +74,13 @@ impl<'t> BtreeIter<'t> {
                 flags.bits as u32,
             );
 
-            BtreeIter {
-                raw: iter.assume_init(),
-                trans: PhantomData,
-            }
+            BtreeIter { raw: iter.assume_init(), trans }
         }
     }
 
-    pub fn peek_max<'i>(&'i mut self, end: c::bpos) -> Result<Option<BkeySC<'i>>, bch_errcode> {
+    pub fn peek_max(&mut self, end: c::bpos) -> Result<Option<BkeySC<'_>>, bch_errcode> {
         unsafe {
-            let k = c::bch2_btree_iter_peek_max(&mut self.raw, end);
+            let k = c::bch2_btree_iter_peek_max(self.trans.raw, &mut self.raw, end);
             errptr_to_result_c(k.k).map(|_| {
                 if !k.k.is_null() {
                     Some(BkeySC {
@@ -104,7 +101,7 @@ impl<'t> BtreeIter<'t> {
 
     pub fn peek_and_restart(&mut self) -> Result<Option<BkeySC>, bch_errcode> {
         unsafe {
-            let k = c::bch2_btree_iter_peek_and_restart_outlined(&mut self.raw);
+            let k = c::bch2_btree_iter_peek_and_restart_outlined(self.trans.raw, &mut self.raw);
 
             errptr_to_result_c(k.k).map(|_| {
                 if !k.k.is_null() {
@@ -122,20 +119,20 @@ impl<'t> BtreeIter<'t> {
 
     pub fn advance(&mut self) {
         unsafe {
-            c::bch2_btree_iter_advance(&mut self.raw);
+            c::bch2_btree_iter_advance(self.trans.raw, &mut self.raw);
         }
     }
 }
 
-impl<'t> Drop for BtreeIter<'t> {
+impl Drop for BtreeIter<'_> {
     fn drop(&mut self) {
-        unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
+        unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
     }
 }
 
 pub struct BtreeNodeIter<'t> {
     raw: c::btree_iter,
-    trans: PhantomData<&'t BtreeTrans<'t>>,
+    trans: &'t BtreeTrans<'t>,
 }
 
 impl<'t> BtreeNodeIter<'t> {
@@ -159,54 +156,52 @@ impl<'t> BtreeNodeIter<'t> {
                 flags.bits as u32,
             );
 
-            BtreeNodeIter {
-                raw: iter.assume_init(),
-                trans: PhantomData,
-            }
+            BtreeNodeIter { raw: iter.assume_init(), trans }
         }
     }
 
-    pub fn peek<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
+    pub fn peek(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
         unsafe {
-            let b = c::bch2_btree_iter_peek_node(&mut self.raw);
+            let b = c::bch2_btree_iter_peek_node(self.trans.raw, &mut self.raw);
             errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
         }
     }
 
-    pub fn peek_and_restart<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
+    pub fn peek_and_restart(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
         unsafe {
-            let b = c::bch2_btree_iter_peek_node_and_restart(&mut self.raw);
+            let b = c::bch2_btree_iter_peek_node_and_restart(self.trans.raw, &mut self.raw);
             errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
         }
     }
 
-    pub fn advance<'i>(&'i mut self) {
+    pub fn advance(&mut self) {
         unsafe {
-            c::bch2_btree_iter_next_node(&mut self.raw);
+            c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
         }
     }
 
-    pub fn next<'i>(&'i mut self) -> Result<Option<&'i c::btree>, bch_errcode> {
+    #[allow(clippy::should_implement_trait)]
+    pub fn next(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
         unsafe {
-            let b = c::bch2_btree_iter_next_node(&mut self.raw);
+            let b = c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
             errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
         }
     }
 }
 
-impl<'t> Drop for BtreeNodeIter<'t> {
+impl Drop for BtreeNodeIter<'_> {
     fn drop(&mut self) {
-        unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
+        unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
     }
 }
 
 impl<'b, 'f> c::btree {
     pub fn to_text(&'b self, fs: &'f Fs) -> BtreeNodeToText<'b, 'f> {
-        BtreeNodeToText { b: &self, fs }
+        BtreeNodeToText { b: self, fs }
     }
 
     pub fn ondisk_to_text(&'b self, fs: &'f Fs) -> BtreeNodeOndiskToText<'b, 'f> {
-        BtreeNodeOndiskToText { b: &self, fs }
+        BtreeNodeOndiskToText { b: self, fs }
     }
 }
 
@@ -215,7 +210,7 @@ pub struct BtreeNodeToText<'b, 'f> {
     fs: &'f Fs,
 }
 
-impl<'b, 'f> fmt::Display for BtreeNodeToText<'b, 'f> {
+impl fmt::Display for BtreeNodeToText<'_, '_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         printbuf_to_formatter(f, |buf| unsafe {
             c::bch2_btree_node_to_text(buf, self.fs.raw, self.b)
@@ -228,7 +223,7 @@ pub struct BtreeNodeOndiskToText<'b, 'f> {
     fs: &'f Fs,
 }
 
-impl<'b, 'f> fmt::Display for BtreeNodeOndiskToText<'b, 'f> {
+impl fmt::Display for BtreeNodeOndiskToText<'_, '_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         printbuf_to_formatter(f, |buf| unsafe {
             c::bch2_btree_node_ondisk_to_text(buf, self.fs.raw, self.b)
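The btree.rs changes above follow from a C API change: the `bch2_btree_iter_*` functions now take the `btree_trans` as an explicit first argument instead of reading it out of the iterator. Storing a real `&'t BtreeTrans<'t>` in place of `PhantomData` gives the wrapper that pointer while keeping the same borrow-checker guarantee that an iterator cannot outlive its transaction. A hedged sketch of the ownership pattern with stand-in types (not the real bch_bindgen definitions):

```rust
// Minimal sketch: the iterator stores a real borrow of the transaction,
// because the updated C functions take the trans as an explicit argument.
struct Trans {
    raw: *mut u8, // stands in for the C-side *mut c::btree_trans
}

struct Iter<'t> {
    trans: &'t Trans, // previously: PhantomData<&'t Trans>
}

impl<'t> Iter<'t> {
    fn new(trans: &'t Trans) -> Self {
        Iter { trans }
    }

    fn advance(&mut self) {
        // would be: unsafe { c::bch2_btree_iter_advance(self.trans.raw, &mut self.raw) }
        let _ = self.trans.raw;
    }
}

impl Drop for Iter<'_> {
    fn drop(&mut self) {
        // would be: unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
        let _ = self.trans.raw;
    }
}

fn main() {
    let trans = Trans { raw: std::ptr::null_mut() };
    let mut iter = Iter::new(&trans);
    iter.advance();
    // `iter` cannot outlive `trans`: the borrow enforces the C API's rule
    // that iterators are exited before their transaction is put.
}
```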
bch_bindgen/src/fs.rs:
@@ -9,7 +9,7 @@ pub struct Fs {
 }
 
 impl Fs {
-    pub fn open(devs: &Vec<PathBuf>, opts: c::bch_opts) -> Result<Fs, bch_errcode> {
+    pub fn open(devs: &[PathBuf], opts: c::bch_opts) -> Result<Fs, bch_errcode> {
         let devs: Vec<_> = devs
             .iter()
             .map(|i| CString::new(i.as_os_str().as_bytes()).unwrap().into_raw())
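The `Fs::open` signature change from `&Vec<PathBuf>` to `&[PathBuf]` is clippy's ptr_arg fix: a slice parameter is strictly more general, since `&Vec<T>` auto-derefs to `&[T]` at every call site. A small self-contained illustration (the `open_count` function is hypothetical):

```rust
use std::path::PathBuf;

// Taking &[PathBuf] instead of &Vec<PathBuf> accepts vectors, arrays and slices alike.
fn open_count(devs: &[PathBuf]) -> usize {
    devs.len()
}

fn main() {
    let v = vec![PathBuf::from("/dev/sda")];
    assert_eq!(open_count(&v), 1); // &Vec<PathBuf> coerces to &[PathBuf]
    assert_eq!(open_count(&[PathBuf::from("/dev/sdb")]), 1); // arrays work too
}
```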
bch_bindgen FromStr implementations:
@@ -109,15 +109,10 @@ impl FromStr for c::btree_id {
         let s = CString::new(s).unwrap();
         let p = s.as_ptr();
 
-        let v = unsafe {
-            c::match_string(
-                c::__bch2_btree_ids[..].as_ptr(),
-                (-(1 as isize)) as usize,
-                p,
-            )
-        };
+        let v =
+            unsafe { c::match_string(c::__bch2_btree_ids[..].as_ptr(), (-1_isize) as usize, p) };
         if v >= 0 {
-            Ok(unsafe { std::mem::transmute(v) })
+            Ok(unsafe { std::mem::transmute::<i32, bcachefs::btree_id>(v) })
         } else {
             Err(BchToolsErr::InvalidBtreeId)
         }
@@ -131,11 +126,9 @@ impl FromStr for c::bch_bkey_type {
         let s = CString::new(s).unwrap();
         let p = s.as_ptr();
 
-        let v = unsafe {
-            c::match_string(c::bch2_bkey_types[..].as_ptr(), (-(1 as isize)) as usize, p)
-        };
+        let v = unsafe { c::match_string(c::bch2_bkey_types[..].as_ptr(), (-1_isize) as usize, p) };
         if v >= 0 {
-            Ok(unsafe { std::mem::transmute(v) })
+            Ok(unsafe { std::mem::transmute::<i32, bcachefs::bch_bkey_type>(v) })
         } else {
             Err(BchToolsErr::InvalidBkeyType)
         }
@@ -192,7 +185,7 @@ impl FromStr for c::bpos {
 
         let ino: u64 = ino_str.parse().map_err(|_| BchToolsErr::InvalidBpos)?;
         let off: u64 = off_str.parse().map_err(|_| BchToolsErr::InvalidBpos)?;
-        let snp: u32 = snp_str.map(|s| s.parse().ok()).flatten().unwrap_or(0);
+        let snp: u32 = snp_str.and_then(|s| s.parse().ok()).unwrap_or(0);
 
         Ok(c::bpos {
             inode: ino,
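The transmute edits above replace inferred `std::mem::transmute(v)` with the fully annotated `transmute::<i32, bcachefs::btree_id>(v)` form, per clippy's missing_transmute_annotations lint: pinning both types keeps a later signature change from silently transmuting between the wrong pair. A sketch with a stand-in `#[repr(i32)]` enum (the real targets are the bindgen-generated types):

```rust
// Hedged sketch with a stand-in enum; the real code transmutes into the
// bindgen-generated bcachefs::btree_id / bch_bkey_type types.
#[repr(i32)]
#[derive(Debug)]
#[allow(dead_code)]
enum BtreeId {
    Extents = 0,
    Inodes = 1,
}

fn from_raw(v: i32) -> BtreeId {
    // Explicit source and destination types: if either side changes,
    // this fails to compile instead of silently transmuting wrong types.
    // (Only valid for in-range discriminants, as in the original code,
    // which checks `v >= 0` against a known table first.)
    unsafe { std::mem::transmute::<i32, BtreeId>(v) }
}

fn main() {
    println!("{:?}", from_raw(1)); // Inodes
}
```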
bch_bindgen/src/opts.rs:
@@ -1,3 +1,7 @@
+use crate::c;
+use crate::fs::Fs;
+use std::ffi::{CString, c_char};
+
 #[macro_export]
 macro_rules! opt_set {
     ($opts:ident, $n:ident, $v:expr) => {
@@ -33,3 +37,29 @@ macro_rules! opt_get {
         }
     };
 }
+
+pub fn parse_mount_opts(fs: Option<&mut Fs>, optstr: Option<&str>, ignore_unknown: bool)
+    -> Result<c::bch_opts, c::bch_errcode> {
+    let mut opts: c::bch_opts = Default::default();
+
+    if let Some(optstr) = optstr {
+        let optstr = CString::new(optstr).unwrap();
+        let optstr_ptr = optstr.as_ptr();
+
+        let ret = unsafe {
+            c::bch2_parse_mount_opts(fs.map_or(std::ptr::null_mut(), |f| f.raw),
+                                     &mut opts as *mut c::bch_opts,
+                                     std::ptr::null_mut(),
+                                     optstr_ptr as *mut c_char,
+                                     ignore_unknown)
+        };
+
+        drop(optstr);
+
+        if ret != 0 {
+            let err: c::bch_errcode = unsafe { std::mem::transmute(-ret) };
+            return Err(err);
+        }
+    }
+    Ok(opts)
+}
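The new `parse_mount_opts` wrapper hands a mount-option string to the C parser `bch2_parse_mount_opts` and maps a nonzero return into a `bch_errcode`. A hedged usage sketch (the `bch_bindgen::opts` module path is assumed from the diff; adjust to the actual crate layout):

```rust
// Hypothetical caller: parse options without an open filesystem (fs = None),
// rejecting unknown option names (ignore_unknown = false).
fn parse_example() {
    match bch_bindgen::opts::parse_mount_opts(None, Some("verbose,degraded"), false) {
        Ok(opts) => {
            let _: bch_bindgen::c::bch_opts = opts; // ready to pass to Fs::open
        }
        Err(e) => eprintln!("bad mount options: {:?}", e),
    }
}
```

Note the explicit `drop(optstr)` in the diff: it keeps the `CString` alive until after the FFI call returns, making it harder to accidentally shorten the buffer's lifetime when refactoring.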
c_src/bcachefs.c:
@@ -33,6 +33,7 @@ void bcachefs_usage(void)
              "Superblock commands:\n"
              "  format                   Format a new filesystem\n"
              "  show-super               Dump superblock information to stdout\n"
              "  recover-super            Attempt to recover overwritten superblock from backups\n"
+             "  set-fs-option            Set a filesystem option\n"
              "  reset-counters           Reset all counters on an unmounted device\n"
              "\n"
@@ -92,79 +93,27 @@ void bcachefs_usage(void)
              "  list                     List filesystem metadata in textual form\n"
              "  list_journal             List contents of journal\n"
              "\n"
+#ifdef BCACHEFS_FUSE
+             "FUSE:\n"
+             "  fusemount                Mount a filesystem via FUSE\n"
+             "\n"
+#endif
              "Miscellaneous:\n"
+             "  completions              Generate shell completions\n"
              "  version                  Display the version of the invoked bcachefs tool\n");
 }
 
 static char *pop_cmd(int *argc, char *argv[])
 {
         char *cmd = argv[1];
         if (!(*argc < 2))
                 memmove(&argv[1], &argv[2], (*argc - 2) * sizeof(argv[0]));
         (*argc)--;
         argv[*argc] = NULL;
 
         return cmd;
 }
 
 int fs_cmds(int argc, char *argv[])
 {
         char *cmd = pop_cmd(&argc, argv);
 
-        if (argc < 1) {
-                bcachefs_usage();
-                exit(EXIT_FAILURE);
-        }
+        if (argc < 1)
+                return fs_usage();
         if (!strcmp(cmd, "usage"))
                 return cmd_fs_usage(argc, argv);
         if (!strcmp(cmd, "top"))
                 return cmd_fs_top(argc, argv);
 
-        return 0;
-}
-
-int device_cmds(int argc, char *argv[])
-{
-        char *cmd = pop_cmd(&argc, argv);
-
-        if (argc < 1)
-                return device_usage();
-        if (!strcmp(cmd, "add"))
-                return cmd_device_add(argc, argv);
-        if (!strcmp(cmd, "remove"))
-                return cmd_device_remove(argc, argv);
-        if (!strcmp(cmd, "online"))
-                return cmd_device_online(argc, argv);
-        if (!strcmp(cmd, "offline"))
-                return cmd_device_offline(argc, argv);
-        if (!strcmp(cmd, "evacuate"))
-                return cmd_device_evacuate(argc, argv);
-        if (!strcmp(cmd, "set-state"))
-                return cmd_device_set_state(argc, argv);
-        if (!strcmp(cmd, "resize"))
-                return cmd_device_resize(argc, argv);
-        if (!strcmp(cmd, "resize-journal"))
-                return cmd_device_resize_journal(argc, argv);
-
-        return 0;
-}
-
-int data_cmds(int argc, char *argv[])
-{
-        char *cmd = pop_cmd(&argc, argv);
-
-        if (argc < 1)
-                return data_usage();
-        if (!strcmp(cmd, "rereplicate"))
-                return cmd_data_rereplicate(argc, argv);
-        if (!strcmp(cmd, "scrub"))
-                return cmd_data_scrub(argc, argv);
-        if (!strcmp(cmd, "job"))
-                return cmd_data_job(argc, argv);
-
-        return 0;
+        fs_usage();
+        return -EINVAL;
 }
c_src/cmd_attr.c:
@@ -48,7 +48,7 @@ static void propagate_recurse(int dirfd)
                         continue;
                 }
                 propagate_recurse(fd);
-                close(fd);
+                xclose(fd);
         }
 
         if (errno)
@@ -80,7 +80,7 @@ static void do_setattr(char *path, struct bch_opt_strs opts)
                 die("error opening %s: %m", path);
 
         propagate_recurse(dirfd);
-        close(dirfd);
+        xclose(dirfd);
 }
 
 static void setattr_usage(void)
c_src/cmd_data.c:
@@ -10,19 +10,6 @@
 #include "cmds.h"
 #include "libbcachefs.h"
 
-int data_usage(void)
-{
-        puts("bcachefs data - manage filesystem data\n"
-             "Usage: bcachefs data <CMD> [OPTIONS]\n"
-             "\n"
-             "Commands:\n"
-             "  rereplicate                     Rereplicate degraded data\n"
-             "  job                             Kick off low level data jobs\n"
-             "\n"
-             "Report bugs to <linux-bcachefs@vger.kernel.org>");
-        return 0;
-}
-
 static void data_rereplicate_usage(void)
 {
         puts("bcachefs data rereplicate\n"
@@ -37,7 +24,7 @@ static void data_rereplicate_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_data_rereplicate(int argc, char *argv[])
+static int cmd_data_rereplicate(int argc, char *argv[])
 {
         int opt;
 
@@ -78,7 +65,7 @@ static void data_scrub_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_data_scrub(int argc, char *argv[])
+static int cmd_data_scrub(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "metadata", no_argument, NULL, 'm' },
@@ -180,7 +167,7 @@ int cmd_data_scrub(int argc, char *argv[])
 
                 if (dev->progress_fd >= 0 &&
                     read(dev->progress_fd, &e, sizeof(e)) != sizeof(e)) {
-                        close(dev->progress_fd);
+                        xclose(dev->progress_fd);
                         dev->progress_fd = -1;
                 }
 
@@ -199,7 +186,7 @@ int cmd_data_scrub(int argc, char *argv[])
                 }
 
                 if (dev->progress_fd >= 0 && e.ret) {
-                        close(dev->progress_fd);
+                        xclose(dev->progress_fd);
                         dev->progress_fd = -1;
                         dev->ret = e.ret;
                 }
@@ -283,7 +270,7 @@ static void data_job_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_data_job(int argc, char *argv[])
+static int cmd_data_job(int argc, char *argv[])
 {
         struct bch_ioctl_data op = {
                 .start_btree = 0,
@@ -326,3 +313,33 @@ int cmd_data_job(int argc, char *argv[])
 
         return bchu_data(bcache_fs_open(fs_path), op);
 }
+
+static int data_usage(void)
+{
+        puts("bcachefs data - manage filesystem data\n"
+             "Usage: bcachefs data <CMD> [OPTIONS]\n"
+             "\n"
+             "Commands:\n"
+             "  rereplicate                     Rereplicate degraded data\n"
+             "  job                             Kick off low level data jobs\n"
+             "\n"
+             "Report bugs to <linux-bcachefs@vger.kernel.org>");
+        return 0;
+}
+
+int data_cmds(int argc, char *argv[])
+{
+        char *cmd = pop_cmd(&argc, argv);
+
+        if (argc < 1)
+                return data_usage();
+        if (!strcmp(cmd, "rereplicate"))
+                return cmd_data_rereplicate(argc, argv);
+        if (!strcmp(cmd, "scrub"))
+                return cmd_data_scrub(argc, argv);
+        if (!strcmp(cmd, "job"))
+                return cmd_data_job(argc, argv);
+
+        data_usage();
+        return -EINVAL;
+}
c_src/cmd_device.c:
@@ -23,47 +23,25 @@
 #include "libbcachefs/opts.h"
 #include "tools-util.h"
 
-int device_usage(void)
-{
-        puts("bcachefs device - manage devices within a running filesystem\n"
-             "Usage: bcachefs device <CMD> [OPTION]\n"
-             "\n"
-             "Commands:\n"
-             "  add                     add a new device to an existing filesystem\n"
-             "  remove                  remove a device from an existing filesystem\n"
-             "  online                  re-add an existing member to a filesystem\n"
-             "  offline                 take a device offline, without removing it\n"
-             "  evacuate                migrate data off a specific device\n"
-             "  set-state               mark a device as failed\n"
-             "  resize                  resize filesystem on a device\n"
-             "  resize-journal          resize journal on a device\n"
-             "\n"
-             "Report bugs to <linux-bcachefs@vger.kernel.org>");
-        return 0;
-}
-
 static void device_add_usage(void)
 {
         puts("bcachefs device add - add a device to an existing filesystem\n"
              "Usage: bcachefs device add [OPTION]... filesystem device\n"
              "\n"
-             "Options:\n"
-             "  -S, --fs_size=size          Size of filesystem on device\n"
-             "  -B, --bucket=size           Bucket size\n"
-             "  -D, --discard               Enable discards\n"
-             "  -l, --label=label           Disk label\n"
+             "Options:\n");
+
+        bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
+
+        puts("  -l, --label=label           Disk label\n"
              "  -f, --force                 Use device even if it appears to already be formatted\n"
              "  -h, --help                  Display this help and exit\n"
              "\n"
              "Report bugs to <linux-bcachefs@vger.kernel.org>");
 }
 
-int cmd_device_add(int argc, char *argv[])
+static int cmd_device_add(int argc, char *argv[])
 {
         static const struct option longopts[] = {
-                { "fs_size",            required_argument,      NULL, 'S' },
-                { "bucket",             required_argument,      NULL, 'B' },
-                { "discard",            no_argument,            NULL, 'D' },
                 { "label",              required_argument,      NULL, 'l' },
                 { "force",              no_argument,            NULL, 'f' },
                 { "help",               no_argument,            NULL, 'h' },
@@ -72,22 +50,31 @@ int cmd_device_add(int argc, char *argv[])
         struct format_opts format_opts = format_opts_default();
         struct dev_opts dev_opts = dev_opts_default();
         bool force = false;
-        int opt;
 
-        while ((opt = getopt_long(argc, argv, "S:B:Dl:fh",
-                                  longopts, NULL)) != -1)
-                switch (opt) {
-                case 'S':
-                        if (bch2_strtoull_h(optarg, &dev_opts.size))
-                                die("invalid filesystem size");
-                        break;
-                case 'B':
-                        if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-                                die("bad bucket_size %s", optarg);
-                        break;
-                case 'D':
-                        dev_opts.discard = true;
+        while (true) {
+                const struct bch_option *opt =
+                        bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_DEVICE);
+                if (opt) {
+                        unsigned id = opt - bch2_opt_table;
+                        u64 v;
+                        struct printbuf err = PRINTBUF;
+                        int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+                        if (ret)
+                                die("invalid %s: %s", opt->attr.name, err.buf);
+
+                        if (opt->flags & OPT_DEVICE)
+                                bch2_opt_set_by_id(&dev_opts.opts, id, v);
+                        else
+                                die("got bch_opt of wrong type %s", opt->attr.name);
+
+                        continue;
+                }
+
+                int optid = getopt_long(argc, argv, "S:B:Dl:fh", longopts, NULL);
+                if (optid == -1)
                         break;
+
+                switch (optid) {
                 case 'l':
                         dev_opts.label = strdup(optarg);
                         break;
@@ -97,7 +84,11 @@ int cmd_device_add(int argc, char *argv[])
                 case 'h':
                         device_add_usage();
                         exit(EXIT_SUCCESS);
+                case '?':
+                        exit(EXIT_FAILURE);
+                        break;
                 }
+        }
         args_shift(optind);
 
         char *fs_path = arg_pop();
@@ -127,10 +118,11 @@ int cmd_device_add(int argc, char *argv[])
         opt_set(fs_opts, btree_node_size,
                 read_file_u64(fs.sysfs_fd, "options/btree_node_size"));
 
-        struct bch_sb *sb = bch2_format(fs_opt_strs,
-                                        fs_opts,
-                                        format_opts,
-                                        &dev_opts, 1);
+        dev_opts_list devs = {};
+        darray_push(&devs, dev_opts);
+
+        struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+        darray_exit(&devs);
         free(sb);
         bchu_disk_add(fs, dev_opts.path);
         return 0;
@@ -152,7 +144,7 @@ static void device_remove_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_device_remove(int argc, char *argv[])
+static int cmd_device_remove(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "by-id",              0, NULL, 'i' },
@@ -218,7 +210,7 @@ static void device_online_usage(void)
              "Report bugs to <linux-bcachefs@vger.kernel.org>");
 }
 
-int cmd_device_online(int argc, char *argv[])
+static int cmd_device_online(int argc, char *argv[])
 {
         int opt;
 
@@ -255,7 +247,7 @@ static void device_offline_usage(void)
              "Report bugs to <linux-bcachefs@vger.kernel.org>");
 }
 
-int cmd_device_offline(int argc, char *argv[])
+static int cmd_device_offline(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "force",              0, NULL, 'f' },
@@ -299,7 +291,7 @@ static void device_evacuate_usage(void)
              "Report bugs to <linux-bcachefs@vger.kernel.org>");
 }
 
-int cmd_device_evacuate(int argc, char *argv[])
+static int cmd_device_evacuate(int argc, char *argv[])
 {
         int opt;
 
@@ -357,7 +349,7 @@ static void device_set_state_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_device_set_state(int argc, char *argv[])
+static int cmd_device_set_state(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "force",              0, NULL, 'f' },
@@ -461,7 +453,7 @@ static void device_resize_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_device_resize(int argc, char *argv[])
+static int cmd_device_resize(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "help",               0, NULL, 'h' },
@@ -533,7 +525,7 @@ int cmd_device_resize(int argc, char *argv[])
                 if (resize)
                         die("confused: more than one online device?");
                 resize = ca;
-                percpu_ref_get(&resize->io_ref);
+                percpu_ref_get(&resize->io_ref[READ]);
         }
 
         u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@@ -546,7 +538,7 @@ int cmd_device_resize(int argc, char *argv[])
                 if (ret)
                         fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
 
-                percpu_ref_put(&resize->io_ref);
+                percpu_ref_put(&resize->io_ref[READ]);
                 bch2_fs_stop(c);
         }
         return 0;
@@ -563,7 +555,7 @@ static void device_resize_journal_usage(void)
         exit(EXIT_SUCCESS);
 }
 
-int cmd_device_resize_journal(int argc, char *argv[])
+static int cmd_device_resize_journal(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "help",               0, NULL, 'h' },
@@ -630,7 +622,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
                 if (resize)
                         die("confused: more than one online device?");
                 resize = ca;
-                percpu_ref_get(&resize->io_ref);
+                percpu_ref_get(&resize->io_ref[READ]);
         }
 
         u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@@ -640,8 +632,54 @@ int cmd_device_resize_journal(int argc, char *argv[])
                 if (ret)
                         fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
 
-                percpu_ref_put(&resize->io_ref);
+                percpu_ref_put(&resize->io_ref[READ]);
                 bch2_fs_stop(c);
         }
         return 0;
 }
+
+static int device_usage(void)
+{
+        puts("bcachefs device - manage devices within a running filesystem\n"
+             "Usage: bcachefs device <CMD> [OPTION]\n"
+             "\n"
+             "Commands:\n"
+             "  add                     add a new device to an existing filesystem\n"
+             "  remove                  remove a device from an existing filesystem\n"
+             "  online                  re-add an existing member to a filesystem\n"
+             "  offline                 take a device offline, without removing it\n"
+             "  evacuate                migrate data off a specific device\n"
+             "  set-state               mark a device as failed\n"
+             "  resize                  resize filesystem on a device\n"
+             "  resize-journal          resize journal on a device\n"
+             "\n"
+             "Report bugs to <linux-bcachefs@vger.kernel.org>");
+        return 0;
+}
+
+int device_cmds(int argc, char *argv[])
+{
+        char *cmd = pop_cmd(&argc, argv);
+
+        if (argc < 1)
+                return device_usage();
+        if (!strcmp(cmd, "add"))
+                return cmd_device_add(argc, argv);
+        if (!strcmp(cmd, "remove"))
+                return cmd_device_remove(argc, argv);
+        if (!strcmp(cmd, "online"))
+                return cmd_device_online(argc, argv);
+        if (!strcmp(cmd, "offline"))
+                return cmd_device_offline(argc, argv);
+        if (!strcmp(cmd, "evacuate"))
+                return cmd_device_evacuate(argc, argv);
+        if (!strcmp(cmd, "set-state"))
+                return cmd_device_set_state(argc, argv);
+        if (!strcmp(cmd, "resize"))
+                return cmd_device_resize(argc, argv);
+        if (!strcmp(cmd, "resize-journal"))
+                return cmd_device_resize_journal(argc, argv);
+
+        device_usage();
+        return -EINVAL;
+}
c_src/cmd_dump.c:
@@ -116,8 +116,7 @@ int cmd_dump(int argc, char *argv[])
         opt_set(opts, read_only,       true);
         opt_set(opts, nochanges,       true);
         opt_set(opts, norecovery,      true);
-        opt_set(opts, degraded,        true);
-        opt_set(opts, very_degraded,   true);
+        opt_set(opts, degraded,        BCH_DEGRADED_very);
         opt_set(opts, errors,          BCH_ON_ERROR_continue);
         opt_set(opts, fix_errors,      FSCK_FIX_no);
 
@@ -172,7 +171,7 @@ int cmd_dump(int argc, char *argv[])
                 free(path);
 
                 dump_one_device(c, ca, fd, entire_journal);
-                close(fd);
+                xclose(fd);
         }
 
         up_read(&c->state_lock);
c_src/cmd_format.c:
@@ -39,11 +39,7 @@ x('L',  fs_label,               required_argument)      \
 x('U',  uuid,                   required_argument)      \
 x(0,    fs_size,                required_argument)      \
 x(0,    superblock_size,        required_argument)      \
-x(0,    bucket_size,            required_argument)      \
 x('l',  label,                  required_argument)      \
-x(0,    discard,                no_argument)            \
-x(0,    data_allowed,           required_argument)      \
-x(0,    durability,             required_argument)      \
 x(0,    version,                required_argument)      \
 x(0,    no_initialize,          no_argument)            \
 x(0,    source,                 required_argument)      \
@@ -52,17 +48,16 @@ x('q',  quiet,                  no_argument)            \
 x('v',  verbose,                no_argument)            \
 x('h',  help,                   no_argument)
 
-static void usage(void)
+static void format_usage(void)
 {
         puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
              "Usage: bcachefs format [OPTION]... <devices>\n"
              "\n"
              "Options:");
 
-        bch2_opts_usage(OPT_FORMAT);
+        bch2_opts_usage(OPT_FORMAT|OPT_FS);
 
-        puts(
-             "      --replicas=#            Sets both data and metadata replicas\n"
+        puts("      --replicas=#            Sets both data and metadata replicas\n"
              "      --encrypted             Enable whole filesystem encryption (chacha20/poly1305)\n"
              "      --no_passphrase         Don't encrypt master encryption key\n"
              "  -L, --fs_label=label\n"
@@ -72,9 +67,10 @@ static void usage(void)
              "\n"
              "Device specific options:");
 
-        bch2_opts_usage(OPT_DEVICE);
+        bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
 
-        puts("  -l, --label=label           Disk label\n"
+        puts("      --fs_size=size          Size of filesystem on device\n"
+             "  -l, --label=label           Disk label\n"
              "\n"
              "  -f, --force\n"
              "  -q, --quiet                 Only print errors\n"
@@ -116,41 +112,62 @@ u64 read_flag_list_or_die(char *opt, const char * const list[],
         return v;
 }
 
-void build_fs(struct bch_fs *c, const char *src_path)
+static void build_fs(struct bch_fs *c, const char *src_path)
 {
         struct copy_fs_state s = {};
         int src_fd = xopen(src_path, O_RDONLY|O_NOATIME);
         struct stat stat = xfstat(src_fd);
 
         if (!S_ISDIR(stat.st_mode))
                 die("%s is not a directory", src_path);
 
-        copy_fs(c, src_fd, src_path, &s);
+        copy_fs(c, src_fd, src_path, &s, 0);
 }
 
 int cmd_format(int argc, char *argv[])
 {
-        DARRAY(struct dev_opts) devices = { 0 };
-        DARRAY(char *) device_paths = { 0 };
+        dev_opts_list devices = {};
+        darray_str device_paths = {};
         struct format_opts opts = format_opts_default();
         struct dev_opts dev_opts = dev_opts_default();
         bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
         bool unconsumed_dev_option = false;
         unsigned v;
-        int opt;
 
-        struct bch_opt_strs fs_opt_strs =
-                bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
-        struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+        struct bch_opt_strs fs_opt_strs = {};
+        struct bch_opts fs_opts = bch2_opts_empty();
 
-        if (getenv("BCACHEFS_KERNEL_ONLY"))
-                initialize = false;
+        while (true) {
+                const struct bch_option *opt =
+                        bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_FS|OPT_DEVICE);
+                if (opt) {
+                        unsigned id = opt - bch2_opt_table;
+                        u64 v;
+                        struct printbuf err = PRINTBUF;
+                        int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+                        if (ret == -BCH_ERR_option_needs_open_fs) {
+                                fs_opt_strs.by_id[id] = strdup(optarg);
+                                continue;
+                        }
+                        if (ret)
+                                die("invalid option: %s", err.buf);
 
-        while ((opt = getopt_long(argc, argv,
+                        if (opt->flags & OPT_DEVICE) {
+                                bch2_opt_set_by_id(&dev_opts.opts, id, v);
+                                unconsumed_dev_option = true;
+                        } else if (opt->flags & OPT_FS) {
+                                bch2_opt_set_by_id(&fs_opts, id, v);
+                        } else {
+                                die("got bch_opt of wrong type %s", opt->attr.name);
+                        }
+
+                        continue;
+                }
+
+                int optid = getopt_long(argc, argv,
                                         "-L:l:U:g:fqhv",
                                         format_opts,
-                                        NULL)) != -1)
-                switch (opt) {
+                                        NULL);
+                if (optid == -1)
+                        break;
+
+                switch (optid) {
                 case O_replicas:
                         if (kstrtouint(optarg, 10, &v) ||
                             !v ||
@@ -183,7 +200,7 @@ int cmd_format(int argc, char *argv[])
                         force = true;
                         break;
                 case O_fs_size:
-                        if (bch2_strtoull_h(optarg, &dev_opts.size))
+                        if (bch2_strtoull_h(optarg, &dev_opts.fs_size))
                                 die("invalid filesystem size");
                         unconsumed_dev_option = true;
                         break;
@@ -193,32 +210,11 @@ int cmd_format(int argc, char *argv[])
 
                         opts.superblock_size >>= 9;
                         break;
-                case O_bucket_size:
-                        if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-                                die("bad bucket_size %s", optarg);
-                        unconsumed_dev_option = true;
-                        break;
                 case O_label:
                 case 'l':
                         dev_opts.label = optarg;
                         unconsumed_dev_option = true;
                         break;
-                case O_discard:
-                        dev_opts.discard = true;
-                        unconsumed_dev_option = true;
-                        break;
-                case O_data_allowed:
-                        dev_opts.data_allowed =
-                                read_flag_list_or_die(optarg,
-                                                __bch2_data_types, "data type");
-                        unconsumed_dev_option = true;
-                        break;
-                case O_durability:
-                        if (kstrtouint(optarg, 10, &dev_opts.durability) ||
-                            dev_opts.durability > BCH_REPLICAS_MAX)
-                                die("invalid durability");
-                        unconsumed_dev_option = true;
-                        break;
                 case O_version:
                         opts.version = version_parse(optarg);
                         break;
@@ -229,7 +225,7 @@ int cmd_format(int argc, char *argv[])
                         darray_push(&device_paths, optarg);
                         dev_opts.path = optarg;
                         darray_push(&devices, dev_opts);
-                        dev_opts.size = 0;
+                        dev_opts.fs_size = 0;
                         unconsumed_dev_option = false;
                         break;
                 case O_quiet:
@@ -241,49 +237,57 @@ int cmd_format(int argc, char *argv[])
                         break;
                 case O_help:
                 case 'h':
-                        usage();
+                        format_usage();
                         exit(EXIT_SUCCESS);
                         break;
                 case '?':
                         exit(EXIT_FAILURE);
                         break;
+                default:
+                        die("getopt ret %i %c", optid, optid);
                 }
+        }
 
         if (unconsumed_dev_option)
                 die("Options for devices apply to subsequent devices; got a device option with no device");
 
-        if (opts.version != bcachefs_metadata_version_current)
-                initialize = false;
-
         if (!devices.nr)
                 die("Please supply a device");
 
+        if (opts.source && !initialize)
+                die("--source, --no_initialize are incompatible");
+
         if (opts.encrypted && !no_passphrase) {
                 opts.passphrase = read_passphrase_twice("Enter passphrase: ");
                 initialize = false;
         }
 
+        if (!opts.source) {
+                if (getenv("BCACHEFS_KERNEL_ONLY"))
+                        initialize = false;
+
+                if (opts.version != bcachefs_metadata_version_current) {
+                        printf("version mismatch, not initializing");
+                        if (opts.source)
+                                die("--source, --version are incompatible");
+                        initialize = false;
+                }
+        }
+
         darray_for_each(devices, dev) {
                 int ret = open_for_format(dev, force);
                 if (ret)
-                        die("Error opening %s: %s", dev_opts.path, strerror(-ret));
+                        die("Error opening %s: %s", dev->path, strerror(-ret));
         }
 
-        struct bch_sb *sb =
-                bch2_format(fs_opt_strs,
-                            fs_opts,
-                            opts,
-                            devices.data, devices.nr);
-        bch2_opt_strs_free(&fs_opt_strs);
+        struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, opts, devices);
 
         if (!quiet) {
                 struct printbuf buf = PRINTBUF;
 
                 buf.human_readable_units = true;
 
                 bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2);
                 printf("%s", buf.buf);
 
                 printbuf_exit(&buf);
         }
         free(sb);
@@ -293,41 +297,26 @@ int cmd_format(int argc, char *argv[])
                 free(opts.passphrase);
         }
 
-        darray_exit(&devices);
-
-        /* don't skip initialization when we have to build an image from a source */
-        if (opts.source && !initialize) {
-                printf("Warning: Forcing the initialization because the source flag was supplied\n");
-                initialize = 1;
-        }
-
         if (initialize) {
-                struct bch_opts mount_opts = bch2_opts_empty();
-
-
-                opt_set(mount_opts, verbose, verbose);
-
                 /*
                  * Start the filesystem once, to allocate the journal and create
                  * the root directory:
                  */
                 struct bch_fs *c = bch2_fs_open(device_paths.data,
                                                 device_paths.nr,
-                                                mount_opts);
+                                                bch2_opts_empty());
                 if (IS_ERR(c))
                         die("error opening %s: %s", device_paths.data[0],
                             bch2_err_str(PTR_ERR(c)));
 
-                if (opts.source) {
+                if (opts.source)
                         build_fs(c, opts.source);
-                }
-
 
                 bch2_fs_stop(c);
         }
 
+        bch2_opt_strs_free(&fs_opt_strs);
+        darray_exit(&devices);
         darray_exit(&device_paths);
 
         return 0;
 }
@@ -433,3 +422,204 @@ int cmd_show_super(int argc, char *argv[])
         printbuf_exit(&buf);
         return 0;
 }
+
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/sb-members.h"
+
+typedef DARRAY(struct bch_sb *) probed_sb_list;
+
+static void probe_one_super(int dev_fd, unsigned sb_size, u64 offset,
+                            probed_sb_list *sbs, bool verbose)
+{
+        darray_char sb_buf = {};
+        darray_resize(&sb_buf, sb_size);
+
+        xpread(dev_fd, sb_buf.data, sb_buf.size, offset);
+
+        struct printbuf err = PRINTBUF;
+        int ret = bch2_sb_validate((void *) sb_buf.data, offset >> 9, 0, &err);
+        printbuf_exit(&err);
+
+        if (!ret) {
+                if (verbose) {
+                        struct printbuf buf = PRINTBUF;
+                        prt_human_readable_u64(&buf, offset);
+                        printf("found superblock at %s\n", buf.buf);
+                        printbuf_exit(&buf);
+                }
+
+                darray_push(sbs, (void *) sb_buf.data);
+                sb_buf.data = NULL;
+        }
+
+        darray_exit(&sb_buf);
+}
+
+static void probe_sb_range(int dev_fd, u64 start_offset, u64 end_offset,
+                           probed_sb_list *sbs, bool verbose)
+{
+        start_offset &= ~((u64) 511);
+        end_offset &= ~((u64) 511);
+
+        size_t buflen = end_offset - start_offset;
+        void *buf = malloc(buflen);
+        xpread(dev_fd, buf, buflen, start_offset);
+
+        for (u64 offset = 0; offset < buflen; offset += 512) {
+                struct bch_sb *sb = buf + offset;
+
+                if (!uuid_equal(&sb->magic, &BCACHE_MAGIC) &&
+                    !uuid_equal(&sb->magic, &BCHFS_MAGIC))
+                        continue;
+
+                size_t bytes = vstruct_bytes(sb);
+                if (offset + bytes > buflen) {
+                        fprintf(stderr, "found sb %llu size %zu that overran buffer\n",
+                                start_offset + offset, bytes);
+                        continue;
+                }
+                struct printbuf err = PRINTBUF;
+                int ret = bch2_sb_validate(sb, (start_offset + offset) >> 9, 0, &err);
+                if (ret)
+                        fprintf(stderr, "found sb %llu that failed to validate: %s\n",
+                                start_offset + offset, err.buf);
+                printbuf_exit(&err);
+
+                if (ret)
+                        continue;
+
+                if (verbose) {
+                        struct printbuf buf = PRINTBUF;
+                        prt_human_readable_u64(&buf, start_offset + offset);
+                        printf("found superblock at %s\n", buf.buf);
+                        printbuf_exit(&buf);
+                }
+
+                void *sb_copy = malloc(bytes);
+                memcpy(sb_copy, sb, bytes);
+                darray_push(sbs, sb_copy);
+        }
+
+        free(buf);
+}
+
+static u64 bch2_sb_last_mount_time(struct bch_sb *sb)
+{
+        u64 ret = 0;
+        for (unsigned i = 0; i < sb->nr_devices; i++)
+                ret = max(ret, le64_to_cpu(bch2_sb_member_get(sb, i).last_mount));
+        return ret;
+}
+
+static int bch2_sb_time_cmp(struct bch_sb *l, struct bch_sb *r)
+{
+        return cmp_int(bch2_sb_last_mount_time(l),
+                       bch2_sb_last_mount_time(r));
+}
+
+static void recover_super_usage(void)
+{
+        puts("bcachefs recover-super \n"
+             "Usage: bcachefs recover-super [OPTION].. device\n"
+             "\n"
+             "Attempt to recover a filesystem on a device that has had the main superblock\n"
+             "and superblock layout overwritten.\n"
+             "All options will be guessed if not provided\n"
+             "\n"
+             "Options:\n"
+             "  -d, --dev_size              size of filessytem on device, in bytes \n"
+             "  -o, --offset                offset to probe, in bytes\n"
+             "  -y, --yes                   Recover without prompting\n"
+             "  -v, --verbose               Increase logging level\n"
+             "  -h, --help                  display this help and exit\n"
+             "Report bugs to <linux-bcachefs@vger.kernel.org>");
+        exit(EXIT_SUCCESS);
+}
+
+int cmd_recover_super(int argc, char *argv[])
+{
+        static const struct option longopts[] = {
+                { "dev_size",           1, NULL, 'd' },
+                { "offset",             1, NULL, 'o' },
+                { "yes",                0, NULL, 'y' },
+                { "verbose",            0, NULL, 'v' },
+                { "help",               0, NULL, 'h' },
+                { NULL }
+        };
+        u64 dev_size = 0, offset = 0;
+        bool yes = false, verbose = false;
+        int opt;
+
+        while ((opt = getopt_long(argc, argv, "d:o:yvh", longopts, NULL)) != -1)
+                switch (opt) {
+                case 'd':
+                        if (bch2_strtoull_h(optarg, &dev_size))
+                                die("invalid offset");
+                        break;
+                case 'o':
+                        if (bch2_strtoull_h(optarg, &offset))
+                                die("invalid offset");
+
+                        if (offset & 511)
+                                die("offset must be a multiple of 512");
+                        break;
+                case 'y':
+                        yes = true;
+                        break;
+                case 'v':
+                        verbose = true;
+                        break;
+                case 'h':
+                        recover_super_usage();
+                        break;
+                }
+        args_shift(optind);
+
+        char *dev_path = arg_pop();
+        if (!dev_path)
+                die("please supply a device");
+        if (argc)
+                die("too many arguments");
+
+        int dev_fd = xopen(dev_path, O_RDWR);
+
+        if (!dev_size)
+                dev_size = get_size(dev_fd);
+
+        probed_sb_list sbs = {};
+
+        if (offset) {
+                probe_one_super(dev_fd, SUPERBLOCK_SIZE_DEFAULT, offset, &sbs, verbose);
+        } else {
+                unsigned scan_len = 16 << 20; /* 16MB, start and end of device */
+
+                probe_sb_range(dev_fd, 4096, scan_len, &sbs, verbose);
+                probe_sb_range(dev_fd, dev_size - scan_len, dev_size, &sbs, verbose);
+        }
+
+        if (!sbs.nr) {
+                printf("Found no bcachefs superblocks\n");
+                exit(EXIT_FAILURE);
+        }
+
+        struct bch_sb *best = NULL;
+        darray_for_each(sbs, sb)
+                if (!best || bch2_sb_time_cmp(best, *sb) < 0)
+                        best = *sb;
+
+        struct printbuf buf = PRINTBUF;
+        bch2_sb_to_text(&buf, best, true, BIT_ULL(BCH_SB_FIELD_members_v2));
+
+        printf("Found superblock:\n%s", buf.buf);
+        printf("Recover?");
+
+        if (yes || ask_yn())
+                bch2_super_write(dev_fd, best);
+
+        printbuf_exit(&buf);
+        darray_for_each(sbs, sb)
+                kfree(*sb);
+        darray_exit(&sbs);
+
+        return 0;
+}
|
@ -237,6 +237,18 @@ static void accounting_sort(darray_accounting_p *sorted,
|
||||
sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL);
|
||||
}
|
||||
|
||||
static void accounting_swab_if_old(struct bch_ioctl_query_accounting *in)
|
||||
{
|
||||
unsigned kernel_version = bcachefs_kernel_version();
|
||||
|
||||
if (kernel_version &&
|
||||
kernel_version < bcachefs_metadata_version_disk_accounting_big_endian)
|
||||
for (struct bkey_i_accounting *a = in->accounting;
|
||||
a < (struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s);
|
||||
a = bkey_i_to_accounting(bkey_next(&a->k_i)))
|
||||
bch2_bpos_swab(&a->k.p);
|
||||
}
|
||||
|
||||
static int fs_usage_v1_to_text(struct printbuf *out,
|
||||
struct bchfs_handle fs,
|
||||
dev_names dev_names)
|
||||
@ -251,6 +263,8 @@ static int fs_usage_v1_to_text(struct printbuf *out,
|
||||
if (!a)
|
||||
return -1;
|
||||
|
||||
accounting_swab_if_old(a);
|
||||
|
||||
darray_accounting_p a_sorted = {};
|
||||
|
||||
accounting_sort(&a_sorted, a);
|
||||
@ -477,6 +491,19 @@ devs:
|
||||
bcache_fs_close(fs);
|
||||
}
|
||||
|
||||
int fs_usage(void)
|
||||
{
|
||||
puts("bcachefs fs - manage a running filesystem\n"
|
||||
"Usage: bcachefs fs <CMD> [OPTIONS]\n"
|
||||
"\n"
|
||||
"Commands:\n"
|
||||
" usage Display detailed filesystem usage\n"
|
||||
" top Show runtime performance information\n"
|
||||
"\n"
|
||||
"Report bugs to <linux-bcachefs@vger.kernel.org>");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fs_usage_usage(void)
|
||||
{
|
||||
puts("bcachefs fs usage - display detailed filesystem usage\n"
|
||||
|
c_src/cmd_fsck.c:
@@ -21,7 +21,6 @@ static void fsck_usage(void)
              "  -y                      Assume \"yes\" to all questions\n"
              "  -f                      Force checking even if filesystem is marked clean\n"
              "  -r, --ratelimit_errors  Don't display more than 10 errors of a given type\n"
-             "  -R, --reconstruct_alloc Reconstruct the alloc btree\n"
              "  -k, --kernel            Use the in-kernel fsck implementation\n"
              "  -v                      Be verbose\n"
              "  -h, --help              Display this help and exit\n"
@@ -87,7 +86,8 @@ static int splice_fd_to_stdinout(int fd)
                 stdin_closed = true;
         }
 
-        return close(fd);
+        xclose(fd);
+        return 0;
 }
 
 static int fsck_online(const char *dev_path, const char *opt_str)
@@ -117,9 +117,7 @@ static bool should_use_kernel_fsck(darray_str devs)
 {
         system("modprobe bcachefs");
 
-        unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK)
-            ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
-            : 0;
+        unsigned kernel_version = bcachefs_kernel_version();
 
         if (!kernel_version)
                 return false;
@@ -205,7 +203,6 @@ int cmd_fsck(int argc, char *argv[])
 {
         static const struct option longopts[] = {
                 { "ratelimit_errors",   no_argument,            NULL, 'r' },
-                { "reconstruct_alloc",  no_argument,            NULL, 'R' },
                 { "kernel",             no_argument,            NULL, 'k' },
                 { "no-kernel",          no_argument,            NULL, 'K' },
                 { "help",               no_argument,            NULL, 'h' },
@@ -224,10 +221,13 @@ int cmd_fsck(int argc, char *argv[])
         append_opt(&opts_str, "read_only");
 
         while ((opt = getopt_long(argc, argv,
-                                  "apynfo:rRkKvh",
+                                  "apynfo:rkKvh",
                                   longopts, NULL)) != -1)
                 switch (opt) {
-                case 'a': /* outdated alias for -p */
+                case 'a':
+                        /* "automatic" run, called by the system, for us to do checks as needed.
+                         * we don't need checks here: */
+                        exit(EXIT_SUCCESS);
                 case 'p':
                 case 'y':
                         append_opt(&opts_str, "fix_errors=yes");
@@ -245,9 +245,6 @@ int cmd_fsck(int argc, char *argv[])
                 case 'r':
                         append_opt(&opts_str, "ratelimit_errors");
                         break;
-                case 'R':
-                        append_opt(&opts_str, "reconstruct_alloc");
-                        break;
                 case 'k':
                         kernel = true;
                         break;
@@ -323,7 +320,7 @@ kernel_fsck_err:
         } else {
 userland_fsck:
                 printf("Running userspace offline fsck\n");
-                ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
+                ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf, false);
                 if (ret)
                         return ret;
c_src/cmd_fusemount.c:
@@ -19,7 +19,7 @@
 #include "libbcachefs/dirent.h"
 #include "libbcachefs/errcode.h"
 #include "libbcachefs/error.h"
-#include "libbcachefs/fs-common.h"
+#include "libbcachefs/namei.h"
 #include "libbcachefs/inode.h"
 #include "libbcachefs/io_read.h"
 #include "libbcachefs/io_write.h"
@@ -31,9 +31,6 @@
 
 #include <linux/dcache.h>
 
-/* XXX cut and pasted from fsck.c */
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
 /* used by write_aligned function for waiting on bch2_write closure */
 struct write_aligned_op_t {
         struct closure cl;
@@ -478,10 +475,9 @@ static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
         closure_init_stack(&cl);
 
         closure_get(&cl);
-        rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
         rbio.bio.bi_private = &cl;
 
-        bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+        bch2_read(c, rbio_init(&rbio.bio, c, io_opts, bcachefs_fuse_read_endio), inum);
 
         closure_sync(&cl);
@@ -65,9 +65,21 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 		struct bbpos k_start = BBPOS(entry->btree_id, bkey_start_pos(&k->k));
 		struct bbpos k_end = BBPOS(entry->btree_id, k->k.p);
 
+		if (!i->start.pos.snapshot &&
+		    !i->end.pos.snapshot) {
+			k_start.pos.snapshot = 0;
+			k_end.pos.snapshot = 0;
+		}
+
+		if (!k->k.size) {
+			if (bbpos_cmp(k_start, i->start) >= 0 &&
+			    bbpos_cmp(k_end, i->end) <= 0)
+				return true;
+		} else {
 			if (bbpos_cmp(i->start, k_end) <= 0 &&
 			    bbpos_cmp(i->end, k_start) >= 0)
 				return true;
+		}
 	}
 	return false;
 }
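The filter above treats zero-size keys as points (containment test) and everything else as extents (closed-interval overlap test). The same rule with plain integer bounds, as a hypothetical standalone helper:

#include <stdbool.h>
#include <stdint.h>

static bool key_matches_filter(uint64_t f_start, uint64_t f_end,
			       uint64_t k_start, uint64_t k_end)
{
	if (k_start == k_end)
		/* zero-size key: must lie entirely inside the filter */
		return k_start >= f_start && k_end <= f_end;

	/* extent: closed intervals [f_start, f_end] and [k_start, k_end] overlap */
	return f_start <= k_end && f_end >= k_start;
}

Zeroing the snapshot fields first, as the diff does when the filter specifies no snapshot, makes the comparison ignore that component of the position.
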
@@ -75,9 +87,9 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 static bool entry_matches_transaction_filter(struct jset_entry *entry,
 					     d_bbpos_range filter)
 {
-	if (entry->type == BCH_JSET_ENTRY_btree_root ||
-	    entry->type == BCH_JSET_ENTRY_btree_keys ||
-	    entry->type == BCH_JSET_ENTRY_overwrite)
+	if (!entry->level &&
+	    (entry->type == BCH_JSET_ENTRY_btree_keys ||
+	     entry->type == BCH_JSET_ENTRY_overwrite))
 		jset_entry_for_each_key(entry, k)
 			if (bkey_matches_filter(filter, entry, k))
 				return true;
@@ -90,6 +102,8 @@ static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
 {
 	struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
 	unsigned b = jset_entry_log_msg_bytes(l);
+	bool have_log_messages = false;
+	bool have_non_log_messages = false;
 
 	darray_for_each(msg_filter, i)
 		if (!strncmp(*i, l->d, b))
@@ -100,9 +114,17 @@ static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
 
 	for (entry = vstruct_next(entry);
 	     entry != end && !entry_is_transaction_start(entry);
-	     entry = vstruct_next(entry))
-		if (entry_is_log_msg(entry) ||
-		    entry_matches_transaction_filter(entry, key_filter))
+	     entry = vstruct_next(entry)) {
+		if (entry_matches_transaction_filter(entry, key_filter))
 			return true;
+
+		if (entry_is_log_msg(entry))
+			have_log_messages = true;
+		else
+			have_non_log_messages = true;
+	}
+
+	if (have_log_messages && !have_non_log_messages)
+		return true;
 
 	return false;
@@ -134,6 +156,7 @@ static void journal_entry_header_to_text(struct printbuf *out,
 		prt_str(out, "blacklisted ");
 
 	prt_printf(out,
+		   "\n"
		   "journal entry %llu\n"
		   " version %u\n"
		   " last seq %llu\n"
@@ -258,8 +281,7 @@ int cmd_list_journal(int argc, char *argv[])
 	opt_set(opts, nochanges,	true);
 	opt_set(opts, norecovery,	true);
 	opt_set(opts, read_only,	true);
-	opt_set(opts, degraded,		true);
-	opt_set(opts, very_degraded,	true);
+	opt_set(opts, degraded,		BCH_DEGRADED_very);
 	opt_set(opts, errors,		BCH_ON_ERROR_continue);
 	opt_set(opts, fix_errors,	FSCK_FIX_yes);
 	opt_set(opts, retain_recovery_info, true);

@@ -125,7 +125,7 @@ static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
 	*bcachefs_inum = statbuf.st_ino;
 
 	if (fallocate(fd, 0, 0, size))
-		die("Error reserving space for bcachefs metadata: %m");
+		die("Error reserving space (%llu bytes) for bcachefs metadata: %m", size);
 
 	fsync(fd);
 
@@ -147,7 +147,7 @@ static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
 		range_add(&extents, e.fe_physical, e.fe_length);
 	}
 	fiemap_iter_exit(&iter);
-	close(fd);
+	xclose(fd);
 
 	ranges_sort_merge(&extents);
 	return extents;
@@ -159,9 +159,9 @@ static void find_superblock_space(ranges extents,
 {
 	darray_for_each(extents, i) {
 		u64 start = round_up(max(256ULL << 10, i->start),
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);
 		u64 end = round_down(i->end,
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);
 
 		/* Need space for two superblocks: */
 		if (start + (opts.superblock_size << 9) * 2 <= end) {
@@ -209,38 +209,43 @@ static int migrate_fs(const char *fs_path,
 	if (!S_ISDIR(stat.st_mode))
 		die("%s is not a directory", fs_path);
 
-	struct dev_opts dev = dev_opts_default();
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts_default());
 
-	dev.path = dev_t_to_path(stat.st_dev);
-	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+	struct dev_opts *dev = &devs.data[0];
 
-	int ret = PTR_ERR_OR_ZERO(dev.file);
+	dev->path = dev_t_to_path(stat.st_dev);
+	dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev->file);
 	if (ret < 0)
-		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
-	dev.bdev = file_bdev(dev.file);
+		die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+	dev->bdev = file_bdev(dev->file);
 
-	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+	opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));
 
 	char *file_path = mprintf("%s/bcachefs", fs_path);
 	printf("Creating new filesystem on %s in space reserved at %s\n",
-	       dev.path, file_path);
+	       dev->path, file_path);
 
-	dev.size	= get_size(dev.bdev->bd_fd);
-	dev.bucket_size	= bch2_pick_bucket_size(fs_opts, &dev);
-	dev.nbuckets	= dev.size / dev.bucket_size;
+	dev->fs_size = get_size(dev->bdev->bd_fd);
+	opt_set(dev->opts, bucket_size, bch2_pick_bucket_size(fs_opts, devs));
 
-	bch2_check_bucket_size(fs_opts, &dev);
+	dev->nbuckets = dev->fs_size / dev->opts.bucket_size;
+
+	bch2_check_bucket_size(fs_opts, dev);
 
 	u64 bcachefs_inum;
 	ranges extents = reserve_new_fs_space(file_path,
 					      fs_opts.block_size >> 9,
-					      get_size(dev.bdev->bd_fd) / 5,
+					      get_size(dev->bdev->bd_fd) / 10,
 					      &bcachefs_inum, stat.st_dev, force);
 
-	find_superblock_space(extents, format_opts, &dev);
+	find_superblock_space(extents, format_opts, dev);
 
-	struct bch_sb *sb = bch2_format(fs_opt_strs,
-					fs_opts, format_opts, &dev, 1);
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	darray_exit(&devs);
+
 	u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
 
 	if (format_opts.passphrase)
@@ -248,16 +253,14 @@ static int migrate_fs(const char *fs_path,
 
 	free(sb);
 
-	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	char *path[1] = { dev.path };
+	char *path[1] = { dev->path };
 
+	struct bch_opts opts = bch2_opts_empty();
 	opt_set(opts, sb,	sb_offset);
-	opt_set(opts, nostart,	true);
 	opt_set(opts, noexcl,	true);
+	opt_set(opts, nostart,	true);
 
-	c = bch2_fs_open(path, 1, opts);
+	struct bch_fs *c = bch2_fs_open(path, 1, opts);
 	if (IS_ERR(c))
 		die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
 
@@ -265,10 +268,6 @@ static int migrate_fs(const char *fs_path,
 	if (ret)
 		die("Error allocating buckets_nouse: %s", bch2_err_str(ret));
 
-	ret = bch2_fs_start(c);
-	if (IS_ERR(c))
-		die("Error starting new filesystem: %s", bch2_err_str(ret));
-
 	mark_unreserved_space(c, extents);
 
 	ret = bch2_fs_start(c);
@@ -282,7 +281,12 @@ static int migrate_fs(const char *fs_path,
 		.type		= BCH_MIGRATE_migrate,
 	};
 
-	copy_fs(c, fs_fd, fs_path, &s);
+	u64 reserve_start = roundup((format_opts.superblock_size * 2 + 8) << 9,
+				    bucket_bytes(c->devs[0]));
+	BUG_ON(!reserve_start);
+
+	copy_fs(c, fs_fd, fs_path, &s, reserve_start);
 
 	bch2_fs_stop(c);
 
@@ -310,7 +314,7 @@ static int migrate_fs(const char *fs_path,
 	       "filesystem. That file can be deleted once the old filesystem is\n"
 	       "no longer needed (and should be deleted prior to running\n"
	       "bcachefs migrate-superblock)\n",
-	       sb_offset, dev.path, dev.path, sb_offset);
+	       sb_offset, dev->path, dev->path, sb_offset);
 	return 0;
 }

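The new reserve_start rounds the superblock region up to a whole bucket before copy_fs() runs. A worked example with assumed values (none of these numbers come from the diff):

	/* all inputs assumed for illustration: */
	u64 sb_sectors	  = 2048;			/* format_opts.superblock_size */
	u64 bucket	  = 1ULL << 20;			/* bucket_bytes(c->devs[0]) */
	u64 raw		  = (sb_sectors * 2 + 8) << 9;	/* 2,101,248 bytes */
	u64 reserve_start = roundup(raw, bucket);	/* 3 MiB */

so file data found below the first three buckets is copied into the new filesystem rather than linked in place.
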
@@ -374,7 +378,7 @@ static void migrate_superblock_usage(void)
 int cmd_migrate_superblock(int argc, char *argv[])
 {
 	char *dev	= NULL;
-	u64 offset	= 0;
+	u64 sb_offset	= 0;
 	int opt, ret;
 
 	while ((opt = getopt(argc, argv, "d:o:h")) != -1)
@@ -383,7 +387,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
 			dev = optarg;
 			break;
 		case 'o':
-			ret = kstrtou64(optarg, 10, &offset);
+			ret = kstrtou64(optarg, 10, &sb_offset);
 			if (ret)
 				die("Invalid offset");
 			break;
@@ -395,29 +399,72 @@ int cmd_migrate_superblock(int argc, char *argv[])
 	if (!dev)
 		die("Please specify a device");
 
-	if (!offset)
+	if (!sb_offset)
 		die("Please specify offset of existing superblock");
 
 	int fd = xopen(dev, O_RDWR);
-	struct bch_sb *sb = __bch2_super_read(fd, offset);
+	struct bch_sb *sb = __bch2_super_read(fd, sb_offset);
+	unsigned sb_size = 1U << sb->layout.sb_max_size_bits;
 
 	if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
 		die("Can't add superblock: no space left in superblock layout");
 
-	unsigned i;
-	for (i = 0; i < sb->layout.nr_superblocks; i++)
-		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
-			die("Superblock layout already has default superblock");
+	for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
+		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR ||
+		    le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size)
+			die("Superblock layout already has default superblocks");
 
-	memmove(&sb->layout.sb_offset[1],
+	memmove(&sb->layout.sb_offset[2],
 		&sb->layout.sb_offset[0],
 		sb->layout.nr_superblocks * sizeof(u64));
-	sb->layout.nr_superblocks++;
+
+	sb->layout.nr_superblocks += 2;
 	sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+	sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size);
 
 	/* also write first 0-3.5k bytes with zeroes, ensure we blow away old
 	 * superblock */
 	static const char zeroes[BCH_SB_SECTOR << 9];
 	xpwrite(fd, zeroes, BCH_SB_SECTOR << 9, 0, "zeroing start of disk");
 
 	bch2_super_write(fd, sb);
-	close(fd);
+	xclose(fd);
+
+	/* mark new superblocks */
+
+	struct bch_opts opts = bch2_opts_empty();
+	opt_set(opts, nostart,	true);
+	opt_set(opts, sb,	sb_offset);
+
+	struct bch_fs *c = bch2_fs_open(&dev, 1, opts);
+	ret =   PTR_ERR_OR_ZERO(c) ?:
+		bch2_buckets_nouse_alloc(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+
+	struct bch_dev *ca = c->devs[0];
+	for (u64 b = 0; bucket_to_sector(ca, b) < BCH_SB_SECTOR + sb_size * 2; b++)
+		set_bit(b, ca->buckets_nouse);
+
+	ret = bch2_fs_start(c);
+	if (ret)
+		die("Error starting filesystem: %s", bch2_err_str(ret));
+
+	bch2_fs_stop(c);
+
+	opts = bch2_opts_empty();
+	opt_set(opts, fsck,		true);
+	opt_set(opts, fix_errors,	true);
+
+	/*
+	 * Hack: the free space counters are coming out wrong after marking the
+	 * new superblock, but it's just the device counters so it's
+	 * inconsequential:
+	 */
+
+	c = bch2_fs_open(&dev, 1, opts);
+	ret = PTR_ERR_OR_ZERO(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+	bch2_fs_stop(c);
 	return 0;
 }

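To make the layout edit above concrete: assume the existing layout holds two superblocks and sb_max_size_bits = 11, so sb_size = 2048 sectors. The memmove() shifts the two existing entries into slots 2-3, nr_superblocks becomes 4, and slots 0-1 receive the default locations BCH_SB_SECTOR and BCH_SB_SECTOR + 2048. The preceding loop refuses to run if either default offset is already present, since duplicate layout entries would make the superblocks overwrite each other.
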
@@ -30,7 +30,7 @@ static void set_option_usage(void)
 	     "Usage: bcachefs set-fs-option [OPTION].. device\n"
 	     "\n"
 	     "Options:\n");
-	bch2_opts_usage(OPT_MOUNT);
+	bch2_opts_usage(OPT_MOUNT|OPT_RUNTIME);
 	puts(" -d, --dev-idx               index for device specific options\n"
 	     " -h, --help                  display this help and exit\n"
 	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
@@ -111,16 +111,16 @@ int cmd_set_option(int argc, char *argv[])
 		if (!bch2_opt_defined_by_id(&new_opts, i))
 			continue;
 
-		ret = bch2_opt_check_may_set(c, i, v);
+		if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
+			fprintf(stderr, "Can't set option %s\n", opt->attr.name);
+
+		if (opt->flags & OPT_FS) {
+			ret = bch2_opt_hook_pre_set(c, NULL, i, v);
 			if (ret < 0) {
 				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
 				continue;
 			}
 
-		if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
-			fprintf(stderr, "Can't set option %s\n", opt->attr.name);
-
-		if (opt->flags & OPT_FS) {
 			bch2_opt_set_sb(c, NULL, opt, v);
 		}
 
@@ -133,6 +133,12 @@ int cmd_set_option(int argc, char *argv[])
 				continue;
 			}
 
+			ret = bch2_opt_hook_pre_set(c, ca, i, v);
+			if (ret < 0) {
+				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+				continue;
+			}
+
 			bch2_opt_set_sb(c, ca, opt, v);
 			bch2_dev_put(ca);
 		}

c_src/cmds.h
@@ -11,26 +11,17 @@
 
 int cmd_format(int argc, char *argv[]);
 int cmd_show_super(int argc, char *argv[]);
 int cmd_recover_super(int argc, char *argv[]);
 int cmd_reset_counters(int argc, char *argv[]);
 int cmd_set_option(int argc, char *argv[]);
 
-int fs_usage(void);
 int cmd_fs_usage(int argc, char *argv[]);
 int cmd_fs_top(int argc, char *argv[]);
 
-int device_usage(void);
 int cmd_device_add(int argc, char *argv[]);
 int cmd_device_remove(int argc, char *argv[]);
 int cmd_device_online(int argc, char *argv[]);
 int cmd_device_offline(int argc, char *argv[]);
 int cmd_device_evacuate(int argc, char *argv[]);
 int cmd_device_set_state(int argc, char *argv[]);
 int cmd_device_resize(int argc, char *argv[]);
 int cmd_device_resize_journal(int argc, char *argv[]);
-int device_cmds(int argc, char *argv[]);
 
-int data_usage(void);
 int cmd_data_rereplicate(int argc, char *argv[]);
 int cmd_data_scrub(int argc, char *argv[]);
 int cmd_data_job(int argc, char *argv[]);
-int data_cmds(int argc, char *argv[]);
 
 int cmd_unlock(int argc, char *argv[]);
 int cmd_set_passphrase(int argc, char *argv[]);
@@ -57,9 +48,7 @@ int cmd_subvolume_snapshot(int argc, char *argv[]);
 int cmd_fusemount(int argc, char *argv[]);
 
 void bcachefs_usage(void);
 int device_cmds(int argc, char *argv[]);
 int fs_cmds(int argc, char *argv[]);
 int data_cmds(int argc, char *argv[]);
 int subvolume_cmds(int argc, char *argv[]);
 
 #endif /* _CMDS_H */

@@ -124,10 +124,7 @@ void bch2_passphrase_check(struct bch_sb *sb, const char *passphrase,
 
 	*passphrase_key = derive_passphrase(crypt, passphrase);
 
 	/* Check if the user supplied the correct passphrase: */
-	if (bch2_chacha_encrypt_key(passphrase_key, __bch2_sb_key_nonce(sb),
-				    sb_key, sizeof(*sb_key)))
-		die("error encrypting key");
+	bch2_chacha20(passphrase_key, __bch2_sb_key_nonce(sb), sb_key, sizeof(*sb_key));
 
 	if (bch2_key_is_encrypted(sb_key))
 		die("incorrect passphrase");
@@ -211,9 +208,7 @@ void bch_crypt_update_passphrase(
 
 	struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
 
-	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
-				    &new_key, sizeof(new_key)))
-		die("error encrypting key");
+	bch2_chacha20(&passphrase_key, __bch2_sb_key_nonce(sb), &new_key, sizeof(new_key));
 
 	memzero_explicit(&passphrase_key, sizeof(passphrase_key));

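Both call sites rely on the same property: ChaCha20 is its own inverse under a fixed key and nonce, so a single primitive both wraps and unwraps the superblock key, and a wrong passphrase simply yields bytes that still look encrypted. A sketch of the check using only names visible in this diff (parameter types and signatures are assumed from the call sites, not verified):

static void passphrase_check_sketch(struct bch_sb *sb,
				    struct bch_sb_field_crypt *crypt,
				    const char *passphrase,
				    struct bch_key *sb_key)
{
	struct bch_key passphrase_key = derive_passphrase(crypt, passphrase);

	/* decrypt in place; a bad key leaves "encrypted-looking" garbage */
	bch2_chacha20(&passphrase_key, __bch2_sb_key_nonce(sb),
		      sb_key, sizeof(*sb_key));

	if (bch2_key_is_encrypted(sb_key))
		die("incorrect passphrase");

	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
}
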
@@ -16,6 +16,9 @@
 
 #include <uuid/uuid.h>
 
+#include <linux/fs.h>
+#include <linux/mm.h>
+
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/bcachefs_format.h"
@@ -31,7 +34,7 @@
 
 #define NSEC_PER_SEC 1000000000L
 
-static void init_layout(struct bch_sb_layout *l,
+void bch2_sb_layout_init(struct bch_sb_layout *l,
 			unsigned block_size,
 			unsigned sb_size,
 			u64 sb_start, u64 sb_end)
@@ -60,83 +63,88 @@ static void init_layout(struct bch_sb_layout *l,
 		    sb_start, sb_pos, sb_end, sb_size);
 }
 
-/* minimum size filesystem we can create, given a bucket size: */
-static u64 min_size(unsigned bucket_size)
+static u64 dev_max_bucket_size(u64 dev_size)
 {
-	return BCH_MIN_NR_NBUCKETS * bucket_size;
+	return rounddown_pow_of_two(dev_size / (BCH_MIN_NR_NBUCKETS * 4));
 }
 
-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
 {
-	u64 bucket_size;
-
-	if (dev->size < min_size(opts.block_size))
-		die("cannot format %s, too small (%llu bytes, min %llu)",
-		    dev->path, dev->size, min_size(opts.block_size));
-
 	/* Bucket size must be >= block size: */
-	bucket_size = opts.block_size;
+	u64 bucket_size = opts.block_size;
 
 	/* Bucket size must be >= btree node size: */
 	if (opt_defined(opts, btree_node_size))
-		bucket_size = max_t(unsigned, bucket_size,
-				    opts.btree_node_size);
+		bucket_size = max_t(u64, bucket_size, opts.btree_node_size);
 
-	/* Want a bucket size of at least 128k, if possible: */
-	bucket_size = max(bucket_size, 128ULL << 10);
+	u64 min_dev_size = BCH_MIN_NR_NBUCKETS * bucket_size;
+	darray_for_each(devs, i)
+		if (i->fs_size < min_dev_size)
+			die("cannot format %s, too small (%llu bytes, min %llu)",
+			    i->path, i->fs_size, min_dev_size);
 
-	if (dev->size >= min_size(bucket_size)) {
-		unsigned scale = max(1,
-				     ilog2(dev->size / min_size(bucket_size)) / 4);
+	u64 total_fs_size = 0;
+	darray_for_each(devs, i)
+		total_fs_size += i->fs_size;
 
-		scale = rounddown_pow_of_two(scale);
+	struct sysinfo info;
+	si_meminfo(&info);
 
-		/* max bucket size 1 mb */
-		bucket_size = min(bucket_size * scale, 1ULL << 20);
-	} else {
-		do {
-			bucket_size /= 2;
-		} while (dev->size < min_size(bucket_size));
-	}
+	/*
+	 * Large fudge factor to allow for other fsck processes and devices
+	 * being added after creation
	 */
+	u64 mem_available_for_fsck = info.totalram / 8;
+	u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
+	u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
+
+	/*
+	 * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+	 * extents too much as they're moved:
+	 */
+	bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+	/* Lower bound to ensure we can fsck: */
+	bucket_size = max(bucket_size, mem_lower_bound);
+
+	u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+	/* We also prefer larger buckets for performance, up to 2MB at 2T */
+	bucket_size = max(bucket_size, perf_lower_bound);
+
+	bucket_size = roundup_pow_of_two(bucket_size);
 
 	return bucket_size;
 }
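Tracing the new heuristic with assumed inputs — 2 TiB of devices, 32 GiB of RAM, 4 KiB blocks, encoded_extent_max = 64 KiB, and sizeof(struct bucket) taken as 40 bytes purely for illustration:

	u64 total_fs_size = 2ULL << 40;			/* 2 TiB, assumed */
	u64 mem_for_fsck  = (32ULL << 30) / 8;		/* 4 GiB */
	u64 can_fsck	  = mem_for_fsck / (40 * 1.5);	/* ~71.6M buckets */
	u64 mem_bound	  = roundup_pow_of_two(total_fs_size / can_fsck); /* 32 KiB */
	u64 enc_bound	  = (64ULL << 10) * 4;		/* 256 KiB */
	u64 perf_bound	  = min(2ULL << 20, total_fs_size >> 20); /* 2 MiB */

The final size is roundup_pow_of_two(max(block_size, enc_bound, mem_bound, perf_bound)), i.e. 2 MiB buckets here — the performance bound dominates at this scale, while the fsck-memory bound takes over on small-RAM machines.
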
 void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
 {
-	if (dev->bucket_size < opts.block_size)
-		die("Bucket size (%llu) cannot be smaller than block size (%u)",
-		    dev->bucket_size, opts.block_size);
+	if (dev->opts.bucket_size < opts.block_size)
+		die("Bucket size (%u) cannot be smaller than block size (%u)",
+		    dev->opts.bucket_size, opts.block_size);
 
 	if (opt_defined(opts, btree_node_size) &&
-	    dev->bucket_size < opts.btree_node_size)
-		die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
-		    dev->bucket_size, opts.btree_node_size);
+	    dev->opts.bucket_size < opts.btree_node_size)
+		die("Bucket size (%u) cannot be smaller than btree node size (%u)",
+		    dev->opts.bucket_size, opts.btree_node_size);
 
 	if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
-		die("Not enough buckets: %llu, need %u (bucket size %llu)",
-		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
-
-	if (dev->bucket_size > (u32) U16_MAX << 9)
-		die("Bucket size (%llu) too big (max %u)",
-		    dev->bucket_size, (u32) U16_MAX << 9);
+		die("Not enough buckets: %llu, need %u (bucket size %u)",
+		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->opts.bucket_size);
 }
 
 static unsigned parse_target(struct bch_sb_handle *sb,
-			     struct dev_opts *devs, size_t nr_devs,
+			     dev_opts_list devs,
 			     const char *s)
 {
-	struct dev_opts *i;
-	int idx;
-
 	if (!s)
 		return 0;
 
-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
 		if (!strcmp(s, i->path))
-			return dev_to_target(i - devs);
+			return dev_to_target(i - devs.data);
 
-	idx = bch2_disk_path_find(sb, s);
+	int idx = bch2_disk_path_find(sb, s);
 	if (idx >= 0)
 		return group_to_target(idx);
 
@@ -144,56 +152,61 @@ static unsigned parse_target(struct bch_sb_handle *sb,
 	return 0;
 }
 
+static void bch2_opt_set_sb_all(struct bch_sb *sb, int dev_idx, struct bch_opts *opts)
+{
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
+		u64 v = bch2_opt_defined_by_id(opts, id)
+			? bch2_opt_get_by_id(opts, id)
+			: bch2_opt_get_by_id(&bch2_opts_default, id);
+
+		__bch2_opt_set_sb(sb, dev_idx, &bch2_opt_table[id], v);
+	}
+}
+
 struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
 			   struct bch_opts	fs_opts,
 			   struct format_opts	opts,
-			   struct dev_opts	*devs,
-			   size_t		nr_devs)
+			   dev_opts_list	devs)
 {
 	struct bch_sb_handle sb = { NULL };
-	struct dev_opts *i;
 	unsigned max_dev_block_size = 0;
-	unsigned opt_id;
-	u64 min_bucket_size = U64_MAX;
 
-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
 		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
 
 	/* calculate block size: */
-	if (!opt_defined(fs_opts, block_size)) {
+	if (!opt_defined(fs_opts, block_size))
 		opt_set(fs_opts, block_size, max_dev_block_size);
-	} else if (fs_opts.block_size < max_dev_block_size)
+
+	if (fs_opts.block_size < max_dev_block_size)
 		die("blocksize too small: %u, must be greater than device blocksize %u",
 		    fs_opts.block_size, max_dev_block_size);
 
 	/* get device size, if it wasn't specified: */
-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->size)
-			i->size = get_size(i->bdev->bd_fd);
+	darray_for_each(devs, i)
+		if (!i->fs_size)
+			i->fs_size = get_size(i->bdev->bd_fd);
 
 	/* calculate bucket sizes: */
-	for (i = devs; i < devs + nr_devs; i++)
-		min_bucket_size = min(min_bucket_size,
-				      i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+	u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
 
-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->bucket_size)
-			i->bucket_size = min_bucket_size;
+	darray_for_each(devs, i)
+		if (!opt_defined(i->opts, bucket_size))
+			opt_set(i->opts, bucket_size,
+				min(fs_bucket_size, dev_max_bucket_size(i->fs_size)));
 
-	for (i = devs; i < devs + nr_devs; i++) {
-		i->nbuckets = i->size / i->bucket_size;
+	darray_for_each(devs, i) {
+		i->nbuckets = i->fs_size / i->opts.bucket_size;
 		bch2_check_bucket_size(fs_opts, i);
 	}
 
 	/* calculate btree node size: */
 	if (!opt_defined(fs_opts, btree_node_size)) {
-		/* 256k default btree node size */
-		opt_set(fs_opts, btree_node_size, 256 << 10);
+		unsigned s = bch2_opts_default.btree_node_size;
 
-		for (i = devs; i < devs + nr_devs; i++)
-			fs_opts.btree_node_size =
-				min_t(unsigned, fs_opts.btree_node_size,
-				      i->bucket_size);
+		darray_for_each(devs, i)
+			s = min(s, i->opts.bucket_size);
+		opt_set(fs_opts, btree_node_size, s);
 	}
 
 	if (uuid_is_null(opts.uuid.b))
@@ -206,7 +219,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 	sb.sb->version_min	= le16_to_cpu(opts.version);
 	sb.sb->magic		= BCHFS_MAGIC;
 	sb.sb->user_uuid	= opts.uuid;
-	sb.sb->nr_devices	= nr_devs;
+	sb.sb->nr_devices	= devs.nr;
+	SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb.sb, opts.version);
 
 	if (opts.version == bcachefs_metadata_version_current)
 		sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
@@ -218,17 +232,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 		       opts.label,
 		       min(strlen(opts.label), sizeof(sb.sb->label)));
 
-	for (opt_id = 0;
-	     opt_id < bch2_opts_nr;
-	     opt_id++) {
-		u64 v;
-
-		v = bch2_opt_defined_by_id(&fs_opts, opt_id)
-			? bch2_opt_get_by_id(&fs_opts, opt_id)
-			: bch2_opt_get_by_id(&bch2_opts_default, opt_id);
-
-		__bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v);
-	}
+	bch2_opt_set_sb_all(sb.sb, -1, &fs_opts);
 
 	struct timespec now;
 	if (clock_gettime(CLOCK_REALTIME, &now))
@@ -240,31 +244,25 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 	/* Member info: */
 	struct bch_sb_field_members_v2 *mi =
 		bch2_sb_field_resize(&sb, members_v2,
-				     (sizeof(*mi) + sizeof(struct bch_member) *
-				      nr_devs) / sizeof(u64));
+				     (sizeof(*mi) + sizeof(struct bch_member) * devs.nr) / sizeof(u64));
 
 	mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+	darray_for_each(devs, i) {
+		unsigned idx = i - devs.data;
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, idx);
 
 		uuid_generate(m->uuid.b);
 		m->nbuckets	= cpu_to_le64(i->nbuckets);
 		m->first_bucket	= 0;
-		m->bucket_size	= cpu_to_le16(i->bucket_size >> 9);
-
-		SET_BCH_MEMBER_DISCARD(m,	i->discard);
-		SET_BCH_MEMBER_DATA_ALLOWED(m,	i->data_allowed);
-		SET_BCH_MEMBER_DURABILITY(m,	i->durability + 1);
+		bch2_opt_set_sb_all(sb.sb, idx, &i->opts);
 	}
 
 	/* Disk labels*/
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m;
-		int idx;
-
+	darray_for_each(devs, i) {
 		if (!i->label)
 			continue;
 
-		idx = bch2_disk_path_find_or_create(&sb, i->label);
+		int idx = bch2_disk_path_find_or_create(&sb, i->label);
 		if (idx < 0)
 			die("error creating disk path: %s", strerror(-idx));
 
@@ -272,18 +270,18 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 		 * Recompute mi and m after each sb modification: its location
 		 * in memory may have changed due to reallocation.
 		 */
-		m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs.data));
 		SET_BCH_MEMBER_GROUP(m, idx + 1);
 	}
 
 	SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+		parse_target(&sb, devs, fs_opt_strs.foreground_target));
 	SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+		parse_target(&sb, devs, fs_opt_strs.background_target));
 	SET_BCH_SB_PROMOTE_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+		parse_target(&sb, devs, fs_opt_strs.promote_target));
 	SET_BCH_SB_METADATA_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
+		parse_target(&sb, devs, fs_opt_strs.metadata_target));
 
 	/* Crypt: */
 	if (opts.encrypted) {
@@ -296,17 +294,17 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 
 	bch2_sb_members_cpy_v2_v1(&sb);
 
-	for (i = devs; i < devs + nr_devs; i++) {
-		u64 size_sectors = i->size >> 9;
+	darray_for_each(devs, i) {
+		u64 size_sectors = i->fs_size >> 9;
 
-		sb.sb->dev_idx = i - devs;
+		sb.sb->dev_idx = i - devs.data;
 
 		if (!i->sb_offset) {
 			i->sb_offset	= BCH_SB_SECTOR;
 			i->sb_end	= size_sectors;
 		}
 
-		init_layout(&sb.sb->layout, fs_opts.block_size,
+		bch2_sb_layout_init(&sb.sb->layout, fs_opts.block_size,
 			    opts.superblock_size,
 			    i->sb_offset, i->sb_end);
 
@@ -321,7 +319,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 			struct bch_sb_layout *l = &sb.sb->layout;
 			u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);
 
-			backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
+			backup_sb = rounddown(backup_sb, i->opts.bucket_size >> 9);
 			l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
 		}
 
@@ -334,7 +332,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
 		}
 
 		bch2_super_write(i->bdev->bd_fd, sb.sb);
-		close(i->bdev->bd_fd);
+		xclose(i->bdev->bd_fd);
 	}
 
 	return sb.sb;
@@ -352,9 +350,9 @@ void bch2_super_write(int fd, struct bch_sb *sb)
 	if (sb->offset == BCH_SB_SECTOR) {
 		/* Write backup layout */
 
-		BUG_ON(bs > 4096);
+		unsigned buflen = max(bs, 4096);
 
-		char *buf = aligned_alloc(bs, bs);
+		char *buf = aligned_alloc(buflen, buflen);
 		xpread(fd, buf, bs, 4096 - bs);
 		memcpy(buf + bs - sizeof(sb->layout),
 		       &sb->layout,
@@ -407,16 +405,16 @@ int bcachectl_open(void)
 
 void bcache_fs_close(struct bchfs_handle fs)
 {
-	close(fs.ioctl_fd);
-	close(fs.sysfs_fd);
+	xclose(fs.ioctl_fd);
+	xclose(fs.sysfs_fd);
 }
 
-static int bcache_fs_open_by_uuid(const char *uuid_str, struct bchfs_handle *fs)
+static int bcache_fs_open_by_name(const char *name, struct bchfs_handle *fs)
 {
-	if (uuid_parse(uuid_str, fs->uuid.b))
-		return -1;
+	if (uuid_parse(name, fs->uuid.b))
+		memset(&fs->uuid, 0, sizeof(fs->uuid));
 
-	char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+	char *sysfs = mprintf(SYSFS_BASE "%s", name);
 	fs->sysfs_fd = open(sysfs, O_RDONLY);
 	free(sysfs);
 
@@ -432,13 +430,21 @@ static int bcache_fs_open_by_uuid(const char *uuid_str, struct bchfs_handle *fs)
 	return fs->ioctl_fd < 0 ? -errno : 0;
 }
 
+#ifndef FS_IOC_GETFSSYSFSPATH
+struct fs_sysfs_path {
+	__u8 len;
+	__u8 name[128];
+};
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
+#endif
+
 int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
 {
 	memset(fs, 0, sizeof(*fs));
 	fs->dev_idx = -1;
 
 	if (!uuid_parse(path, fs->uuid.b))
-		return bcache_fs_open_by_uuid(path, fs);
+		return bcache_fs_open_by_name(path, fs);
 
 	/* It's a path: */
 	int path_fd = open(path, O_RDONLY);
@@ -452,12 +458,19 @@ int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
 
 		fs->uuid = uuid.uuid;
 
+		struct fs_sysfs_path fs_sysfs_path;
+		if (!ioctl(path_fd, FS_IOC_GETFSSYSFSPATH, &fs_sysfs_path)) {
+			char *sysfs = mprintf("/sys/fs/%s", fs_sysfs_path.name);
+			fs->sysfs_fd = xopen(sysfs, O_RDONLY);
+			free(sysfs);
+		} else {
 			char uuid_str[40];
 			uuid_unparse(uuid.uuid.b, uuid_str);
 
 			char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
 			fs->sysfs_fd = xopen(sysfs, O_RDONLY);
 			free(sysfs);
+		}
 		return 0;
 	}
 
@@ -465,7 +478,7 @@ int bcache_fs_open_fallible(const char *path, struct bchfs_handle *fs)
 	char buf[1024], *uuid_str;
 
 	struct stat stat = xstat(path);
-	close(path_fd);
+	xclose(path_fd);
 
 	if (S_ISBLK(stat.st_mode)) {
 		char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
@@ -502,7 +515,7 @@ read_super:
 		bch2_free_super(&sb);
 	}
 
-	return bcache_fs_open_by_uuid(uuid_str, fs);
+	return bcache_fs_open_by_name(uuid_str, fs);
 }
 
 struct bchfs_handle bcache_fs_open(const char *path)
@@ -521,14 +534,14 @@ struct bchfs_handle bcache_fs_open(const char *path)
 struct bchfs_handle bchu_fs_open_by_dev(const char *path, int *idx)
 {
 	struct bch_opts opts = bch2_opts_empty();
-	char buf[1024], *uuid_str;
+	char buf[1024], *fs_str;
 
 	struct stat stat = xstat(path);
 
 	if (S_ISBLK(stat.st_mode)) {
 		char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
-				      major(stat.st_dev),
-				      minor(stat.st_dev));
+				      major(stat.st_rdev),
+				      minor(stat.st_rdev));
 
 		ssize_t len = readlink(sysfs, buf, sizeof(buf));
 		free(sysfs);
@@ -536,13 +549,19 @@ struct bchfs_handle bchu_fs_open_by_dev(const char *path, int *idx)
 		if (len <= 0)
 			goto read_super;
 
-		char *p = strrchr(buf, '/');
-		if (!p || sscanf(p + 1, "dev-%u", idx) != 1)
+		fs_str = strstr(buf, "bcachefs/");
+		if (!fs_str)
 			die("error parsing sysfs");
 
-		*p = '\0';
-		p = strrchr(buf, '/');
-		uuid_str = p + 1;
+		fs_str += 9;
+		char *dev_str = strchr(fs_str, '/');
+		if (!dev_str)
+			die("error parsing sysfs");
+
+		*dev_str = '\0';
+		dev_str++;
+		if (sscanf(dev_str, "dev-%u", idx) != 1)
+			die("error parsing sysfs");
 	} else {
 read_super:
 		opt_set(opts, noexcl, true);
@@ -554,13 +573,18 @@ read_super:
 			die("Error opening %s: %s", path, strerror(-ret));
 
 		*idx = sb.sb->dev_idx;
-		uuid_str = buf;
-		uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+		fs_str = buf;
+		uuid_unparse(sb.sb->user_uuid.b, fs_str);
 
 		bch2_free_super(&sb);
 	}
 
-	return bcache_fs_open(uuid_str);
+	struct bchfs_handle fs;
+	int ret = bcache_fs_open_by_name(fs_str, &fs);
+	if (ret)
+		die("Error opening filesystem at %s (%s): %s",
+		    path, fs_str, strerror(-ret));
+	return fs;
 }
 
 int bchu_dev_path_to_idx(struct bchfs_handle fs, const char *dev_path)
@@ -612,12 +636,14 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
 	}
 	printf("\nDone\n");
 
-	close(progress_fd);
+	xclose(progress_fd);
 	return 0;
 }
 
 /* option parsing */
 
+#include <getopt.h>
+
 void bch2_opt_strs_free(struct bch_opt_strs *opts)
 {
 	unsigned i;
@@ -628,6 +654,64 @@ void bch2_opt_strs_free(struct bch_opt_strs *opts)
 	}
 }
 
+static bool opt_type_filter(const struct bch_option *opt, unsigned opt_types)
+{
+	if (!(opt->flags & opt_types))
+		return false;
+
+	if ((opt_types & OPT_FORMAT) &&
+	    !opt->set_sb && !opt->set_member)
+		return false;
+
+	return true;
+}
+
+const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
+						unsigned opt_types)
+{
+	if (optind >= argc)
+		return NULL;
+
+	if (argv[optind][0] != '-' ||
+	    argv[optind][1] != '-')
+		return NULL;
+
+	char *optstr = strdup(argv[optind] + 2);
+	optarg = argv[optind + 1];
+
+	char *eq = strchr(optstr, '=');
+	if (eq) {
+		*eq = '\0';
+		optarg = eq + 1;
+	}
+
+	if (!optarg)
+		optarg = "1";
+
+	int optid = bch2_opt_lookup(optstr);
+	if (optid < 0)
+		goto noopt;
+
+	const struct bch_option *opt = bch2_opt_table + optid;
+	if (!opt_type_filter(opt, opt_types))
+		goto noopt;
+
+	optind++;
+
+	if (opt->type != BCH_OPT_BOOL) {
+		if (optarg == argv[optind])
+			optind++;
+	} else {
+		optarg = NULL;
+	}
+
+	return opt;
+noopt:
+	free(optstr);
+	return NULL;
+}
+
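A sketch of how a caller can drain leading --option[=value] words with the new bch2_cmdline_opt_parse() before handing the rest of argv to getopt_long() (the loop is illustrative; only the names come from the diff):

	const struct bch_option *opt;

	while ((opt = bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT))) {
		/* for non-boolean options optarg points at the value;
		 * for BCH_OPT_BOOL it is left NULL: */
		printf("option %s = %s\n", opt->attr.name,
		       optarg ?: "(bool, enabled)");
	}

Because the helper advances optind itself, it composes cleanly with a subsequent getopt_long() pass over the remaining arguments.
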
 struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
 					  unsigned opt_types)
 {
@@ -722,12 +806,10 @@ void bch2_opts_usage(unsigned opt_types)
 	const struct bch_option *opt;
 	unsigned i, c = 0, helpcol = 30;
 
 	for (opt = bch2_opt_table;
 	     opt < bch2_opt_table + bch2_opts_nr;
 	     opt++) {
-		if (!(opt->flags & opt_types))
+		if (!opt_type_filter(opt, opt_types))
 			continue;
 
 		c += printf(" --%s", opt->attr.name);
@@ -777,7 +859,7 @@ void bch2_opts_usage(unsigned opt_types)
 
 dev_names bchu_fs_get_devices(struct bchfs_handle fs)
 {
-	DIR *dir = fdopendir(fs.sysfs_fd);
+	DIR *dir = fdopendir(dup(fs.sysfs_fd));
 	struct dirent *d;
 	dev_names devs;

@@ -28,6 +28,9 @@ struct {
 };
 
 void bch2_opt_strs_free(struct bch_opt_strs *);
+
+const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
+						unsigned opt_types);
 struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
 struct bch_opts bch2_parse_opts(struct bch_opt_strs);
 void bch2_opts_usage(unsigned);
@@ -42,14 +45,24 @@ struct format_opts {
 	char		*source;
 };
 
+static inline unsigned bcachefs_kernel_version(void)
+{
+	return !access("/sys/module/bcachefs/parameters/version", R_OK)
+		? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
+		: 0;
+}
+
 static inline struct format_opts format_opts_default()
 {
-	unsigned version = !access("/sys/module/bcachefs/parameters/version", R_OK)
-		? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
-		: bcachefs_metadata_version_current;
+	/*
+	 * Ensure bcachefs module is loaded so we know the supported on disk
+	 * format version:
+	 */
+	system("modprobe bcachefs > /dev/null 2>&1");
 
 	return (struct format_opts) {
-		.version = version,
+		.version = bcachefs_kernel_version() ?:
+			bcachefs_metadata_version_current,
 		.superblock_size = SUPERBLOCK_SIZE_DEFAULT,
 	};
 }
@@ -58,33 +71,35 @@ struct dev_opts {
 	struct file		*file;
 	struct block_device	*bdev;
 	char			*path;
-	u64			size;		/* bytes */
-	u64			bucket_size;	/* bytes */
-	const char		*label;
-	unsigned		data_allowed;
-	unsigned		durability;
-	bool			discard;
-
-	u64			nbuckets;
 
 	u64			sb_offset;
 	u64			sb_end;
+
+	u64			nbuckets;
+	u64			fs_size;
+
+	const char		*label; /* make this a bch_opt */
+
+	struct bch_opts		opts;
 };
 
+typedef DARRAY(struct dev_opts) dev_opts_list;
+
 static inline struct dev_opts dev_opts_default()
 {
-	return (struct dev_opts) {
-		.data_allowed	= ~0U << 2,
-		.durability	= 1,
-	};
+	return (struct dev_opts) { .opts = bch2_opts_empty() };
 }
 
-u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+void bch2_sb_layout_init(struct bch_sb_layout *,
			 unsigned, unsigned, u64, u64);
+
+u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
 void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
 
 struct bch_sb *bch2_format(struct bch_opt_strs,
 			   struct bch_opts,
-			   struct format_opts, struct dev_opts *, size_t);
+			   struct format_opts,
+			   dev_opts_list devs);
 
 void bch2_super_write(int, struct bch_sb *);
 struct bch_sb *__bch2_super_read(int, u64);
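dev_opts_list is the tree's growable-array (darray) idiom, and the rest of the diff leans on a small set of operations over it. Shown together as a fragment (DARRAY, darray_push, darray_for_each and darray_exit come from the tree's darray.h; the loop body is a placeholder):

	dev_opts_list devs = {};

	darray_push(&devs, dev_opts_default());	/* may reallocate devs.data */

	u64 total = 0;
	darray_for_each(devs, i)		/* i is a struct dev_opts * */
		total += i->fs_size;

	struct dev_opts *first = &devs.data[0];	/* devs.nr elements live here */

	darray_exit(&devs);			/* frees devs.data; first is now dangling */

Note the reallocation caveat: pointers like first must be taken after the last darray_push(), which is why migrate_fs() above pushes before grabbing &devs.data[0].
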
@@ -1,13 +1,14 @@
 #include <dirent.h>
 #include <sys/xattr.h>
+#include <linux/dcache.h>
+#include <linux/sort.h>
 #include <linux/xattr.h>
 
 #include "posix_to_bcachefs.h"
 #include "libbcachefs/alloc_foreground.h"
 #include "libbcachefs/buckets.h"
-#include "libbcachefs/fs-common.h"
 #include "libbcachefs/io_write.h"
+#include "libbcachefs/namei.h"
 #include "libbcachefs/str_hash.h"
 #include "libbcachefs/xattr.h"
 
@@ -159,7 +160,7 @@ static void write_data(struct bch_fs *c,
 	op.nr_replicas	= 1;
 	op.subvol	= 1;
 	op.pos		= SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
-	op.flags |= BCH_WRITE_sync;
+	op.flags |= BCH_WRITE_sync|BCH_WRITE_only_specified_devs;
 
 	int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
 					    c->opts.data_replicas, 0);
@@ -264,7 +265,8 @@ void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
 
 static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		      int src_fd, u64 src_size,
-		      char *src_path, struct copy_fs_state *s)
+		      char *src_path, struct copy_fs_state *s,
+		      u64 reserve_start)
 {
 	struct fiemap_iter iter;
 	struct fiemap_extent e;
@@ -295,11 +297,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 			continue;
 		}
 
-		/*
-		 * if the data is below 1 MB, copy it so it doesn't conflict
-		 * with bcachefs's potentially larger superblock:
-		 */
-		if (e.fe_physical < 1 << 20) {
+		/* If the data is in bcachefs's superblock region, copy it: */
+		if (e.fe_physical < reserve_start) {
 			copy_data(c, dst, src_fd, e.fe_logical,
 				  e.fe_logical + min(src_size - e.fe_logical,
 						     e.fe_length));
@@ -315,20 +314,38 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
 	fiemap_iter_exit(&iter);
 }
 
+static int dirent_cmp(const void *_l, const void *_r)
+{
+	const struct dirent *l = _l;
+	const struct dirent *r = _r;
+
+	return strcmp(l->d_name, r->d_name);
+}
+
 static void copy_dir(struct copy_fs_state *s,
 		     struct bch_fs *c,
 		     struct bch_inode_unpacked *dst,
-		     int src_fd, const char *src_path)
+		     int src_fd, const char *src_path,
+		     u64 reserve_start)
 {
 	DIR *dir = fdopendir(src_fd);
 	struct dirent *d;
+	DARRAY(struct dirent) dirents = {};
 
-	while ((errno = 0), (d = readdir(dir))) {
+	while ((errno = 0), (d = readdir(dir)))
+		darray_push(&dirents, *d);
+
+	if (errno)
+		die("readdir error: %m");
+
+	sort(dirents.data, dirents.nr, sizeof(dirents.data[0]), dirent_cmp, NULL);
+
+	darray_for_each(dirents, d) {
 		struct bch_inode_unpacked inode;
 		int fd;
 
 		if (fchdir(src_fd))
-			die("chdir error: %m");
+			die("fchdir error: %m");
 
 		struct stat stat =
 			xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
@@ -369,16 +386,16 @@ static void copy_dir(struct copy_fs_state *s,
 		switch (mode_to_type(stat.st_mode)) {
 		case DT_DIR:
 			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
-			copy_dir(s, c, &inode, fd, child_path);
-			close(fd);
+			copy_dir(s, c, &inode, fd, child_path, reserve_start);
+			xclose(fd);
 			break;
 		case DT_REG:
 			inode.bi_size = stat.st_size;
 
 			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
 			copy_file(c, &inode, fd, stat.st_size,
-				  child_path, s);
-			close(fd);
+				  child_path, s, reserve_start);
+			xclose(fd);
 			break;
 		case DT_LNK:
 			inode.bi_size = stat.st_size;
@@ -402,14 +419,13 @@ next:
 		free(child_path);
 	}
 
-	if (errno)
-		die("readdir error: %m");
 	closedir(dir);
+	darray_exit(&dirents);
 }
 
 static void reserve_old_fs_space(struct bch_fs *c,
 				 struct bch_inode_unpacked *root_inode,
-				 ranges *extents)
+				 ranges *extents,
+				 u64 reserve_start)
 {
 	struct bch_dev *ca = c->devs[0];
 	struct bch_inode_unpacked dst;
@@ -422,15 +438,25 @@ static void reserve_old_fs_space(struct bch_fs *c,
 
 	ranges_sort_merge(extents);
 
-	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
-		link_data(c, &dst, i.start, i.start, i.end - i.start);
+	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+		if (i.end <= reserve_start)
+			continue;
+
+		u64 start = max(i.start, reserve_start);
+
+		link_data(c, &dst, start, start, i.end - start);
+	}
 
 	update_inode(c, &dst);
 }
 
 void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
-	     struct copy_fs_state *s)
+	     struct copy_fs_state *s, u64 reserve_start)
 {
 	if (!S_ISDIR(xfstat(src_fd).st_mode))
 		die("%s is not a directory", src_path);
 
 	if (s->type == BCH_MIGRATE_migrate)
 		syncfs(src_fd);
 
 	struct bch_inode_unpacked root_inode;
@@ -440,23 +466,20 @@ void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
 		die("error looking up root directory: %s", bch2_err_str(ret));
 
 	if (fchdir(src_fd))
-		die("chdir error: %m");
+		die("fchdir error: %m");
 
 	struct stat stat = xfstat(src_fd);
 	copy_times(c, &root_inode, &stat);
 	copy_xattrs(c, &root_inode, ".");
 
 	/* now, copy: */
-	copy_dir(s, c, &root_inode, src_fd, src_path);
+	copy_dir(s, c, &root_inode, src_fd, src_path, reserve_start);
 
-	if (BCH_MIGRATE_migrate == s->type)
-		reserve_old_fs_space(c, &root_inode, &s->extents);
+	if (s->type == BCH_MIGRATE_migrate)
+		reserve_old_fs_space(c, &root_inode, &s->extents, reserve_start);
 
 	update_inode(c, &root_inode);
 
 	if (BCH_MIGRATE_migrate == s->type)
 		darray_exit(&s->extents);
 
 	genradix_free(&s->hardlinks);
 }
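The readdir-into-darray-then-sort() pass above makes traversal order deterministic, so repeated migrations lay files out identically. For reference, a portable equivalent of the same technique using only libc (illustrative; the tool itself uses the darray path shown above):

#include <dirent.h>
#include <stdlib.h>

static int walk_sorted(const char *path)
{
	struct dirent **ents;
	int n = scandir(path, &ents, NULL, alphasort);

	if (n < 0)
		return -1;

	for (int i = 0; i < n; i++) {
		/* visit ents[i]->d_name in sorted order */
		free(ents[i]);
	}
	free(ents);
	return 0;
}
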
@@ -50,5 +50,5 @@ struct copy_fs_state {
  * initialized (`hardlinks` is initialized with zeroes).
  */
 void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
-	     struct copy_fs_state *s);
+	     struct copy_fs_state *s, u64);
 #endif /* _LIBBCACHE_H */

@@ -107,7 +107,7 @@ void write_file_str(int dirfd, const char *path, const char *str)
 	wrote = write(fd, str, len);
 	if (wrote != len)
 		die("read error: %m");
-	close(fd);
+	xclose(fd);
 }
 
 char *read_file_str(int dirfd, const char *path)
@@ -129,7 +129,7 @@ char *read_file_str(int dirfd, const char *path)
 		buf = NULL;
 	}
 
-	close(fd);
+	xclose(fd);
 
 	return buf;
 }
@@ -761,3 +761,14 @@ darray_str get_or_split_cmdline_devs(int argc, char *argv[])
 
 	return ret;
 }
+
+char *pop_cmd(int *argc, char *argv[])
+{
+	char *cmd = argv[1];
+	if (!(*argc < 2))
+		memmove(&argv[1], &argv[2], (*argc - 2) * sizeof(argv[0]));
+	(*argc)--;
+	argv[*argc] = NULL;
+
+	return cmd;
+}
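Usage sketch for the new pop_cmd(): it splices argv[1] out of the vector so the remainder can be handed directly to a subcommand parser (the dispatch below is illustrative; the *_cmds entry points are declared in cmds.h):

#include <string.h>

int main(int argc, char *argv[])
{
	char *cmd = pop_cmd(&argc, argv); /* e.g. "device" from: bcachefs device add ... */

	if (cmd && !strcmp(cmd, "device"))
		return device_cmds(argc, argv);
	if (cmd && !strcmp(cmd, "data"))
		return data_cmds(argc, argv);

	bcachefs_usage();
	return 1;
}
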
@@ -82,6 +82,12 @@ static inline void *xrealloc(void *p, size_t size)
 	_ret;								\
 })
 
+#define xclose(_fd)							\
+do {									\
+	if (close(_fd))							\
+		die("error closing fd: %m at %s:%u", __FILE__, __LINE__);\
+} while (0)
+
 void write_file_str(int, const char *, const char *);
 char *read_file_str(int, const char *);
 u64 read_file_u64(int, const char *);
@@ -213,4 +219,6 @@ unsigned version_parse(char *);
 
 darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
 
+char *pop_cmd(int *argc, char *argv[]);
+
 #endif /* _TOOLS_UTIL_H */

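xclose() exists because close(2) can be the first point where a deferred writeback error surfaces; ignoring its return value can silently hide data loss. Typical use (path illustrative):

	int fd = xopen("/dev/sdb", O_RDWR);
	/* ... write superblocks ... */
	xclose(fd);	/* dies with %m plus file:line if close() fails */
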
debian/changelog
@@ -1,3 +1,20 @@
+bcachefs-tools (1:1.25.2-1) unstable; urgency=medium
+
+  * don't pick a non power of two bucket size
+  * pick_bucket_size() was picking non blocksize aligned bucket sizes, which
+    uncovered a few bugs.
+
+ -- Kent Overstreet <kent.overstreet@linux.dev>  Sat, 19 Apr 2025 00:24:19 +0300
+
+bcachefs-tools (1:1.25.1-1) unstable; urgency=medium
+
+  * bcachefs migrate now works
+  * bcachefs list-journal transaction filter fixes
+  * bcachefs format now loads the bcachefs module, so that we can correctly
+    detect the version supported by the running kernel
+
+ -- Kent Overstreet <kent.overstreet@linux.dev>  Tue, 01 Apr 2025 13:06:37 -0400
+
 bcachefs-tools (1:1.9.1-1) unstable; urgency=medium
 
   * New upstream release

flake.lock
@@ -1,17 +1,12 @@
 {
   "nodes": {
     "crane": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
       "locked": {
-        "lastModified": 1721842668,
-        "narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=",
+        "lastModified": 1742394900,
+        "narHash": "sha256-vVOAp9ahvnU+fQoKd4SEXB2JG2wbENkpqcwlkIXgUC0=",
         "owner": "ipetkov",
         "repo": "crane",
-        "rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf",
+        "rev": "70947c1908108c0c551ddfd73d4f750ff2ea67cd",
         "type": "github"
       },
       "original": {
@@ -20,35 +15,14 @@
         "type": "github"
       }
     },
-    "fenix": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "rust-analyzer-src": "rust-analyzer-src"
-      },
-      "locked": {
-        "lastModified": 1722320953,
-        "narHash": "sha256-DfGaJtgrzcwPQYLTvjL1KaVIjpvi85b2MpM6yEGvJzM=",
-        "owner": "nix-community",
-        "repo": "fenix",
-        "rev": "483df76def3e5010d709aa3a0418ba2088503994",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-community",
-        "repo": "fenix",
-        "type": "github"
-      }
-    },
     "flake-compat": {
       "flake": false,
       "locked": {
-        "lastModified": 1696426674,
-        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+        "lastModified": 1733328505,
+        "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
         "owner": "edolstra",
         "repo": "flake-compat",
-        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
         "type": "github"
       },
       "original": {
@@ -62,11 +36,11 @@
         "nixpkgs-lib": "nixpkgs-lib"
       },
       "locked": {
-        "lastModified": 1719994518,
-        "narHash": "sha256-pQMhCCHyQGRzdfAkdJ4cIWiw+JNuWsTX7f0ZYSyz0VY=",
+        "lastModified": 1741352980,
+        "narHash": "sha256-+u2UunDA4Cl5Fci3m7S643HzKmIDAe+fiXrLqYsR2fs=",
         "owner": "hercules-ci",
         "repo": "flake-parts",
-        "rev": "9227223f6d922fee3c7b190b2cc238a99527bbb7",
+        "rev": "f4330d22f1c5d2ba72d3d22df5597d123fdb60a9",
         "type": "github"
       },
       "original": {
@@ -75,13 +49,33 @@
         "type": "github"
       }
     },
+    "nix-github-actions": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1737420293,
+        "narHash": "sha256-F1G5ifvqTpJq7fdkT34e/Jy9VCyzd5XfJ9TO8fHhJWE=",
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "rev": "f4158fa080ef4503c8f4c820967d946c2af31ec9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1722185531,
-        "narHash": "sha256-veKR07psFoJjINLC8RK4DiLniGGMgF3QMlS4tb74S6k=",
+        "lastModified": 1742422364,
+        "narHash": "sha256-mNqIplmEohk5jRkqYqG19GA8MbQ/D4gQSK0Mu4LvfRQ=",
         "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "52ec9ac3b12395ad677e8b62106f0b98c1f8569d",
+        "rev": "a84ebe20c6bc2ecbcfb000a50776219f48d134cc",
         "type": "github"
       },
       "original": {
@@ -93,40 +87,47 @@
     },
     "nixpkgs-lib": {
       "locked": {
-        "lastModified": 1719876945,
-        "narHash": "sha256-Fm2rDDs86sHy0/1jxTOKB1118Q0O3Uc7EC0iXvXKpbI=",
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
+        "lastModified": 1740877520,
+        "narHash": "sha256-oiwv/ZK/2FhGxrCkQkB83i7GnWXPPLzoqFHpDD3uYpk=",
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "rev": "147dee35aab2193b174e4c0868bd80ead5ce755c",
+        "type": "github"
       },
       "original": {
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "type": "github"
       }
     },
     "root": {
       "inputs": {
         "crane": "crane",
-        "fenix": "fenix",
         "flake-compat": "flake-compat",
         "flake-parts": "flake-parts",
+        "nix-github-actions": "nix-github-actions",
         "nixpkgs": "nixpkgs",
+        "rust-overlay": "rust-overlay",
         "treefmt-nix": "treefmt-nix"
       }
     },
-    "rust-analyzer-src": {
-      "flake": false,
+    "rust-overlay": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
       "locked": {
-        "lastModified": 1722262053,
-        "narHash": "sha256-KxjkPVn9rQqYam6DhiN/V2NcMXtYW25maxkJoiVMpmE=",
-        "owner": "rust-lang",
-        "repo": "rust-analyzer",
-        "rev": "a021b85be57d34b1eed687fcafd5d5ec64b2d853",
+        "lastModified": 1742524367,
+        "narHash": "sha256-KzTwk/5ETJavJZYV1DEWdCx05M4duFCxCpRbQSKWpng=",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "rev": "70bf752d176b2ce07417e346d85486acea9040ef",
         "type": "github"
       },
       "original": {
-        "owner": "rust-lang",
-        "ref": "nightly",
-        "repo": "rust-analyzer",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
         "type": "github"
       }
     },
@@ -137,11 +138,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1722330636,
-        "narHash": "sha256-uru7JzOa33YlSRwf9sfXpJG+UAV+bnBEYMjrzKrQZFw=",
+        "lastModified": 1742370146,
+        "narHash": "sha256-XRE8hL4vKIQyVMDXykFh4ceo3KSpuJF3ts8GKwh5bIU=",
        "owner": "numtide",
         "repo": "treefmt-nix",
-        "rev": "768acdb06968e53aa1ee8de207fd955335c754b7",
+        "rev": "adc195eef5da3606891cedf80c0d9ce2d3190808",
         "type": "github"
       },
       "original": {
197
flake.nix
197
flake.nix
@@ -11,13 +11,10 @@
inputs.nixpkgs.follows = "nixpkgs";
};

crane = {
url = "github:ipetkov/crane";
inputs.nixpkgs.follows = "nixpkgs";
};
crane.url = "github:ipetkov/crane";

fenix = {
url = "github:nix-community/fenix";
rust-overlay = {
url = "github:oxalica/rust-overlay";
inputs.nixpkgs.follows = "nixpkgs";
};

@@ -25,6 +22,11 @@
url = "github:edolstra/flake-compat";
flake = false;
};

nix-github-actions = {
url = "github:nix-community/nix-github-actions";
inputs.nixpkgs.follows = "nixpkgs";
};
};

outputs =
@@ -33,27 +35,31 @@
nixpkgs,
flake-parts,
treefmt-nix,
fenix,
crane,
...
rust-overlay,
flake-compat,
nix-github-actions,
}:
let
systems = nixpkgs.lib.filter (s: nixpkgs.lib.hasSuffix "-linux" s) nixpkgs.lib.systems.flakeExposed;
in
flake-parts.lib.mkFlake { inherit inputs; } {
imports = [ inputs.treefmt-nix.flakeModule ];

# can be extended, but these have proper binary cache support in nixpkgs
# as of writing.
systems = [
"aarch64-linux"
"x86_64-linux"
"i686-linux"
];
flake = {
githubActions = nix-github-actions.lib.mkGithubMatrix {
# github actions supports fewer architectures
checks = nixpkgs.lib.getAttrs [ "aarch64-linux" "x86_64-linux" ] self.checks;
};
};

inherit systems;

perSystem =
{
self',
config,
lib,
pkgs,
system,
...
}:
@@ -62,24 +68,63 @@
inherit (lib.lists) findFirst;
inherit (lib.strings) hasPrefix removePrefix substring;

pkgs = import nixpkgs {
inherit system;
overlays = [ (import rust-overlay) ];
};

cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
rustfmtToml = builtins.fromTOML (builtins.readFile ./rustfmt.toml);

craneLib = crane.mkLib pkgs;

rev = self.shortRev or self.dirtyShortRev or (substring 0 8 self.lastModifiedDate);
makefileVersion = removePrefix "VERSION=" (
findFirst (line: hasPrefix "VERSION=" line) "VERSION=0.0.0" (split "\n" (readFile ./Makefile))
);
version = "${makefileVersion}+${rev}";

commonArgs = {
mkCommon =
{
crane,
pkgs,
rustVersion ? "latest",

# build time
buildPackages,
pkg-config,
rustPlatform,
stdenv,

# run time
keyutils,
libaio,
libsodium,
liburcu,
libuuid,
lz4,
udev,
zlib,
zstd,
}:
let
inherit (stdenv) cc hostPlatform;

craneLib = (crane.mkLib pkgs).overrideToolchain (
p: p.rust-bin.stable."${rustVersion}".minimal.override { extensions = [ "clippy" ]; }
);

args = {
inherit version;
src = self;
strictDeps = true;

env = {
PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";

CARGO_BUILD_TARGET = hostPlatform.rust.rustcTargetSpec;
"CARGO_TARGET_${hostPlatform.rust.cargoEnvVarTarget}_LINKER" = "${cc.targetPrefix}cc";
HOST_CC = "${cc.nativePrefix}cc";
TARGET_CC = "${cc.targetPrefix}cc";
};

makeFlags = [
@@ -90,13 +135,16 @@

dontStrip = true;

nativeBuildInputs = with pkgs; [
depsBuildBuild = [
buildPackages.stdenv.cc
];

nativeBuildInputs = [
pkg-config
rustPlatform.bindgenHook
];

buildInputs = with pkgs; [
attr
buildInputs = [
keyutils
libaio
libsodium
@@ -115,19 +163,26 @@
};
};

cargoArtifacts = craneLib.buildDepsOnly (commonArgs // { pname = cargoToml.package.name; });
cargoArtifacts = craneLib.buildDepsOnly args;
in
{
packages.default = config.packages.bcachefs-tools;
packages.bcachefs-tools = craneLib.buildPackage (
commonArgs
inherit args cargoArtifacts craneLib;
};
common = pkgs.callPackage mkCommon { inherit crane; };

mkPackage =
{ common, name }:
common.craneLib.buildPackage (
common.args
// {
inherit cargoArtifacts;
inherit (common) cargoArtifacts;
pname = name;

enableParallelBuilding = true;
buildPhaseCargoCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
'';
doNotPostBuildInstallCargoBinaries = true;
installPhaseCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
'';
@@ -143,26 +198,66 @@
}
);

packages.bcachefs-tools-fuse = config.packages.bcachefs-tools.overrideAttrs (
mkPackages =
name: systems:
let
packagesForSystem =
crossSystem:
let
localSystem = system;
pkgs' = import nixpkgs {
inherit crossSystem localSystem;
overlays = [ (import rust-overlay) ];
};

common = pkgs'.callPackage mkCommon { inherit crane; };
package = pkgs'.callPackage mkPackage { inherit common name; };
packageFuse = package.overrideAttrs (
final: prev: {
makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
buildInputs = prev.buildInputs ++ [ pkgs.fuse3 ];
buildInputs = prev.buildInputs ++ [ pkgs'.fuse3 ];
}
);

checks.cargo-clippy = craneLib.cargoClippy (
commonArgs
in
[
(lib.nameValuePair "${name}-${crossSystem}" package)
(lib.nameValuePair "${name}-fuse-${crossSystem}" packageFuse)
];
in
lib.listToAttrs (lib.flatten (map packagesForSystem systems));
in
{
packages =
let
inherit (cargoToml.package) name;
in
(mkPackages name systems)
// {
inherit cargoArtifacts;
cargoClippyExtraArgs = "--all-targets -- --deny warnings";
${name} = config.packages."${name}-${system}";
"${name}-fuse" = config.packages."${name}-fuse-${system}";
default = config.packages.${name};
};

checks = {
inherit (config.packages)
bcachefs-tools
bcachefs-tools-fuse
bcachefs-tools-fuse-i686-linux
;

cargo-clippy = common.craneLib.cargoClippy (
common.args
// {
inherit (common) cargoArtifacts;
cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
}
);

# we have to build our own `craneLib.cargoTest`
checks.cargo-test = craneLib.mkCargoDerivation (
commonArgs
cargo-test = common.craneLib.mkCargoDerivation (
common.args
// {
inherit cargoArtifacts;
inherit (common) cargoArtifacts;
doCheck = true;

enableParallelChecking = true;
@@ -176,6 +271,23 @@
}
);

# cargo clippy with the current minimum supported rust version
# according to Cargo.toml
msrv =
let
rustVersion = cargoToml.package.rust-version;
common = pkgs.callPackage mkCommon { inherit crane rustVersion; };
in
common.craneLib.cargoClippy (
common.args
// {
pname = "msrv";
inherit (common) cargoArtifacts;
cargoClippyExtraArgs = "--all-targets --all-features";
}
);
};

devShells.default = pkgs.mkShell {
inputsFrom = [
config.packages.default
@@ -190,9 +302,12 @@
cargo-audit
cargo-outdated
clang-tools
clippy
rust-analyzer
rustc
(rust-bin.stable.latest.minimal.override {
extensions = [
"rust-analyzer"
"rust-src"
];
})
];
};

@@ -204,7 +319,7 @@
nixfmt.enable = true;
rustfmt.edition = rustfmtToml.edition;
rustfmt.enable = true;
rustfmt.package = fenix.packages.${system}.default.rustfmt;
rustfmt.package = pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.rustfmt);
};
};
};

include/crypto/algapi.h (deleted)
@@ -1,7 +0,0 @@
#ifndef _CRYPTO_ALGAPI_H
#define _CRYPTO_ALGAPI_H

#include <linux/crypto.h>
#include <crypto/skcipher.h>

#endif /* _CRYPTO_ALGAPI_H */

include/crypto/chacha.h
@@ -7,9 +7,57 @@

#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/unaligned.h>

#define CHACHA_IV_SIZE 16
#define CHACHA_KEY_SIZE 32
#define CHACHA_BLOCK_SIZE 64
#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))

enum chacha_constants { /* expand 32-byte k */
CHACHA_CONSTANT_EXPA = 0x61707865U,
CHACHA_CONSTANT_ND_3 = 0x3320646eU,
CHACHA_CONSTANT_2_BY = 0x79622d32U,
CHACHA_CONSTANT_TE_K = 0x6b206574U
};

static inline void chacha_init_consts(u32 *state)
{
state[0] = CHACHA_CONSTANT_EXPA;
state[1] = CHACHA_CONSTANT_ND_3;
state[2] = CHACHA_CONSTANT_2_BY;
state[3] = CHACHA_CONSTANT_TE_K;
}

static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
{
chacha_init_consts(state);
state[4] = key[0];
state[5] = key[1];
state[6] = key[2];
state[7] = key[3];
state[8] = key[4];
state[9] = key[5];
state[10] = key[6];
state[11] = key[7];
state[12] = get_unaligned_le32(iv + 0);
state[13] = get_unaligned_le32(iv + 4);
state[14] = get_unaligned_le32(iv + 8);
state[15] = get_unaligned_le32(iv + 12);
}

#include <sodium/crypto_stream_chacha20.h>

static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes)
{
u32 *key = state + 4;
u32 *iv = state + 12;
int ret = crypto_stream_chacha20_xor_ic(dst, src, bytes,
(void *) &iv[2],
iv[0] | ((u64) iv[1] << 32),
(void *) key);
BUG_ON(ret);
}

#endif
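A note on the chacha shim above: chacha_init() lays the state out in the standard ChaCha order (words 0-3 constants, 4-11 key, 12-15 counter plus nonce), and chacha20_crypt() passes words 14-15 to libsodium as the 8-byte nonce and words 12-13 as the 64-bit initial block counter. A minimal usage sketch, assuming the shim's u8/u32 types; the key and IV values are hypothetical:

#include <crypto/chacha.h>

/* Encrypt (or decrypt: the XOR is symmetric) buf in place with a
 * caller-provided 256-bit key and 16-byte IV. */
static void chacha20_xor_buf(u8 *buf, unsigned int len,
			     const u32 key[8], const u8 iv[CHACHA_IV_SIZE])
{
	u32 state[CHACHA_STATE_WORDS];

	chacha_init(state, key, iv);		/* constants, key, counter+nonce */
	chacha20_crypt(state, buf, buf, len);	/* in-place keystream XOR */
}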
include/crypto/hash.h (deleted)
@@ -1,104 +0,0 @@
/*
* Hash: Hash algorithms under the crypto API
*
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/

#ifndef _CRYPTO_HASH_H
#define _CRYPTO_HASH_H

#include <linux/crypto.h>

struct shash_desc;

struct shash_alg {
int (*init)(struct shash_desc *desc);
int (*update)(struct shash_desc *desc, const u8 *data, unsigned len);
int (*final)(struct shash_desc *desc, u8 *out);
int (*finup)(struct shash_desc *desc, const u8 *data,
unsigned len, u8 *out);
int (*digest)(struct shash_desc *desc, const u8 *data,
unsigned len, u8 *out);

unsigned descsize;
unsigned digestsize;
struct crypto_alg base;
};

int crypto_register_shash(struct shash_alg *alg);

struct crypto_shash {
unsigned descsize;
struct crypto_tfm base;
};

struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
u32 mask);

static inline void crypto_free_shash(struct crypto_shash *tfm)
{
kfree(tfm);
}

static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm)
{
return container_of(tfm->base.alg, struct shash_alg, base);
}

static inline unsigned crypto_shash_digestsize(struct crypto_shash *tfm)
{
return crypto_shash_alg(tfm)->digestsize;
}

static inline unsigned crypto_shash_descsize(struct crypto_shash *tfm)
{
return tfm->descsize;
}

struct shash_desc {
struct crypto_shash *tfm;
u32 flags;

void *ctx[] CRYPTO_MINALIGN_ATTR;
};

#define SHASH_DESC_ON_STACK(shash, tfm) \
char __##shash##_desc[sizeof(struct shash_desc) + \
crypto_shash_descsize(tfm)] CRYPTO_MINALIGN_ATTR; \
struct shash_desc *shash = (struct shash_desc *)__##shash##_desc

static inline int crypto_shash_init(struct shash_desc *desc)
{
return crypto_shash_alg(desc->tfm)->init(desc);
}

static inline int crypto_shash_update(struct shash_desc *desc,
const u8 *data, unsigned len)
{
return crypto_shash_alg(desc->tfm)->update(desc, data, len);
}

static inline int crypto_shash_final(struct shash_desc *desc, u8 *out)
{
return crypto_shash_alg(desc->tfm)->final(desc, out);
}

static inline int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
unsigned len, u8 *out)
{
return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out);
}

static inline int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
unsigned len, u8 *out)
{
return crypto_shash_alg(desc->tfm)->digest(desc, data, len, out);
}

#endif /* _CRYPTO_HASH_H */

include/crypto/poly1305.h
@@ -10,4 +10,27 @@
#define POLY1305_KEY_SIZE crypto_onetimeauth_poly1305_KEYBYTES
#define POLY1305_DIGEST_SIZE crypto_onetimeauth_poly1305_BYTES

struct poly1305_desc_ctx {
crypto_onetimeauth_poly1305_state s;
};

static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
{
int ret = crypto_onetimeauth_poly1305_init(&desc->s, key);
BUG_ON(ret);
}

static inline void poly1305_update(struct poly1305_desc_ctx *desc,
const u8 *src, unsigned int nbytes)
{
int ret = crypto_onetimeauth_poly1305_update(&desc->s, src, nbytes);
BUG_ON(ret);
}

static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
{
int ret = crypto_onetimeauth_poly1305_final(&desc->s, digest);
BUG_ON(ret);
}

#endif
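For reference, the shim above keeps the kernel's three-step Poly1305 calling convention while delegating to libsodium's one-time authenticator. A usage sketch; the key and message here are placeholders:

#include <crypto/poly1305.h>

static void poly1305_mac(u8 digest[POLY1305_DIGEST_SIZE],
			 const u8 key[POLY1305_KEY_SIZE],
			 const u8 *msg, unsigned int len)
{
	struct poly1305_desc_ctx ctx;

	poly1305_init(&ctx, key);	/* one-time key: never reuse across messages */
	poly1305_update(&ctx, msg, len);
	poly1305_final(&ctx, digest);	/* 16-byte authenticator tag */
}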
include/crypto/sha.h
@@ -7,6 +7,7 @@
#define _CRYPTO_SHA_H

#include <linux/types.h>
#include <sodium/crypto_hash_sha256.h>

#define SHA1_DIGEST_SIZE 20
#define SHA1_BLOCK_SIZE 64
@@ -112,4 +113,9 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,

extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash);

static inline void sha256(const u8 *data, unsigned int len, u8 *out)
{
crypto_hash_sha256(out, data, len);
}
#endif
include/crypto/skcipher.h (deleted)
@@ -1,128 +0,0 @@
/*
* Symmetric key ciphers.
*
* Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/

#ifndef _CRYPTO_SKCIPHER_H
#define _CRYPTO_SKCIPHER_H

#include <linux/crypto.h>

struct crypto_skcipher;
struct skcipher_request;

struct skcipher_alg {
struct crypto_alg base;
};

int crypto_register_skcipher(struct skcipher_alg *alg);

struct crypto_skcipher {
int (*setkey)(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
int (*encrypt)(struct skcipher_request *req);
int (*decrypt)(struct skcipher_request *req);

unsigned ivsize;
unsigned keysize;

struct crypto_tfm base;
};

struct crypto_sync_skcipher {
struct crypto_skcipher base;
};

struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
u32 type, u32 mask);

static inline struct crypto_sync_skcipher *
crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask)
{
return (void *) crypto_alloc_skcipher(alg_name, type, mask);
}

static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
{
kfree(tfm);
}

static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
{
crypto_free_skcipher(&tfm->base);
}

struct skcipher_request {
unsigned cryptlen;
u8 *iv;

struct scatterlist *src;
struct scatterlist *dst;

struct crypto_tfm *tfm;
};

#define MAX_SYNC_SKCIPHER_REQSIZE 384
#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
char __##name##_desc[sizeof(struct skcipher_request) + \
MAX_SYNC_SKCIPHER_REQSIZE + \
(!(sizeof((struct crypto_sync_skcipher *)1 == \
(typeof(tfm))1))) \
] CRYPTO_MINALIGN_ATTR; \
struct skcipher_request *name = (void *)__##name##_desc

static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return tfm->setkey(tfm, key, keylen);
}

static inline struct crypto_skcipher *crypto_skcipher_reqtfm(
struct skcipher_request *req)
{
return container_of(req->tfm, struct crypto_skcipher, base);
}

static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
{
return crypto_skcipher_reqtfm(req)->encrypt(req);
}

static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
{
return crypto_skcipher_reqtfm(req)->decrypt(req);
}

static inline void skcipher_request_set_tfm(struct skcipher_request *req,
struct crypto_skcipher *tfm)
{
req->tfm = &tfm->base;
}

static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
struct crypto_sync_skcipher *tfm)
{
skcipher_request_set_tfm(req, &tfm->base);
}

#define skcipher_request_set_callback(...) do {} while (0)

static inline void skcipher_request_set_crypt(
struct skcipher_request *req,
struct scatterlist *src, struct scatterlist *dst,
unsigned int cryptlen, void *iv)
{
req->src = src;
req->dst = dst;
req->cryptlen = cryptlen;
req->iv = iv;
}

#endif /* _CRYPTO_SKCIPHER_H */
include/linux/atomic.h
@@ -16,53 +16,6 @@ typedef struct {
u64 counter;
} atomic64_t;

#ifndef C11_ATOMICS

#include <urcu/uatomic.h>

#if (CAA_BITS_PER_LONG != 64)
#define ATOMIC64_SPINLOCK
#endif

#define __ATOMIC_READ(p) uatomic_read(p)
#define __ATOMIC_SET(p, v) uatomic_set(p, v)
#define __ATOMIC_SET_RELEASE(p, v) uatomic_set(p, v)
#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v)
#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v)
#define __ATOMIC_ADD(v, p) uatomic_add(p, v)
#define __ATOMIC_SUB(v, p) uatomic_sub(p, v)
#define __ATOMIC_INC(p) uatomic_inc(p)
#define __ATOMIC_DEC(p) uatomic_dec(p)
#define __ATOMIC_AND(v, p) uatomic_and(p, v)
#define __ATOMIC_OR(v, p) uatomic_or(p, v)

#define xchg(p, v) uatomic_xchg(p, v)
#define xchg_acquire(p, v) uatomic_xchg(p, v)
#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new)

#define try_cmpxchg(p, _old, _new) \
({ \
typeof(*(_old)) _v = cmpxchg(p, *(_old), _new); \
bool _ret = _v == *(_old); \
*(_old) = _v; \
_ret; \
})

#define try_cmpxchg_acquire(p, _old, _new) \
try_cmpxchg(p, _old, _new)

#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add()
#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add()
#define smp_wmb() cmm_smp_wmb()
#define smp_rmb() cmm_smp_rmb()
#define smp_mb() cmm_smp_mb()
#define smp_read_barrier_depends() cmm_smp_read_barrier_depends()
#define smp_acquire__after_ctrl_dep() cmm_smp_mb()

#else /* C11_ATOMICS */

#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#define __ATOMIC_SET_RELEASE(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
@@ -83,6 +36,11 @@ typedef struct {
__ATOMIC_SEQ_CST, \
__ATOMIC_SEQ_CST)

#define try_cmpxchg_acquire(p, old, new) \
__atomic_compare_exchange_n((p), old, new, false, \
__ATOMIC_ACQUIRE, \
__ATOMIC_RELAXED)

#define cmpxchg(p, old, new) \
({ \
typeof(*(p)) __old = (old); \
@@ -109,7 +67,7 @@ typedef struct {
\
__atomic_compare_exchange_n((p), &__old, new, false, \
__ATOMIC_RELEASE, \
__ATOMIC_RELEASE); \
__ATOMIC_RELAXED); \
__old; \
})

@@ -119,9 +77,7 @@ typedef struct {
#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_read_barrier_depends()

#endif

#define smp_acquire__after_ctrl_dep() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_load_acquire(p) \
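Two details in the atomic.h hunks above are worth spelling out. First, try_cmpxchg() follows the kernel convention of writing the observed value back through *_old on failure, so callers can retry without re-reading. Second, the __ATOMIC_RELEASE to __ATOMIC_RELAXED change fixes the failure memory order: the C11/GCC builtins forbid RELEASE (and ACQ_REL) as a failure ordering, since a failed compare-exchange performs no store. A sketch of the resulting retry-loop idiom, written against the raw GCC builtin the shim wraps; the fetch_or example itself is hypothetical:

#include <stdbool.h>
#include <stdint.h>

static bool try_cmpxchg_u64(uint64_t *p, uint64_t *old, uint64_t new)
{
	/* failure order must not be RELEASE: use RELAXED, as the fix above does */
	return __atomic_compare_exchange_n(p, old, new, false,
					   __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}

static uint64_t fetch_or_u64(uint64_t *p, uint64_t mask)
{
	uint64_t old = __atomic_load_n(p, __ATOMIC_RELAXED);

	/* on failure the builtin refreshes 'old', so there is no explicit re-read */
	while (!try_cmpxchg_u64(p, &old, old | mask))
		;
	return old;
}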
@@ -288,6 +288,13 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
*flags = 0;
}

static inline char *bvec_kmap_local(struct bio_vec *bvec)
{
return page_address(bvec->bv_page) + bvec->bv_offset;
}

static inline void bvec_kunmap_local(char *buffer) {}

static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
unsigned long *flags)
{
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>

struct bio_set;
struct bio;
@@ -63,6 +65,8 @@ struct block_device {
struct gendisk * bd_disk;
struct gendisk __bd_disk;
int bd_fd;

struct mutex bd_holder_lock;
};

#define bdev_kobj(_bdev) (&((_bdev)->kobj))
@@ -65,7 +65,10 @@ unsigned bdev_logical_block_size(struct block_device *bdev);
sector_t get_capacity(struct gendisk *disk);

struct blk_holder_ops {
void (*mark_dead)(struct block_device *bdev);
void (*mark_dead)(struct block_device *bdev, bool surprise);
void (*sync)(struct block_device *bdev);
int (*freeze)(struct block_device *bdev);
int (*thaw)(struct block_device *bdev);
};

static inline struct block_device *file_bdev(struct file *file)
@@ -80,8 +83,12 @@ int lookup_bdev(const char *path, dev_t *);

struct super_block {
void *s_fs_info;
struct rw_semaphore s_umount;
};

static inline void evict_inodes(struct super_block *sb) {}
static inline int sync_filesystem(struct super_block *) { return 0; }

/*
* File types
*
include/linux/byteorder.h
@@ -72,4 +72,12 @@ static inline void le64_add_cpu(__le64 *var, u64 val)
*var = cpu_to_le64(le64_to_cpu(*var) + val);
}

static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
{
while (words--) {
__le32_to_cpus(buf);
buf++;
}
}

#endif /* __LINUX_BYTEORDER_H */
include/linux/compiler.h
@@ -67,6 +67,7 @@
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#define fallthrough __attribute__((__fallthrough__))
#define __noreturn __attribute__((__noreturn__))
#define __no_kmsan_checks

#ifndef __counted_by
#define __counted_by(nr)
@@ -187,4 +188,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#define is_signed_type(type) (((type)(-1)) < (__force type)1)
#define is_unsigned_type(type) (!is_signed_type(type))

#define TYPEOF_UNQUAL(exp) __typeof__(exp)

#endif /* _TOOLS_LINUX_COMPILER_H */
@@ -9,6 +9,8 @@ struct dentry {
struct inode *d_inode;
};

static inline void shrink_dcache_sb(struct super_block *sb) {}

#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))

@@ -12,6 +12,7 @@
#include <linux/byteorder.h>
#include <linux/compiler.h>
#include <linux/dcache.h>
#include <linux/kmsan-checks.h>
#include <linux/math.h>
#include <linux/minmax.h>

include/linux/kmsan-checks.h (new file, 98 lines)
@@ -0,0 +1,98 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KMSAN checks to be used for one-off annotations in subsystems.
*
* Copyright (C) 2017-2022 Google LLC
* Author: Alexander Potapenko <glider@google.com>
*
*/

#ifndef _LINUX_KMSAN_CHECKS_H
#define _LINUX_KMSAN_CHECKS_H

#include <linux/types.h>

#ifdef CONFIG_KMSAN

/**
* kmsan_poison_memory() - Mark the memory range as uninitialized.
* @address: address to start with.
* @size: size of buffer to poison.
* @flags: GFP flags for allocations done by this function.
*
* Until other data is written to this range, KMSAN will treat it as
* uninitialized. Error reports for this memory will reference the call site of
* kmsan_poison_memory() as origin.
*/
void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);

/**
* kmsan_unpoison_memory() - Mark the memory range as initialized.
* @address: address to start with.
* @size: size of buffer to unpoison.
*
* Until other data is written to this range, KMSAN will treat it as
* initialized.
*/
void kmsan_unpoison_memory(const void *address, size_t size);

/**
* kmsan_check_memory() - Check the memory range for being initialized.
* @address: address to start with.
* @size: size of buffer to check.
*
* If any piece of the given range is marked as uninitialized, KMSAN will report
* an error.
*/
void kmsan_check_memory(const void *address, size_t size);

/**
* kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
* @to: destination address in the userspace.
* @from: source address in the kernel.
* @to_copy: number of bytes to copy.
* @left: number of bytes not copied.
*
* If this is a real userspace data transfer, KMSAN checks the bytes that were
* actually copied to ensure there was no information leak. If @to belongs to
* the kernel space (which is possible for compat syscalls), KMSAN just copies
* the metadata.
*/
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
size_t left);

/**
* kmsan_memmove() - Notify KMSAN about a data copy within kernel.
* @to: destination address in the kernel.
* @from: source address in the kernel.
* @size: number of bytes to copy.
*
* Invoked after non-instrumented version (e.g. implemented using assembly
* code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
*/
void kmsan_memmove(void *to, const void *from, size_t to_copy);

#else

static inline void kmsan_poison_memory(const void *address, size_t size,
gfp_t flags)
{
}
static inline void kmsan_unpoison_memory(const void *address, size_t size)
{
}
static inline void kmsan_check_memory(const void *address, size_t size)
{
}
static inline void kmsan_copy_to_user(void __user *to, const void *from,
size_t to_copy, size_t left)
{
}

static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
{
}

#endif

#endif /* _LINUX_KMSAN_CHECKS_H */
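A short sketch of how these annotations get used in practice; the buffer-filling scenario below is hypothetical, but it is the pattern the comments above describe (data produced outside instrumented code must be unpoisoned before use). With CONFIG_KMSAN disabled, all of these calls compile to the empty stubs shown above.

#include <linux/kmsan-checks.h>

/* Hypothetical: 'buf' was filled by hardware/DMA, which KMSAN cannot see. */
static void consume_hw_buffer(void *buf, size_t len)
{
	kmsan_unpoison_memory(buf, len);	/* mark the range as initialized */
	kmsan_check_memory(buf, len);		/* reports if any byte were still poisoned */
	/* ... parse buf normally ... */
}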
@@ -28,6 +28,7 @@ struct hlist_nulls_node {
#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define HLIST_NULLS_HEAD_INIT(nulls) {.first = (struct hlist_nulls_node *)NULLS_MARKER(nulls)}

#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
include/linux/math64.h
@@ -82,4 +82,71 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
return div_s64_rem(dividend, divisor, &remainder);
}

#ifndef mul_u32_u32
/*
* Many a GCC version messes this up and generates a 64x64 mult :-(
*/
static inline u64 mul_u32_u32(u32 a, u32 b)
{
return (u64)a * b;
}
#endif

#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)

#ifndef mul_u64_u64_shr
static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
{
return (u64)(((unsigned __int128)a * mul) >> shift);
}
#endif /* mul_u64_u64_shr */

#else

#ifndef mul_u64_u64_shr
static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
{
union {
u64 ll;
struct {
#ifdef __BIG_ENDIAN
u32 high, low;
#else
u32 low, high;
#endif
} l;
} rl, rm, rn, rh, a0, b0;
u64 c;

a0.ll = a;
b0.ll = b;

rl.ll = mul_u32_u32(a0.l.low, b0.l.low);
rm.ll = mul_u32_u32(a0.l.low, b0.l.high);
rn.ll = mul_u32_u32(a0.l.high, b0.l.low);
rh.ll = mul_u32_u32(a0.l.high, b0.l.high);

/*
* Each of these lines computes a 64-bit intermediate result into "c",
* starting at bits 32-95. The low 32-bits go into the result of the
* multiplication, the high 32-bits are carried into the next step.
*/
rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
rh.l.high = (c >> 32) + rh.l.high;

/*
* The 128-bit result of the multiplication is in rl.ll and rh.ll,
* shift it right and throw away the high part of the result.
*/
if (shift == 0)
return rl.ll;
if (shift < 64)
return (rl.ll >> shift) | (rh.ll << (64 - shift));
return rh.ll >> (shift & 63);
}
#endif /* mul_u64_u64_shr */

#endif

#endif /* _LINUX_MATH64_H */
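The fallback mul_u64_u64_shr() above is the classic 32x32 schoolbook decomposition of a 64x64-to-128-bit multiply. A standalone transcription of the same arithmetic that can be checked against a compiler's native 128-bit type, assuming a host with unsigned __int128 support (the test values are arbitrary):

#include <assert.h>
#include <stdint.h>

/* Standalone copy of the schoolbook fallback's arithmetic. */
static uint64_t mul_shr_fallback(uint64_t a, uint64_t b, unsigned int shift)
{
	uint64_t al = (uint32_t)a, ah = a >> 32;
	uint64_t bl = (uint32_t)b, bh = b >> 32;
	uint64_t rl = al * bl, rm = al * bh, rn = ah * bl, rh = ah * bh;
	uint64_t c, lo, hi;

	c  = (rl >> 32) + (uint32_t)rm + (uint32_t)rn;	/* bits 32..95, first carry pass */
	lo = (uint32_t)rl | (c << 32);
	c  = (c >> 32) + (rm >> 32) + (rn >> 32) + (uint32_t)rh;
	hi = (uint32_t)c | (((c >> 32) + (rh >> 32)) << 32);

	if (shift == 0)
		return lo;
	if (shift < 64)
		return (lo >> shift) | (hi << (64 - shift));
	return hi >> (shift & 63);
}

int main(void)
{
	uint64_t a = 0xdeadbeefcafef00dULL, b = 0x123456789abcdef1ULL;

	for (unsigned int s = 0; s < 128; s++)
		assert(mul_shr_fallback(a, b, s) ==
		       (uint64_t)(((unsigned __int128)a * b) >> s));
	return 0;
}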
@@ -4,6 +4,7 @@

#include <sys/syscall.h>
#include <unistd.h>
#include <linux/bug.h>
#include <linux/types.h>

struct sysinfo {

include/linux/moduleparam.h (new file, 1 line)
@@ -0,0 +1 @@
#include <linux/module.h>
include/linux/random.h
@@ -9,7 +9,9 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/math64.h>

#ifdef SYS_getrandom
static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
@@ -67,4 +69,24 @@ static inline u32 get_random_u32_below(u32 ceil)
}
}

static inline u32 __get_random_u32_below(u32 ceil)
{
return get_random_u32_below(ceil);
}

static inline u64 get_random_u64_below(u64 ceil)
{
if (ceil <= 1)
return 0;
if (ceil <= U32_MAX)
return get_random_u32_below(ceil);

for (;;) {
u64 rand = get_random_u64();
u64 mult = ceil * rand;
if (likely(mult >= -ceil % ceil))
return mul_u64_u64_shr(ceil, rand, 64);
}
}

#endif /* _LINUX_RANDOM_H */
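get_random_u64_below() above is Lemire's multiply-shift technique: for a uniform 64-bit rand, floor(ceil * rand / 2^64) lands in [0, ceil), and it becomes exactly uniform once the loop rejects the cases where the low 64 bits of the product (mult) fall below (2^64 - ceil) mod ceil, written -ceil % ceil in unsigned arithmetic. The same construction at 32-bit width, as a sketch; rand_u32() is an assumed source of uniform random words:

#include <stdint.h>

extern uint32_t rand_u32(void);	/* assumed: uniform 32-bit generator */

/* Returns a uniform value in [0, ceil); requires ceil > 0. */
static uint32_t rand_below(uint32_t ceil)
{
	for (;;) {
		uint64_t mult = (uint64_t)ceil * rand_u32();

		/* low half in the short bucket => biased, reject and retry */
		if ((uint32_t)mult >= (0u - ceil) % ceil)
			return mult >> 32;	/* floor(ceil * rand / 2^32) */
	}
}

In the 64-bit version the full product no longer fits a native word, so the high half comes from mul_u64_u64_shr(), which is why the hunk also adds the linux/math64.h include.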
include/linux/sched/sysctl.h (new file, 6 lines)
@@ -0,0 +1,6 @@
#ifndef __TOOLS_LINUX_SCHED_SYSCTL_H
#define __TOOLS_LINUX_SCHED_SYSCTL_H

#define sysctl_hung_task_timeout_secs (HZ * 10)

#endif /* __TOOLS_LINUX_SCHED_SYSCTL_H */

@@ -16,4 +16,6 @@ static inline void sort(void *base, size_t num, size_t size,
return qsort(base, num, size, cmp_func);
}

#define sort_nonatomic(...) sort(__VA_ARGS__)

#endif
@@ -17,4 +17,15 @@ enum string_size_units {
int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
char *buf, int len);

static inline void memcpy_and_pad(void *dest, size_t dest_len, const void *src,
size_t count, int pad)
{
if (dest_len > count) {
memcpy(dest, src, count);
memset(dest + count, pad, dest_len - count);
} else {
memcpy(dest, src, dest_len);
}
}

#endif
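memcpy_and_pad() is a bounded copy-then-fill: it never writes more than dest_len bytes, padding the tail when the source is short and silently truncating when it is long. A small usage sketch; the 16-byte label field is hypothetical:

#include <string.h>

/* Fill a fixed-width, zero-padded on-disk field from a C string. */
static void set_label(char field[16], const char *name)
{
	memcpy_and_pad(field, 16, name, strlen(name), 0);
}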
@@ -29,7 +29,7 @@ static inline int timer_pending(const struct timer_list *timer)
}

int del_timer(struct timer_list * timer);
int del_timer_sync(struct timer_list *timer);
int timer_delete_sync(struct timer_list *timer);

#define del_singleshot_timer_sync(timer) del_timer_sync(timer)

include/linux/unicode.h (new file, empty)
libbcachefs/acl.c
@@ -273,7 +273,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct posix_acl *acl = NULL;

if (rcu)
@@ -344,7 +344,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_iter inode_iter = { NULL };
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode_u;
struct posix_acl *acl;
umode_t mode;
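One mechanical change recurs through the alloc_background.c hunks that follow: btree iterator entry points now take the btree_trans explicitly instead of recovering it through iter->trans. The call-site conversion, distilled from the hunks below:

/* before: the transaction was recovered from the iterator */
k = bch2_btree_iter_peek_slot(&iter);
bch2_btree_iter_set_pos(&iter, pos);

/* after: the transaction is threaded through explicitly */
k = bch2_btree_iter_peek_slot(trans, &iter);
bch2_btree_iter_set_pos(trans, &iter, pos);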
libbcachefs/alloc_background.c
@@ -232,7 +232,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
int ret = 0;

bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k),
c, alloc_v2_unpack_error,
c, alloc_v3_unpack_error,
"unpack error");
fsck_err:
return ret;
@@ -589,6 +591,8 @@ iter_err:

int bch2_alloc_read(struct bch_fs *c)
{
down_read(&c->state_lock);

struct btree_trans *trans = bch2_trans_get(c);
struct bch_dev *ca = NULL;
int ret;
@@ -608,7 +610,7 @@ int bch2_alloc_read(struct bch_fs *c)
* bch2_check_alloc_key() which runs later:
*/
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}

@@ -629,17 +631,17 @@ int bch2_alloc_read(struct bch_fs *c)
* bch2_check_alloc_key() which runs later:
*/
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}

if (k.k->p.offset < ca->mi.first_bucket) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket));
continue;
}

if (k.k->p.offset >= ca->mi.nbuckets) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}

@@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_dev_put(ca);
bch2_trans_put(trans);

up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}
@@ -673,8 +676,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, alloc_k);

int ret = __bch2_fsck_err(NULL, trans, flags, err_id,
"bucket incorrectly %sset in %s btree\n"
" %s",
"bucket incorrectly %sset in %s btree\n%s",
set ? "" : "un",
bch2_btree_id_str(btree),
buf.buf);
@@ -777,14 +779,12 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s
s64 delta_sectors,
s64 delta_fragmented, unsigned flags)
{
struct disk_accounting_pos acc = {
.type = BCH_DISK_ACCOUNTING_dev_data_type,
.dev_data_type.dev = ca->dev_idx,
.dev_data_type.data_type = data_type,
};
s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented };

return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc);
return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
d, dev_data_type,
.dev = ca->dev_idx,
.data_type = data_type);
}

int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
@@ -837,7 +837,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,

struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
if (!ca)
return -EIO;
return -BCH_ERR_trigger_alloc;

struct bch_alloc_v4 old_a_convert;
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
@@ -871,6 +871,9 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (data_type_is_empty(new_a->data_type) &&
BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
if (new_a->oldest_gen == new_a->gen &&
!bch2_bucket_sectors_total(*new_a))
new_a->oldest_gen++;
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
alloc_data_type_set(new_a, new_a->data_type);
@@ -889,26 +892,20 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!new_a->io_time[READ])
new_a->io_time[READ] = bch2_current_io_time(c, READ);

u64 old_lru = alloc_lru_idx_read(*old_a);
u64 new_lru = alloc_lru_idx_read(*new_a);
if (old_lru != new_lru) {
ret = bch2_lru_change(trans, new.k->p.inode,
bucket_to_u64(new.k->p),
old_lru, new_lru);
alloc_lru_idx_read(*old_a),
alloc_lru_idx_read(*new_a));
if (ret)
goto err;
}

old_lru = alloc_lru_idx_fragmentation(*old_a, ca);
new_lru = alloc_lru_idx_fragmentation(*new_a, ca);
if (old_lru != new_lru) {
ret = bch2_lru_change(trans,
BCH_LRU_FRAGMENTATION_START,
BCH_LRU_BUCKET_FRAGMENTATION,
bucket_to_u64(new.k->p),
old_lru, new_lru);
alloc_lru_idx_fragmentation(*old_a, ca),
alloc_lru_idx_fragmentation(*new_a, ca));
if (ret)
goto err;
}

if (old_a->gen != new_a->gen) {
ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
@@ -1032,9 +1029,9 @@ fsck_err:
bch2_dev_put(ca);
return ret;
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
ret = -EIO;
ret = -BCH_ERR_trigger_alloc;
goto err;
}

@@ -1042,9 +1039,10 @@
* This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
* extents style btrees, but works on non-extents btrees:
*/
static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end, struct bkey *hole)
{
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);

if (bkey_err(k))
return k;
@@ -1055,9 +1053,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
struct btree_iter iter2;
struct bpos next;

bch2_trans_copy_iter(&iter2, iter);
bch2_trans_copy_iter(trans, &iter2, iter);

struct btree_path *path = btree_iter_path(iter->trans, iter);
struct btree_path *path = btree_iter_path(trans, iter);
if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));

@@ -1067,9 +1065,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
* btree node min/max is a closed interval, upto takes a half
* open interval:
*/
k = bch2_btree_iter_peek_max(&iter2, end);
k = bch2_btree_iter_peek_max(trans, &iter2, end);
next = iter2.pos;
bch2_trans_iter_exit(iter->trans, &iter2);
bch2_trans_iter_exit(trans, &iter2);

BUG_ON(next.offset >= iter->pos.offset + U32_MAX);

@@ -1110,13 +1108,14 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck
return *ca != NULL;
}

static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter,
static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_dev **ca, struct bkey *hole)
{
struct bch_fs *c = iter->trans->c;
struct bch_fs *c = trans->c;
struct bkey_s_c k;
again:
k = bch2_get_key_or_hole(iter, POS_MAX, hole);
k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole);
if (bkey_err(k))
return k;

@@ -1129,7 +1128,7 @@ again:
if (!next_bucket(c, ca, &hole_start))
return bkey_s_c_null;

bch2_btree_iter_set_pos(iter, hole_start);
bch2_btree_iter_set_pos(trans, iter, hole_start);
goto again;
}

@@ -1170,8 +1169,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,

a = bch2_alloc_to_v4(alloc_k, &a_convert);

bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
k = bch2_btree_iter_peek_slot(discard_iter);
bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p);
k = bch2_btree_iter_peek_slot(trans, discard_iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -1184,8 +1183,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
}

bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
k = bch2_btree_iter_peek_slot(freespace_iter);
bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -1198,16 +1197,15 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
}

bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
k = bch2_btree_iter_peek_slot(bucket_gens_iter);
bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
goto err;

if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
trans, bucket_gens_key_wrong,
"incorrect gen in bucket_gens btree (got %u should be %u)\n"
" %s",
"incorrect gen in bucket_gens btree (got %u should be %u)\n%s",
alloc_gen(k, gens_offset), a->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
@@ -1253,9 +1251,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
if (!ca->mi.freespace_initialized)
return 0;

bch2_btree_iter_set_pos(freespace_iter, start);
bch2_btree_iter_set_pos(trans, freespace_iter, start);

k = bch2_btree_iter_peek_slot(freespace_iter);
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -1265,7 +1263,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
if (fsck_err_on(k.k->type != KEY_TYPE_set,
trans, freespace_hole_missing,
"hole in alloc btree missing in freespace btree\n"
" device %llu buckets %llu-%llu",
"device %llu buckets %llu-%llu",
freespace_iter->pos.inode,
freespace_iter->pos.offset,
end->offset)) {
@@ -1304,9 +1302,9 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
unsigned i, gens_offset, gens_end_offset;
int ret;

bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset));

k = bch2_btree_iter_peek_slot(bucket_gens_iter);
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
goto err;
@@ -1424,7 +1422,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
(state == BCH_DATA_free &&
genbits != alloc_freespace_genbits(*a))) {
if (fsck_err(trans, need_discard_freespace_key_bad,
"%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
"%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
bch2_btree_id_str(iter->btree_id),
iter->pos.inode,
@@ -1439,7 +1437,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
*gen = a->gen;
out:
fsck_err:
bch2_set_btree_iter_dontneed(&alloc_iter);
bch2_set_btree_iter_dontneed(trans, &alloc_iter);
bch2_trans_iter_exit(trans, &alloc_iter);
printbuf_exit(&buf);
return ret;
@@ -1505,7 +1503,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
if (!ca) {
if (fsck_err(trans, bucket_gens_to_invalid_dev,
"bucket_gens key for invalid device:\n %s",
"bucket_gens key for invalid device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter, 0);
goto out;
@@ -1514,7 +1512,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
if (fsck_err_on(end <= ca->mi.first_bucket ||
start >= ca->mi.nbuckets,
trans, bucket_gens_to_invalid_buckets,
"bucket_gens key for invalid buckets:\n %s",
"bucket_gens key for invalid buckets:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0);
goto out;
@@ -1576,7 +1574,7 @@ int bch2_check_alloc_info(struct bch_fs *c)

bch2_trans_begin(trans);

k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole);
k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
@@ -1614,7 +1612,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
if (ret)
goto bkey_err;

bch2_btree_iter_set_pos(&iter, next);
bch2_btree_iter_set_pos(trans, &iter, next);
bkey_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@@ -1642,7 +1640,7 @@ bkey_err:
BTREE_ITER_prefetch);
while (1) {
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek(trans, &iter);
if (!k.k)
break;

@@ -1661,7 +1659,7 @@ bkey_err:
break;
}

bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos));
}
bch2_trans_iter_exit(trans, &iter);
if (ret)
@@ -1689,7 +1687,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret;

alloc_k = bch2_btree_iter_peek(alloc_iter);
alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
if (!alloc_k.k)
return 0;

@@ -1705,7 +1703,8 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,

u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
if (lru_idx) {
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION,
bucket_to_u64(alloc_k.k->p),
lru_idx, alloc_k, last_flushed);
if (ret)
goto err;
@@ -1716,8 +1715,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,

if (fsck_err_on(!a->io_time[READ],
trans, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time 0\n"
" %s",
"cached bucket with read_time 0\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
struct bkey_i_alloc_v4 *a_mut =
@@ -1735,7 +1733,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}

ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
a->io_time[READ],
alloc_k, last_flushed);
if (ret)
goto err;
@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
bch2_check_stripe_to_lru_refs(c);

bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
@@ -1805,6 +1806,19 @@ struct discard_buckets_state {
u64 discarded;
};

/*
* This is needed because discard is both a filesystem option and a device
* option, and mount options are supposed to apply to that mount and not be
* persisted, i.e. if it's set as a mount option we can't propagate it to the
* device.
*/
static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
{
return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
? c->opts.discard
: ca->mi.discard;
}

static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
@@ -1814,7 +1828,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bpos pos = need_discard_iter->pos;
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
struct printbuf buf = PRINTBUF;
@@ -1868,7 +1882,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
s->discarded++;
*discard_pos_done = iter.pos;

if (ca->mi.discard && !c->opts.nochanges) {
if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
@@ -1938,7 +1952,7 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));

percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

@@ -1955,7 +1969,7 @@ void bch2_dev_do_discards(struct bch_dev *ca)
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;

percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_write_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
@@ -2033,7 +2047,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));

bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}

@@ -2053,21 +2067,79 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;

percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}

static int invalidate_one_bp(struct btree_trans *trans,
struct bch_dev *ca,
struct bkey_s_c_backpointer bp,
struct bkey_buf *last_flushed)
{
struct btree_iter extent_iter;
struct bkey_s_c extent_k =
bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed);
int ret = bkey_err(extent_k);
if (ret)
return ret;

if (!extent_k.k)
return 0;

struct bkey_i *n =
bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
BTREE_UPDATE_internal_snapshot_node);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;

bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx);
err:
bch2_trans_iter_exit(trans, &extent_iter);
return ret;
}

static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket,
u8 gen,
struct bkey_buf *last_flushed)
{
struct bpos bp_start = bucket_pos_to_bp_start(ca, bucket);
struct bpos bp_end = bucket_pos_to_bp_end(ca, bucket);

return for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
bp_start, bp_end, 0, k,
NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc, ({
if (k.k->type != KEY_TYPE_backpointer)
continue;

struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

if (bp.v->bucket_gen != gen)
continue;

/* filter out bps with gens that don't match */

invalidate_one_bp(trans, ca, bp, last_flushed);
}));
}

noinline_for_stack
static int invalidate_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
struct bkey_buf *last_flushed,
s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
struct bkey_i_alloc_v4 *a = NULL;
struct printbuf buf = PRINTBUF;
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
unsigned cached_sectors;
struct btree_iter alloc_iter = {};
int ret = 0;

if (*nr_to_invalidate <= 0)
@@ -2084,35 +2156,37 @@ static int invalidate_one_bucket(struct btree_trans *trans,
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
return 0;

a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
ret = PTR_ERR_OR_ZERO(a);
struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
BTREE_ID_alloc, bucket,
BTREE_ITER_cached);
ret = bkey_err(alloc_k);
if (ret)
goto out;
return ret;

struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);

/* We expect harmless races here due to the btree write buffer: */
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a))
goto out;

BUG_ON(a->v.data_type != BCH_DATA_cached);
BUG_ON(a->v.dirty_sectors);
/*
* Impossible since alloc_lru_idx_read() only returns nonzero if the
* bucket is supposed to be on the cached bucket LRU (i.e.
* BCH_DATA_cached)
*
* bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
*/
BUG_ON(a->data_type != BCH_DATA_cached);
BUG_ON(a->dirty_sectors);

if (!a->v.cached_sectors)
if (!a->cached_sectors)
bch_err(c, "invalidating empty bucket, confused");

cached_sectors = a->v.cached_sectors;
unsigned cached_sectors = a->cached_sectors;
u8 gen = a->gen;

SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
a->v.gen++;
a->v.data_type = 0;
a->v.dirty_sectors = 0;
a->v.stripe_sectors = 0;
a->v.cached_sectors = 0;
a->v.io_time[READ] = bch2_current_io_time(c, READ);
a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);

ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc);
ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed);
if (ret)
goto out;

@@ -2120,6 +2194,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
--*nr_to_invalidate;
out:
fsck_err:
bch2_trans_iter_exit(trans, &alloc_iter);
printbuf_exit(&buf);
return ret;
}
@@ -2129,9 +2204,9 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter
{
struct bkey_s_c k;
again:
k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
k = bch2_btree_iter_peek_max(trans, iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
if (!k.k && !*wrapped) {
bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0));
*wrapped = true;
goto again;
}
@@ -2146,6 +2221,10 @@ static void bch2_do_invalidates_work(struct work_struct *work)
struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;

struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);

ret = bch2_btree_write_buffer_tryflush(trans);
if (ret)
goto err;
@@ -2170,19 +2249,20 @@ static void bch2_do_invalidates_work(struct work_struct *work)
if (!k.k)
break;

ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
ret = invalidate_one_bucket(trans, ca, &iter, k, &last_flushed, &nr_to_invalidate);
restart_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;

bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
}
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_bkey_buf_exit(&last_flushed, c);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}

@@ -2199,7 +2279,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca)
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;

percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@@ -2246,7 +2326,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
break;
}

k = bch2_get_key_or_hole(&iter, end, &hole);
k = bch2_get_key_or_hole(trans, &iter, end, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
@@ -2265,7 +2345,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
bch2_btree_iter_advance(&iter);
|
||||
bch2_btree_iter_advance(trans, &iter);
|
||||
} else {
|
||||
struct bkey_i *freespace;
|
||||
|
||||
@ -2285,7 +2365,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
bch2_btree_iter_set_pos(&iter, k.k->p);
|
||||
bch2_btree_iter_set_pos(trans, &iter, k.k->p);
|
||||
}
|
||||
bkey_err:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -2431,7 +2511,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
|
||||
bch2_set_ra_pages(c, ra_pages);
|
||||
|
||||
for_each_rw_member(c, ca) {
|
||||
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
|
||||
u64 dev_reserve = 0;
|
||||
|
||||
/*
|
||||
@ -2513,15 +2593,22 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
||||
if (rw && (ca->mi.data_allowed & BIT(i)))
|
||||
set_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||
else
|
||||
clear_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||
}
|
||||
|
||||
/* device goes ro: */
|
||||
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
/* First, remove device from allocation groups: */
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
||||
clear_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||
bch2_dev_allocator_set_rw(c, ca, false);
|
||||
|
||||
c->rw_devs_change_count++;
|
||||
|
||||
@ -2555,10 +2642,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
||||
if (ca->mi.data_allowed & (1 << i))
|
||||
set_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||
|
||||
bch2_dev_allocator_set_rw(c, ca, true);
|
||||
c->rw_devs_change_count++;
|
||||
}
|
||||
|
||||
|
@@ -131,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
	if (a.stripe)
		return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
	if (bch2_bucket_sectors_dirty(a))
		return data_type;
		return bucket_data_type(data_type);
	if (a.cached_sectors)
		return BCH_DATA_cached;
	if (BCH_ALLOC_V4_NEED_DISCARD(&a))
@@ -321,11 +321,11 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
{
	u64 want_free = ca->mi.nbuckets >> 7;
	u64 free = max_t(s64, 0,
			 u.d[BCH_DATA_free].buckets
			 + u.d[BCH_DATA_need_discard].buckets
			 u.buckets[BCH_DATA_free]
			 + u.buckets[BCH_DATA_need_discard]
			 - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));

	return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
	return clamp_t(s64, want_free - free, 0, u.buckets[BCH_DATA_cached]);
}

void bch2_dev_do_invalidates(struct bch_dev *);
@@ -350,6 +350,7 @@ int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *);
void bch2_recalc_capacity(struct bch_fs *);
u64 bch2_min_rw_member_capacity(struct bch_fs *);

void bch2_dev_allocator_set_rw(struct bch_fs *, struct bch_dev *, bool);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
File diff suppressed because it is too large
@@ -5,6 +5,7 @@
#include "bcachefs.h"
#include "alloc_types.h"
#include "extents.h"
#include "io_write_types.h"
#include "sb-members.h"

#include <linux/hash.h>

@@ -23,6 +24,52 @@ struct dev_alloc_list {
	u8 data[BCH_SB_MEMBERS_MAX];
};

struct alloc_request {
	unsigned		nr_replicas;
	unsigned		target;
	bool			ec;
	enum bch_watermark	watermark;
	enum bch_write_flags	flags;
	enum bch_data_type	data_type;
	struct bch_devs_list	*devs_have;
	struct write_point	*wp;

	/* These fields are used primarily by open_bucket_add_buckets */
	struct open_buckets	ptrs;
	unsigned		nr_effective;	/* sum of @ptrs durability */
	bool			have_cache;	/* have we allocated from a 0 durability dev */
	struct bch_devs_mask	devs_may_alloc;

	/* bch2_bucket_alloc_set_trans(): */
	struct bch_dev_usage	usage;

	/* bch2_bucket_alloc_trans(): */
	struct bch_dev		*ca;

	enum {
		BTREE_BITMAP_NO,
		BTREE_BITMAP_YES,
		BTREE_BITMAP_ANY,
	} btree_bitmap;

	struct {
		u64	buckets_seen;
		u64	skipped_open;
		u64	skipped_need_journal_commit;
		u64	need_journal_commit;
		u64	skipped_nocow;
		u64	skipped_nouse;
		u64	skipped_mi_btree_bitmap;
	} counters;

	unsigned		scratch_nr_replicas;
	unsigned		scratch_nr_effective;
	bool			scratch_have_cache;
	enum bch_data_type	scratch_data_type;
	struct open_buckets	scratch_ptrs;
	struct bch_devs_mask	scratch_devs_may_alloc;
};

struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
					  struct dev_stripe_state *,
					  struct bch_devs_mask *);
@@ -82,7 +129,7 @@ static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
}

void bch2_open_bucket_write_error(struct bch_fs *,
				  struct open_buckets *, unsigned);
				  struct open_buckets *, unsigned, int);

void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);

@@ -171,11 +218,8 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
}

enum bch_write_flags;
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
				struct dev_stripe_state *, struct bch_devs_mask *,
				unsigned, unsigned *, bool *, enum bch_write_flags,
				enum bch_data_type, enum bch_watermark,
				struct closure *);
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct alloc_request *,
				struct dev_stripe_state *, struct closure *);

int bch2_alloc_sectors_start_trans(struct btree_trans *,
				   unsigned, unsigned,
@@ -8,22 +8,6 @@
#include "clock_types.h"
#include "fifo.h"

struct bucket_alloc_state {
	enum {
		BTREE_BITMAP_NO,
		BTREE_BITMAP_YES,
		BTREE_BITMAP_ANY,
	} btree_bitmap;

	u64	buckets_seen;
	u64	skipped_open;
	u64	skipped_need_journal_commit;
	u64	need_journal_commit;
	u64	skipped_nocow;
	u64	skipped_nouse;
	u64	skipped_mi_btree_bitmap;
};

#define BCH_WATERMARKS()	\
	x(stripe)		\
	x(normal)		\
@@ -11,6 +11,7 @@
#include "checksum.h"
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"

#include <linux/mm.h>

@@ -49,6 +50,8 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke
	}

	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
	prt_str(out, " data_type=");
	bch2_prt_data_type(out, bp.v->data_type);
	prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp.v->bucket_len,
@@ -93,6 +96,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
@@ -122,17 +126,15 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
	}

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
	    __bch2_inconsistent_error(c, &buf))
		ret = -BCH_ERR_erofs_unfixed_errors;

	bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
	return ret;
}

int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
@@ -207,11 +209,11 @@ static int backpointer_target_not_found(struct btree_trans *trans,
	if (ret)
		return ret;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
	prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
		   bp.v->level ? "btree node" : "extent");
	bch2_bkey_val_to_text(&buf, c, bp.s_c);

	prt_printf(&buf, "\n ");
	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, target_k);

	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
@@ -219,7 +221,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
	struct extent_ptr_decoded p;
	bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
		if (p.ptr.dev == bp.k->p.inode) {
			prt_printf(&buf, "\n ");
			prt_newline(&buf);
			struct bkey_i_backpointer bp2;
			bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
@@ -250,12 +252,24 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
				  0,
				  bp.v->level,
				  iter_flags);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
	if (bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	/*
	 * peek_slot() doesn't normally return NULL - except when we ask for a
	 * key at a btree level that doesn't exist.
	 *
	 * We may want to revisit this and change peek_slot():
	 */
	if (!k.k) {
		bkey_init(&iter->k);
		iter->k.p = bp.v->pos;
		k.k = &iter->k;
	}

	if (k.k &&
	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
		return k;
@@ -291,7 +305,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
				  0,
				  bp.v->level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(iter);
	struct btree *b = bch2_btree_iter_peek_node(trans, iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

@@ -319,7 +333,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
		return 0;

	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct btree_iter alloc_iter = {};
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;
@@ -440,12 +454,11 @@ found:
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n ");
	prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
	bch2_btree_id_to_text(&buf, btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n ");
	prt_newline(&buf);
	bch2_btree_id_to_text(&buf, o_btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent2);
@@ -461,7 +474,7 @@ err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[READ]);
	printbuf_exit(&buf);
	return ret;
}
@@ -518,11 +531,27 @@ check_existing_bp:
	if (!other_extent.k)
		goto missing;

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
	if (ca) {
		struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
		bkey_for_each_ptr(other_extent_ptrs, ptr)
			if (ptr->dev == bp->k.p.inode &&
			    dev_ptr_stale_rcu(ca, ptr)) {
				ret = drop_dev_and_update(trans, other_bp.v->btree_id,
							  other_extent, bp->k.p.inode);
				if (ret)
					goto err;
				goto out;
			}
	}
	rcu_read_unlock();

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_str(&buf, "\n ");
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

@@ -561,20 +590,20 @@ check_existing_bp:
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_str(&buf, "\n ");
	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_str(&buf, "missing backpointer\n for: ");
	prt_str(&buf, "missing backpointer\nfor: ");
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\n want: ");
	prt_printf(&buf, "\nwant: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
	prt_printf(&buf, "\n got: ");
	prt_printf(&buf, "\ngot: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
@@ -594,9 +623,6 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
			continue;

@@ -604,9 +630,11 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
		bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
		bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);

		bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
		rcu_read_unlock();

		if (check || empty) {
		if ((check || empty) && !stale) {
			struct bkey_i_backpointer bp;
			bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);

@@ -634,7 +662,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	b = bch2_btree_iter_peek_node(trans, &iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;
@@ -719,71 +747,6 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
	return ret;
}

struct progress_indicator_state {
	unsigned long	next_print;
	u64		nodes_seen;
	u64		nodes_total;
	struct btree	*last_node;
};

static inline void progress_init(struct progress_indicator_state *s,
				 struct bch_fs *c,
				 u64 btree_id_mask)
{
	memset(s, 0, sizeof(*s));

	s->next_print = jiffies + HZ * 10;

	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
		if (!(btree_id_mask & BIT_ULL(i)))
			continue;

		struct disk_accounting_pos acc = {
			.type		= BCH_DISK_ACCOUNTING_btree,
			.btree.id	= i,
		};

		u64 v;
		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
		s->nodes_total += div64_ul(v, btree_sectors(c));
	}
}

static inline bool progress_update_p(struct progress_indicator_state *s)
{
	bool ret = time_after_eq(jiffies, s->next_print);

	if (ret)
		s->next_print = jiffies + HZ * 10;
	return ret;
}

static void progress_update_iter(struct btree_trans *trans,
				 struct progress_indicator_state *s,
				 struct btree_iter *iter,
				 const char *msg)
{
	struct bch_fs *c = trans->c;
	struct btree *b = path_l(btree_iter_path(trans, iter))->b;

	s->nodes_seen += b != s->last_node;
	s->last_node = b;

	if (progress_update_p(s)) {
		struct printbuf buf = PRINTBUF;
		unsigned percent = s->nodes_total
			? div64_u64(s->nodes_seen * 100, s->nodes_total)
			: 0;

		prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
			   msg, percent, s->nodes_seen, s->nodes_total);
		bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));

		bch_info(c, "%s", buf.buf);
		printbuf_exit(&buf);
	}
}

static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
@@ -791,7 +754,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
	struct progress_indicator_state progress;
	int ret = 0;

	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
@@ -810,7 +773,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
					  BTREE_ITER_prefetch);

		ret = for_each_btree_key_continue(trans, iter, 0, k, ({
			progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
			bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
			check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
		}));
@@ -831,7 +794,7 @@ enum alloc_sector_counter {
	ALLOC_SECTORS_NR
};

static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
static int data_type_to_alloc_counter(enum bch_data_type t)
{
	switch (t) {
	case BCH_DATA_btree:
@@ -840,9 +803,10 @@ static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t
	case BCH_DATA_cached:
		return ALLOC_cached;
	case BCH_DATA_stripe:
	case BCH_DATA_parity:
		return ALLOC_stripe;
	default:
		BUG();
		return -1;
	}
}

@@ -893,7 +857,11 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
		if (bp.v->bucket_gen != a->gen)
			continue;

		sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
		int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
		if (alloc_counter < 0)
			continue;

		sectors[alloc_counter] += bp.v->bucket_len;
	};
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
@@ -905,9 +873,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
		goto err;
	}

	/* Cached pointers don't have backpointers: */

	if (sectors[ALLOC_dirty] != a->dirty_sectors ||
	    sectors[ALLOC_cached] != a->cached_sectors ||
	    sectors[ALLOC_stripe] != a->stripe_sectors) {
		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
@@ -916,6 +883,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
		}

		if (sectors[ALLOC_dirty] > a->dirty_sectors ||
		    sectors[ALLOC_cached] > a->cached_sectors ||
		    sectors[ALLOC_stripe] > a->stripe_sectors) {
			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
				-BCH_ERR_transaction_restart_nested;
@@ -923,7 +891,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
	}

	if (!sectors[ALLOC_dirty] &&
	    !sectors[ALLOC_stripe])
	    !sectors[ALLOC_stripe] &&
	    !sectors[ALLOC_cached])
		__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
	else
		__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
@@ -977,7 +946,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
{
	struct btree_iter iter;
	bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
	struct btree *b = bch2_btree_iter_peek_node(&iter);
	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
	int ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;
@@ -1064,7 +1033,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
	 * Can't allow devices to come/go/resize while we have bucket bitmaps
	 * allocated
	 */
	lockdep_assert_held(&c->state_lock);
	down_read(&c->state_lock);

	for_each_member_device(c, ca) {
		BUG_ON(ca->bucket_backpointer_mismatches);
@@ -1149,6 +1118,7 @@ err_free_bitmaps:
		ca->bucket_backpointer_mismatches = NULL;
	}

	up_read(&c->state_lock);
	bch_err_fn(c, ret);
	return ret;
}
@@ -1210,11 +1180,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

	int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
				     POS_MIN, BTREE_ITER_prefetch, k, ({
		progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
		bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
		check_one_backpointer(trans, start, end, k, &last_flushed);
	}));
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
#ifndef _BCACHEFS_BACKPOINTERS_H
#define _BCACHEFS_BACKPOINTERS_H

#include "btree_cache.h"
#include "btree_iter.h"
@@ -123,7 +123,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
		return BCH_DATA_btree;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
		if (p.has_ec)
			return BCH_DATA_stripe;
		if (p.ptr.cached)
			return BCH_DATA_cached;
		else
			return BCH_DATA_user;
	case KEY_TYPE_stripe: {
		const struct bch_extent_ptr *ptr = &entry->ptr;
		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
@@ -147,7 +152,20 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
					 struct bkey_i_backpointer *bp)
{
	bkey_backpointer_init(&bp->k_i);
	bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset);
	bp->k.p.inode = p.ptr.dev;

	if (k.k->type != KEY_TYPE_stripe)
		bp->k.p.offset = ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
	else {
		/*
		 * Put stripe backpointers where they won't collide with the
		 * extent backpointers within the stripe:
		 */
		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
		bp->k.p.offset = ((u64) (p.ptr.offset + le16_to_cpu(s.v->sectors)) <<
				  MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;
	}

	bp->v = (struct bch_backpointer) {
		.btree_id	= btree_id,
		.level		= level,
@@ -203,6 +203,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/zstd.h>
#include <linux/unicode.h>

#include "bcachefs_format.h"
#include "btree_journal_iter_types.h"
@@ -268,7 +269,8 @@ do { \

#define bch2_fmt(_c, fmt)	bch2_log_msg(_c, fmt "\n")

void bch2_print_str(struct bch_fs *, const char *);
void bch2_print_str(struct bch_fs *, const char *, const char *);
void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);

__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);
@@ -444,6 +446,7 @@ BCH_DEBUG_PARAMS_DEBUG()
	x(btree_node_sort)			\
	x(btree_node_read)			\
	x(btree_node_read_done)			\
	x(btree_node_write)			\
	x(btree_interior_update_foreground)	\
	x(btree_interior_update_total)		\
	x(btree_gc)				\
@@ -456,6 +459,7 @@ BCH_DEBUG_PARAMS_DEBUG()
	x(blocked_journal_low_on_space)		\
	x(blocked_journal_low_on_pin)		\
	x(blocked_journal_max_in_flight)	\
	x(blocked_journal_max_open)		\
	x(blocked_key_cache_flush)		\
	x(blocked_allocate)			\
	x(blocked_allocate_open_bucket)		\
@@ -521,8 +525,8 @@ struct bch_dev {
	struct percpu_ref	ref;
#endif
	struct completion	ref_completion;
	struct percpu_ref	io_ref;
	struct completion	io_ref_completion;
	struct percpu_ref	io_ref[2];
	struct completion	io_ref_completion[2];

	struct bch_fs		*fs;

@@ -533,6 +537,7 @@ struct bch_dev {
	 */
	struct bch_member_cpu	mi;
	atomic64_t		errors[BCH_MEMBER_ERROR_NR];
	unsigned long		write_errors_start;

	__uuid_t		uuid;
	char			name[BDEVNAME_SIZE];
@@ -558,7 +563,8 @@ struct bch_dev {
	unsigned long		*bucket_backpointer_mismatches;
	unsigned long		*bucket_backpointer_empty;

	struct bch_dev_usage __percpu	*usage;
	struct bch_dev_usage_full __percpu
				*usage;

	/* Allocator: */
	u64			alloc_cursor[3];
@@ -609,6 +615,7 @@ struct bch_dev {
	x(accounting_replay_done)	\
	x(may_go_rw)			\
	x(rw)				\
	x(rw_init_done)			\
	x(was_rw)			\
	x(stopping)			\
	x(emergency_ro)			\
@@ -623,7 +630,8 @@ struct bch_dev {
	x(topology_error)		\
	x(errors_fixed)			\
	x(errors_not_fixed)		\
	x(no_invalid_checks)
	x(no_invalid_checks)		\
	x(discard_mount_opt_set)	\

enum bch_fs_flags {
#define x(n)		BCH_FS_##n,
@@ -644,6 +652,9 @@ struct btree_transaction_stats {
	unsigned		nr_max_paths;
	unsigned		journal_entries_size;
	unsigned		max_mem;
#ifdef CONFIG_BCACHEFS_DEBUG
	darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
	char			*max_paths_text;
};

@@ -697,6 +708,8 @@ enum bch_write_ref {
	BCH_WRITE_REF_NR,
};

#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)

struct bch_fs {
	struct closure		cl;

@@ -768,6 +781,7 @@ struct bch_fs {

	u8			nr_devices;
	u8			clean;
	bool			multi_device; /* true if we've ever had more than one device */

	u8			encryption_type;

@@ -780,7 +794,12 @@ struct bch_fs {
		unsigned long	errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
		u64		btrees_lost_data;
	} sb;
	DARRAY(enum bcachefs_metadata_version)
				incompat_versions_requested;

#ifdef CONFIG_UNICODE
	struct unicode_map	*cf_encoding;
#endif

	struct bch_sb_handle	disk_sb;

@@ -861,7 +880,7 @@ struct bch_fs {
	struct btree_write_buffer btree_write_buffer;

	struct workqueue_struct	*btree_update_wq;
	struct workqueue_struct	*btree_io_complete_wq;
	struct workqueue_struct	*btree_write_complete_wq;
	/* copygc needs its own workqueue for index updates.. */
	struct workqueue_struct	*copygc_wq;
	/*
@@ -970,9 +989,8 @@ struct bch_fs {
	mempool_t		compress_workspace[BCH_COMPRESSION_OPT_NR];
	size_t			zstd_workspace_size;

	struct crypto_shash	*sha256;
	struct crypto_sync_skcipher *chacha20;
	struct crypto_shash	*poly1305;
	struct bch_key		chacha20_key;
	bool			chacha20_key_set;

	atomic64_t		key_version;

@@ -994,15 +1012,11 @@ struct bch_fs {
	wait_queue_head_t	copygc_running_wq;

	/* STRIPES: */
	GENRADIX(struct stripe) stripes;
	GENRADIX(struct gc_stripe) gc_stripes;

	struct hlist_head	ec_stripes_new[32];
	spinlock_t		ec_stripes_new_lock;

	ec_stripes_heap		ec_stripes_heap;
	struct mutex		ec_stripes_heap_lock;

	/* ERASURE CODING */
	struct list_head	ec_stripe_head_list;
	struct mutex		ec_stripe_head_lock;
@@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k)
#define __BKEY_PADDED(key, pad)				\
	struct bkey_i key; __u64 key ## _pad[pad]

enum bch_bkey_type_flags {
	BKEY_TYPE_strict_btree_checks	= BIT(0),
};

/*
 * - DELETED keys are used internally to mark keys that should be ignored but
 *   override keys in composition order. Their version number is ignored.
@@ -384,45 +388,45 @@ static inline void bkey_init(struct bkey *k)
 * - WHITEOUT: for hash table btrees
 */
#define BCH_BKEY_TYPES()	\
	x(deleted,		0)	\
	x(whiteout,		1)	\
	x(error,		2)	\
	x(cookie,		3)	\
	x(hash_whiteout,	4)	\
	x(btree_ptr,		5)	\
	x(extent,		6)	\
	x(reservation,		7)	\
	x(inode,		8)	\
	x(inode_generation,	9)	\
	x(dirent,		10)	\
	x(xattr,		11)	\
	x(alloc,		12)	\
	x(quota,		13)	\
	x(stripe,		14)	\
	x(reflink_p,		15)	\
	x(reflink_v,		16)	\
	x(inline_data,		17)	\
	x(btree_ptr_v2,		18)	\
	x(indirect_inline_data,	19)	\
	x(alloc_v2,		20)	\
	x(subvolume,		21)	\
	x(snapshot,		22)	\
	x(inode_v2,		23)	\
	x(alloc_v3,		24)	\
	x(set,			25)	\
	x(lru,			26)	\
	x(alloc_v4,		27)	\
	x(backpointer,		28)	\
	x(inode_v3,		29)	\
	x(bucket_gens,		30)	\
	x(snapshot_tree,	31)	\
	x(logged_op_truncate,	32)	\
	x(logged_op_finsert,	33)	\
	x(accounting,		34)	\
	x(inode_alloc_cursor,	35)
	x(deleted,		0,	0)				\
	x(whiteout,		1,	0)				\
	x(error,		2,	0)				\
	x(cookie,		3,	0)				\
	x(hash_whiteout,	4,	BKEY_TYPE_strict_btree_checks)	\
	x(btree_ptr,		5,	BKEY_TYPE_strict_btree_checks)	\
	x(extent,		6,	BKEY_TYPE_strict_btree_checks)	\
	x(reservation,		7,	BKEY_TYPE_strict_btree_checks)	\
	x(inode,		8,	BKEY_TYPE_strict_btree_checks)	\
	x(inode_generation,	9,	BKEY_TYPE_strict_btree_checks)	\
	x(dirent,		10,	BKEY_TYPE_strict_btree_checks)	\
	x(xattr,		11,	BKEY_TYPE_strict_btree_checks)	\
	x(alloc,		12,	BKEY_TYPE_strict_btree_checks)	\
	x(quota,		13,	BKEY_TYPE_strict_btree_checks)	\
	x(stripe,		14,	BKEY_TYPE_strict_btree_checks)	\
	x(reflink_p,		15,	BKEY_TYPE_strict_btree_checks)	\
	x(reflink_v,		16,	BKEY_TYPE_strict_btree_checks)	\
	x(inline_data,		17,	BKEY_TYPE_strict_btree_checks)	\
	x(btree_ptr_v2,		18,	BKEY_TYPE_strict_btree_checks)	\
	x(indirect_inline_data,	19,	BKEY_TYPE_strict_btree_checks)	\
	x(alloc_v2,		20,	BKEY_TYPE_strict_btree_checks)	\
	x(subvolume,		21,	BKEY_TYPE_strict_btree_checks)	\
	x(snapshot,		22,	BKEY_TYPE_strict_btree_checks)	\
	x(inode_v2,		23,	BKEY_TYPE_strict_btree_checks)	\
	x(alloc_v3,		24,	BKEY_TYPE_strict_btree_checks)	\
	x(set,			25,	0)				\
	x(lru,			26,	BKEY_TYPE_strict_btree_checks)	\
	x(alloc_v4,		27,	BKEY_TYPE_strict_btree_checks)	\
	x(backpointer,		28,	BKEY_TYPE_strict_btree_checks)	\
	x(inode_v3,		29,	BKEY_TYPE_strict_btree_checks)	\
	x(bucket_gens,		30,	BKEY_TYPE_strict_btree_checks)	\
	x(snapshot_tree,	31,	BKEY_TYPE_strict_btree_checks)	\
	x(logged_op_truncate,	32,	BKEY_TYPE_strict_btree_checks)	\
	x(logged_op_finsert,	33,	BKEY_TYPE_strict_btree_checks)	\
	x(accounting,		34,	BKEY_TYPE_strict_btree_checks)	\
	x(inode_alloc_cursor,	35,	BKEY_TYPE_strict_btree_checks)

enum bch_bkey_type {
#define x(name, nr)		KEY_TYPE_##name = nr,
#define x(name, nr, ...)	KEY_TYPE_##name = nr,
	BCH_BKEY_TYPES()
#undef x
	KEY_TYPE_MAX,
@@ -686,7 +690,12 @@ struct bch_sb_field_ext {
	x(inode_depth,			BCH_VERSION(1, 17))	\
	x(persistent_inode_cursors,	BCH_VERSION(1, 18))	\
	x(autofix_errors,		BCH_VERSION(1, 19))	\
	x(directory_size,		BCH_VERSION(1, 20))
	x(directory_size,		BCH_VERSION(1, 20))	\
	x(cached_backpointers,		BCH_VERSION(1, 21))	\
	x(stripe_backpointers,		BCH_VERSION(1, 22))	\
	x(stripe_lru,			BCH_VERSION(1, 23))	\
	x(casefolding,			BCH_VERSION(1, 24))	\
	x(extent_flags,			BCH_VERSION(1, 25))

enum bcachefs_metadata_version {
	bcachefs_metadata_version_min = 9,
@@ -837,6 +846,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
LE64_BITMASK(BCH_SB_MULTI_DEVICE,	struct bch_sb, flags[3], 63, 64);
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
LE64_BITMASK(BCH_SB_NOCOW,		struct bch_sb, flags[4], 33, 34);
@@ -855,6 +865,9 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT,	struct bch_sb, flags[5], 32, 48);
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
					struct bch_sb, flags[5], 48, 64);
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS,	struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR,	struct bch_sb, flags[6], 14, 20);
LE64_BITMASK(BCH_SB_DEGRADED_ACTION,	struct bch_sb, flags[6], 20, 22);

static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -908,7 +921,10 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
	x(journal_no_flush,		16)	\
	x(alloc_v2,			17)	\
	x(extents_across_btree_nodes,	18)	\
	x(incompat_version_field,	19)
	x(incompat_version_field,	19)	\
	x(casefolding,			20)	\
	x(no_alloc_info,		21)	\
	x(small_image,			22)

#define BCH_SB_FEATURES_ALWAYS				\
	(BIT_ULL(BCH_FEATURE_new_extent_overwrite)|	\
@@ -922,7 +938,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
	 BIT_ULL(BCH_FEATURE_new_siphash)|		\
	 BIT_ULL(BCH_FEATURE_btree_ptr_v2)|		\
	 BIT_ULL(BCH_FEATURE_new_varint)|		\
	 BIT_ULL(BCH_FEATURE_journal_no_flush))
	 BIT_ULL(BCH_FEATURE_journal_no_flush)|		\
	 BIT_ULL(BCH_FEATURE_incompat_version_field))

enum bch_sb_feature {
#define x(f, n) BCH_FEATURE_##f,
@@ -974,6 +991,19 @@ enum bch_error_actions {
	BCH_ON_ERROR_NR
};

#define BCH_DEGRADED_ACTIONS()	\
	x(ask,		0)	\
	x(yes,		1)	\
	x(very,		2)	\
	x(no,		3)

enum bch_degraded_actions {
#define x(t, n) BCH_DEGRADED_##t = n,
	BCH_DEGRADED_ACTIONS()
#undef x
	BCH_DEGRADED_ACTIONS_NR
};

#define BCH_STR_HASH_TYPES()	\
	x(crc32c,	0)	\
	x(crc64,	1)	\
@@ -1133,7 +1163,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
	x(log,			9)	\
	x(overwrite,		10)	\
	x(write_buffer_keys,	11)	\
	x(datetime,		12)
	x(datetime,		12)	\
	x(log_bkey,		13)

enum bch_jset_entry_type {
#define x(f, nr)	BCH_JSET_ENTRY_##f = nr,
@@ -234,6 +234,11 @@ enum bch_data_event {
	BCH_DATA_EVENT_NR	= 1,
};

enum data_progress_data_type_special {
	DATA_PROGRESS_DATA_TYPE_phys	= 254,
	DATA_PROGRESS_DATA_TYPE_done	= 255,
};

struct bch_ioctl_data_progress {
	__u8			data_type;
	__u8			btree_id;
@@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
	return bpos_eq(l.k->p, r.k->p) &&
		l.k->size == r.k->size &&
		bkey_bytes(l.k) == bkey_bytes(r.k) &&
		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
@@ -21,7 +21,7 @@
#include "xattr.h"

const char * const bch2_bkey_types[] = {
#define x(name, nr) #name,
#define x(name, nr, ...) #name,
	BCH_BKEY_TYPES()
#undef x
	NULL
@@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
})

const struct bkey_ops bch2_bkey_ops[] = {
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
	BCH_BKEY_TYPES()
#undef x
};
@@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = {
#undef x
};

static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = {
#define x(name, nr, flags) [KEY_TYPE_##name] = flags,
	BCH_BKEY_TYPES()
#undef x
};

const char *bch2_btree_node_type_str(enum btree_node_type type)
{
	return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
@@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
	if (type >= BKEY_TYPE_NR)
		return 0;

	bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
			 (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
	enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX
		? bch2_bkey_type_flags[k.k->type]
		: 0;

	bool strict_key_type_allowed =
		(from.flags & BCH_VALIDATE_commit) ||
		type == BKEY_TYPE_btree ||
		(from.btree < BTREE_ID_NR &&
		 (bkey_flags & BKEY_TYPE_strict_btree_checks));

	bkey_fsck_err_on(strict_key_type_allowed &&
			 k.k->type < KEY_TYPE_MAX &&
			 !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
			 c, bkey_invalid_type_for_btree,
			 "invalid key type for btree %s (%s)",
@@ -15,14 +15,9 @@

#include <linux/prefetch.h>
#include <linux/sched/mm.h>
#include <linux/seq_buf.h>
#include <linux/swap.h>

#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
do { \
	if (shrinker_counter) \
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \
} while (0)

const char * const bch2_btree_node_flags[] = {
	"typebit",
	"typebit",
@@ -203,7 +198,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
		return NULL;
	}

	bch2_btree_lock_init(&b->c, 0);
	bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);

	__bch2_btree_node_to_freelist(bc, b);
	return b;
@@ -350,78 +345,33 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
	return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
}

/*
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
				       bool flush, bool locked)
{
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;

	lockdep_assert_held(&bc->lock);
wait_on_io:
	if (b->flags & ((1U << BTREE_NODE_dirty)|
			(1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_dirty(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			else if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			return -BCH_ERR_ENOMEM_btree_node_reclaim;
		}

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	if (!six_trylock_intent(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	if (!six_trylock_write(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
		goto out_unlock_intent;
	}

	/* recheck under lock */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			goto out_unlock;
		}
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	if (btree_node_noevict(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
		goto out_unlock;
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++;
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}
	if (btree_node_write_blocked(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
		goto out_unlock;
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++;
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}
	if (btree_node_will_make_reachable(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
		goto out_unlock;
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++;
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	if (btree_node_dirty(b)) {
		if (!flush) {
			BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			goto out_unlock;
			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++;
			return -BCH_ERR_ENOMEM_btree_node_reclaim;
		}

		if (locked) {
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
@@ -434,31 +384,79 @@ wait_on_io:
		else
			__bch2_btree_node_write(c, b,
						BTREE_WRITE_cache_reclaim);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}
out:
	if (b->hash_val && !ret)
		trace_and_count(c, btree_cache_reap, c, b);
	return ret;
out_unlock:
	six_unlock_write(&b->c.lock);
out_unlock_intent:
	six_unlock_intent(&b->c.lock);
	ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
	goto out;
}

	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_read_in_flight(b))
				bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++;
			else if (btree_node_write_in_flight(b))
				bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++;
			return -BCH_ERR_ENOMEM_btree_node_reclaim;
		}

		if (locked)
			return -EINTR;

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	return 0;
}

static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
/*
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
	return __btree_node_reclaim(c, b, false, shrinker_counter);
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;

	lockdep_assert_held(&bc->lock);
retry_unlocked:
	ret = __btree_node_reclaim_checks(c, b, flush, false);
	if (ret)
		return ret;

	if (!six_trylock_intent(&b->c.lock)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++;
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	if (!six_trylock_write(&b->c.lock)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++;
		six_unlock_intent(&b->c.lock);
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	/* recheck under lock */
	ret = __btree_node_reclaim_checks(c, b, flush, true);
	if (ret) {
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		if (ret == -EINTR)
			goto retry_unlocked;
		return ret;
	}

	if (b->hash_val && !ret)
		trace_and_count(c, btree_cache_reap, c, b);
	return 0;
}

static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, false);
}

static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, true, false);
	return __btree_node_reclaim(c, b, true);
}

static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -490,7 +488,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
	 * IO can always make forward progress:
	 */
	can_free = btree_cache_can_free(list);
	nr = min_t(unsigned long, nr, can_free);
	if (nr > can_free) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_cache_reserve] += nr - can_free;
		nr = can_free;
	}

	i = 0;
	list_for_each_entry_safe(b, t, &bc->freeable, list) {
@@ -506,7 +507,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
		if (touched >= nr)
			goto out;

		if (!btree_node_reclaim(c, b, true)) {
		if (!btree_node_reclaim(c, b)) {
			btree_node_data_free(bc, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
@@ -522,7 +523,7 @@ restart:
			clear_btree_node_accessed(b);
			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
			--touched;;
		} else if (!btree_node_reclaim(c, b, true)) {
		} else if (!btree_node_reclaim(c, b)) {
			__bch2_btree_node_hash_remove(bc, b);
			__btree_node_data_free(bc, b);

@@ -575,6 +576,19 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
	return btree_cache_can_free(list);
}

static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
{
	struct btree_cache_list *list = shrink->private_data;
	struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);

	char *cbuf;
	size_t buflen = seq_buf_get_buf(s, &cbuf);
	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);

	bch2_btree_cache_to_text(&out, bc);
	seq_buf_commit(s, out.pos);
}

void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
@@ -610,6 +624,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
			btree_node_write_in_flight(b));

		btree_node_data_free(bc, b);
		cond_resched();
	}

	BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -665,6 +680,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
	bc->live[0].shrink	= shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->to_text		= bch2_btree_cache_shrinker_to_text;
	shrink->seeks		= 2;
	shrink->private_data	= &bc->live[0];
	shrinker_register(shrink);
@@ -675,6 +691,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
	bc->live[1].shrink	= shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->to_text		= bch2_btree_cache_shrinker_to_text;
	shrink->seeks		= 8;
	shrink->private_data	= &bc->live[1];
	shrinker_register(shrink);
@@ -754,7 +771,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)

	for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
		list_for_each_entry_reverse(b, &bc->live[i].list, list)
			if (!btree_node_reclaim(c, b, false))
			if (!btree_node_reclaim(c, b))
				return b;

	while (1) {
@@ -789,23 +806,24 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list)
		if (!btree_node_reclaim(c, b, false)) {
		if (!btree_node_reclaim(c, b)) {
			list_del_init(&b->list);
			goto got_node;
		}

	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
	if (!b) {
	if (b) {
		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
	} else {
		mutex_unlock(&bc->lock);
		bch2_trans_unlock(trans);
		b = __btree_node_mem_alloc(c, GFP_KERNEL);
		if (!b)
			goto err;
		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
		mutex_lock(&bc->lock);
	}

	bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);

	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));
@@ -815,7 +833,7 @@ got_node:
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list)
		if (!btree_node_reclaim(c, b2, false)) {
		if (!btree_node_reclaim(c, b2)) {
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);

@@ -976,7 +994,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,

	/* Unlock before doing IO: */
	six_unlock_intent(&b->c.lock);
	bch2_trans_unlock_noassert(trans);
	bch2_trans_unlock(trans);

	bch2_btree_node_read(trans, b, sync);

@@ -1415,7 +1433,7 @@ void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
		prt_printf(out, "%u", r->level);
	else
		prt_printf(out, "(unknown)");
	prt_printf(out, "\n ");
	prt_newline(out);

	bch2_bkey_val_to_text(out, c, k);
}
@@ -1491,9 +1509,10 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc

	prt_btree_cache_line(out, c, "live:",		bc->live[0].nr);
	prt_btree_cache_line(out, c, "pinned:",		bc->live[1].nr);
	prt_btree_cache_line(out, c, "freeable:",	bc->nr_freeable);
	prt_btree_cache_line(out, c, "reserve:",	bc->nr_reserve);
	prt_btree_cache_line(out, c, "freed:",		bc->nr_freeable);
	prt_btree_cache_line(out, c, "dirty:",		atomic_long_read(&bc->nr_dirty));
	prt_printf(out, "cannibalize lock:\t%p\n",	bc->alloc_lock);
	prt_printf(out, "cannibalize lock:\t%s\n",	bc->alloc_lock ? "held" : "not held");
	prt_newline(out);

	for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) {
@@ -1504,6 +1523,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
	}

	prt_newline(out);
	prt_printf(out, "counters since mount:\n");
	prt_printf(out, "freed:\t%zu\n", bc->nr_freed);
	prt_printf(out, "not freed:\n");
libbcachefs/btree_gc.c:

@@ -27,6 +27,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "progress.h"
#include "recovery_passes.h"
#include "reflink.h"
#include "recovery.h"

@@ -212,15 +213,15 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *

prt_printf(&buf, " at ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_printf(&buf, ":\n parent: ");
prt_printf(&buf, ":\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

if (prev) {
prt_printf(&buf, "\n prev: ");
prt_printf(&buf, "\nprev: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}

prt_str(&buf, "\n next: ");
prt_str(&buf, "\nnext: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));

if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */

@@ -279,12 +280,12 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
if (bpos_eq(child->key.k.p, b->key.k.p))
return 0;

prt_printf(&buf, " at ");
prt_printf(&buf, "\nat: ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_printf(&buf, ":\n parent: ");
prt_printf(&buf, "\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

prt_str(&buf, "\n child: ");
prt_str(&buf, "\nchild: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));

if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,

@@ -350,8 +351,7 @@ again:

if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
trans, btree_node_read_error,
"Topology repair: unreadable btree node at\n"
" %s",
"Topology repair: unreadable btree node at\n%s",
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;

@@ -611,7 +611,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
k.k->bversion.lo > atomic64_read(&c->key_version),
trans, bkey_version_in_future,
"key version number higher than recorded %llu\n %s",
"key version number higher than recorded %llu\n%s",
atomic64_read(&c->key_version),
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->bversion.lo);

@@ -619,7 +619,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,

if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
trans, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n %s",
"btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {

@@ -656,7 +656,9 @@ fsck_err:
return ret;
}

static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
static int bch2_gc_btree(struct btree_trans *trans,
struct progress_indicator_state *progress,
enum btree_id btree, bool initial)
{
struct bch_fs *c = trans->c;
unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;

@@ -673,6 +675,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
BTREE_ITER_prefetch);

ret = for_each_btree_key_continue(trans, iter, 0, k, ({
bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
}));

@@ -688,7 +691,7 @@ retry_root:
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
0, bch2_btree_id_root(c, btree)->b->c.level, 0);
struct btree *b = bch2_btree_iter_peek_node(&iter);
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err_root;

@@ -717,22 +720,24 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
static int bch2_gc_btrees(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id ids[BTREE_ID_NR];
struct printbuf buf = PRINTBUF;
unsigned i;
int ret = 0;

for (i = 0; i < BTREE_ID_NR; i++)
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, ~0ULL);

enum btree_id ids[BTREE_ID_NR];
for (unsigned i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
unsigned btree = i < BTREE_ID_NR ? ids[i] : i;

if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
continue;

ret = bch2_gc_btree(trans, btree, true);
ret = bch2_gc_btree(trans, &progress, btree, true);
}

printbuf_exit(&buf);

@@ -1015,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c)
{
int ret;

lockdep_assert_held(&c->state_lock);

down_read(&c->state_lock);
down_write(&c->gc_lock);

bch2_btree_interior_updates_flush(c);

@@ -1054,6 +1058,7 @@ out:
percpu_up_write(&c->mark_lock);

up_write(&c->gc_lock);
up_read(&c->state_lock);

/*
* At startup, allocations can happen directly instead of via the

@@ -1194,7 +1199,7 @@ int bch2_gc_gens(struct bch_fs *c)
BCH_TRANS_COMMIT_no_enospc, ({
ca = bch2_dev_iterate(c, ca, k.k->p.inode);
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}
bch2_alloc_write_oldest_gen(trans, ca, &iter, k);

@@ -1238,16 +1243,11 @@ void bch2_gc_gens_async(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}

void bch2_fs_btree_gc_exit(struct bch_fs *c)
{
}

int bch2_fs_btree_gc_init(struct bch_fs *c)
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);

init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
return 0;
}
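The btree_gc.c changes mostly thread a progress_indicator_state into bch2_gc_btree() so the check_allocations pass can report how far along it is. Assuming only the progress calls visible in the diff (bch2_progress_init() and bch2_progress_update_iter()), the caller-side shape is roughly this sketch, with error handling trimmed:

    /*
     * Sketch of the calling convention implied by the hunks above;
     * ~0ULL is the btree bitmask used in bch2_gc_btrees(), i.e.
     * "account keys in every btree".
     */
    static int gc_all_btrees(struct btree_trans *trans)
    {
    	struct progress_indicator_state progress;

    	bch2_progress_init(&progress, trans->c, ~0ULL);

    	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
    		int ret = bch2_gc_btree(trans, &progress, i, true);
    		if (ret)
    			return ret;
    	}
    	return 0;
    }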
libbcachefs/btree_gc.h:

@@ -83,7 +83,6 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);

void bch2_fs_btree_gc_exit(struct bch_fs *);
int bch2_fs_btree_gc_init(struct bch_fs *);
void bch2_fs_btree_gc_init_early(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_GC_H */
libbcachefs/btree_io.c:

@@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "at btree ");
bch2_btree_pos_to_text(out, c, b);

printbuf_indent_add(out, 2);

prt_printf(out, "\nnode offset %u/%u",
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
if (i)

@@ -550,23 +548,7 @@ static int __btree_err(int ret,
enum bch_sb_error_id err_type,
const char *fmt, ...)
{
struct printbuf out = PRINTBUF;
bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
va_list args;

btree_err_msg(&out, c, ca, b, i, k, b->written, write);

va_start(args, fmt);
prt_vprintf(&out, fmt, args);
va_end(args);

if (write == WRITE) {
bch2_print_string_as_lines(KERN_ERR, out.buf);
ret = c->opts.errors == BCH_ON_ERROR_continue
? 0
: -BCH_ERR_fsck_errors_not_fixed;
goto out;
}

if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
ret = -BCH_ERR_btree_node_read_err_fixable;

@@ -576,6 +558,29 @@ static int __btree_err(int ret,
if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
bch2_sb_error_count(c, err_type);

struct printbuf out = PRINTBUF;
if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
printbuf_indent_add_nextline(&out, 2);
#ifdef BCACHEFS_LOG_PREFIX
prt_printf(&out, bch2_log_msg(c, ""));
#endif
}

btree_err_msg(&out, c, ca, b, i, k, b->written, write);

va_list args;
va_start(args, fmt);
prt_vprintf(&out, fmt, args);
va_end(args);

if (write == WRITE) {
prt_str(&out, ", ");
ret = __bch2_inconsistent_error(c, &out)
? -BCH_ERR_fsck_errors_not_fixed
: 0;
silent = false;
}

switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
ret = !silent

@@ -585,25 +590,21 @@ static int __btree_err(int ret,
ret != -BCH_ERR_fsck_ignore)
goto fsck_err;
ret = -BCH_ERR_fsck_fix;
break;
case -BCH_ERR_btree_node_read_err_want_retry:
case -BCH_ERR_btree_node_read_err_must_retry:
if (!silent)
bch2_print_string_as_lines(KERN_ERR, out.buf);
break;
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
if (!silent)
bch2_print_string_as_lines(KERN_ERR, out.buf);
ret = bch2_topology_error(c);
prt_str(&out, ", ");
ret = __bch2_topology_error(c, &out);
if (ret)
silent = false;
break;
case -BCH_ERR_btree_node_read_err_incompatible:
if (!silent)
bch2_print_string_as_lines(KERN_ERR, out.buf);
ret = -BCH_ERR_fsck_errors_not_fixed;
silent = false;
break;
default:
BUG();
}

if (!silent)
bch2_print_str(c, KERN_ERR, out.buf);
out:
fsck_err:
printbuf_exit(&out);

@@ -817,7 +818,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
-BCH_ERR_btree_node_read_err_bad_node,
c, ca, b, i, NULL,
btree_node_bad_format,
"invalid bkey format: %s\n %s", buf1.buf,
"invalid bkey format: %s\n%s", buf1.buf,
(printbuf_reset(&buf2),
bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
printbuf_reset(&buf1);

@@ -997,7 +998,7 @@ drop_this_key:
}
got_good_key:
le16_add_cpu(&i->u64s, -next_good_key);
memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
set_btree_node_need_rewrite(b);
}
fsck_err:

@@ -1016,7 +1017,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool used_mempool, blacklisted;
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
u64 max_journal_seq = 0;
struct printbuf buf = PRINTBUF;

@@ -1187,7 +1187,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(i->journal_seq),
b->written, b->written + sectors, ptr_written);

b->written += sectors;
b->written = min(b->written + sectors, btree_sectors(c));

if (blacklisted && !first)
continue;

@@ -1223,23 +1223,20 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
sorted->keys.u64s = 0;

set_btree_bset(b, b->set, &b->data->keys);

b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0,
btree_buf_bytes(b) -
sizeof(struct btree_node) -
b->nr.live_u64s * sizeof(u64));

u64s = le16_to_cpu(sorted->keys.u64s);
b->data->keys.u64s = sorted->keys.u64s;
*sorted = *b->data;
sorted->keys.u64s = cpu_to_le16(u64s);
swap(sorted, b->data);
set_btree_bset(b, b->set, &b->data->keys);
b->nsets = 1;
b->data->keys.journal_seq = cpu_to_le64(max_journal_seq);

BUG_ON(b->nr.live_u64s != u64s);
BUG_ON(b->nr.live_u64s != le16_to_cpu(b->data->keys.u64s));

btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
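A subtle fix in the drop_this_key path above: the amount of bset tail to slide down used to be computed from bkey_p_next(k), which trusts the bad key's own length field, while the replacement advances by next_good_key, which was validated before use. Reduced to the two lines that matter (types and helpers as in the diff, shown here as a sketch):

    /*
     * Sketch: drop 'next_good_key' u64s of bad key material at 'k' by
     * sliding the rest of the bset down over it; le16_add_cpu() keeps
     * the bset's u64 count consistent, and vstruct_end() is the end of
     * the bset's data.
     */
    static void drop_bad_key(struct bset *i, struct bkey_packed *k,
    			 unsigned next_good_key)
    {
    	le16_add_cpu(&i->u64s, -next_good_key);
    	memmove_u64s_down(k, (u64 *) k + next_good_key,
    			  (u64 *) vstruct_end(i) - (u64 *) k);
    }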
@@ -1329,6 +1326,7 @@ static void btree_node_read_work(struct work_struct *work)
bch_info(c, "retrying read");
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
bio->bi_iter.bi_sector = rb->pick.ptr.offset;
bio->bi_iter.bi_size = btree_buf_bytes(b);

@@ -1339,17 +1337,22 @@ static void btree_node_read_work(struct work_struct *work)
} else {
bio->bi_status = BLK_STS_REMOVED;
}

bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
rb->start_time, !bio->bi_status);
start:
printbuf_reset(&buf);
bch2_btree_pos_to_text(&buf, c, b);
bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read,

if (ca && bio->bi_status)
bch_err_dev_ratelimited(ca,
"btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
rb->have_ioref = false;

bch2_mark_io_failure(&failed, &rb->pick);
bch2_mark_io_failure(&failed, &rb->pick, false);

can_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),

@@ -1401,12 +1404,11 @@ static void btree_node_read_endio(struct bio *bio)
struct btree_read_bio *rb =
container_of(bio, struct btree_read_bio, bio);
struct bch_fs *c = rb->c;
struct bch_dev *ca = rb->have_ioref
? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL;

if (rb->have_ioref) {
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);

bch2_latency_acct(ca, rb->start_time, READ);
}
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
rb->start_time, !bio->bi_status);

queue_work(c->btree_read_complete_wq, &rb->work);
}

@@ -1603,6 +1605,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);

bch2_latency_acct(ca, rb->start_time, READ);
percpu_ref_put(&ca->io_ref[READ]);
}

ra->err[rb->idx] = bio->bi_status;

@@ -1902,7 +1905,8 @@ static void btree_node_scrub_work(struct work_struct *work)
scrub->key.k->k.p, 0, scrub->level - 1, 0);

struct btree *b;
int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
int ret = lockrestart_do(trans,
PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter)));
if (ret)
goto err;

@@ -1910,7 +1914,7 @@ static void btree_node_scrub_work(struct work_struct *work)
bch_err(c, "error validating btree node during scrub on %s at btree %s",
scrub->ca->name, err.buf);

ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0);
}
err:
bch2_trans_iter_exit(trans, &iter);

@@ -1921,7 +1925,7 @@ err:
printbuf_exit(&err);
bch2_bkey_buf_exit(&scrub->key, c);
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
percpu_ref_put(&scrub->ca->io_ref);
percpu_ref_put(&scrub->ca->io_ref[READ]);
kfree(scrub);
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
}

@@ -1990,7 +1994,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
return 0;
err_free:
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
err:
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
return ret;

@@ -2016,7 +2020,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
bch2_journal_pin_drop(&c->journal, &w->journal);
}

static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
{
struct btree_write *w = btree_prev_write(b);
unsigned long old, new;

@@ -2024,6 +2028,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)

bch2_btree_complete_write(c, b, w);

if (start_time)
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);

old = READ_ONCE(b->flags);
do {
new = old;

@@ -2054,7 +2061,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}

static void btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
{
struct btree_trans *trans = bch2_trans_get(c);

@@ -2062,7 +2069,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)

/* we don't need transaction context anymore after we got the lock. */
bch2_trans_put(trans);
__btree_node_write_done(c, b);
__btree_node_write_done(c, b, start_time);
six_unlock_read(&b->c.lock);
}

@@ -2072,6 +2079,7 @@ static void btree_node_write_work(struct work_struct *work)
container_of(work, struct btree_write_bio, work);
struct bch_fs *c = wbio->wbio.c;
struct btree *b = wbio->wbio.bio.bi_private;
u64 start_time = wbio->start_time;
int ret = 0;

btree_bounce_free(c,

@@ -2104,12 +2112,18 @@ static void btree_node_write_work(struct work_struct *work)
}
out:
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b);
btree_node_write_done(c, b, start_time);
return;
err:
set_btree_node_noevict(b);
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
"writing btree node: %s", bch2_err_str(ret));

if (!bch2_err_matches(ret, EROFS)) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
bch2_btree_pos_to_text(&buf, c, b);
bch2_fs_fatal_error(c, "%s", buf.buf);
printbuf_exit(&buf);
}
goto out;
}

@@ -2122,23 +2136,33 @@ static void btree_node_write_endio(struct bio *bio)
struct bch_fs *c = wbio->c;
struct btree *b = wbio->bio.bi_private;
struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL;

bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
wbio->submit_time, !bio->bi_status);

if (ca && bio->bi_status) {
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_printf(&buf, "btree write error: %s\n ",
bch2_blk_status_to_str(bio->bi_status));
bch2_btree_pos_to_text(&buf, c, b);
bch_err_dev_ratelimited(ca, "%s", buf.buf);
printbuf_exit(&buf);
}

if (bio->bi_status) {
unsigned long flags;

if (wbio->have_ioref)
bch2_latency_acct(ca, wbio->submit_time, WRITE);

if (!ca ||
bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
"btree write error: %s",
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("btree")) {
spin_lock_irqsave(&c->btree_write_error_lock, flags);
bch2_dev_list_add_dev(&orig->failed, wbio->dev);
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
}

/*
* XXX: we should be using io_ref[WRITE], but we aren't retrying failed
* btree writes yet (due to device removal/ro):
*/
if (wbio->have_ioref)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);

if (parent) {
bio_put(bio);

@@ -2149,7 +2173,7 @@ static void btree_node_write_endio(struct bio *bio)
clear_btree_node_write_in_flight_inner(b);
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
INIT_WORK(&wb->work, btree_node_write_work);
queue_work(c->btree_io_complete_wq, &wb->work);
queue_work(c->btree_write_complete_wq, &wb->work);
}

static int validate_bset_for_write(struct bch_fs *c, struct btree *b,

@@ -2208,6 +2232,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
bool validate_before_checksum = false;
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
void *data;
u64 start_time = local_clock();
int ret;

if (flags & BTREE_WRITE_ALREADY_STARTED)

@@ -2416,6 +2441,7 @@ do_write:
wbio->data = data;
wbio->data_bytes = bytes;
wbio->sector_offset = b->written;
wbio->start_time = start_time;
wbio->wbio.c = c;
wbio->wbio.used_mempool = used_mempool;
wbio->wbio.first_btree_write = !b->written;

@@ -2443,7 +2469,7 @@ err:
b->written += sectors_to_write;
nowrite:
btree_bounce_free(c, bytes, used_mempool, data);
__btree_node_write_done(c, b);
__btree_node_write_done(c, b, 0);
}

/*
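The write-path hunks above plumb a start_time from __bch2_btree_node_write() through the write bio into the completion handlers, so btree node write latency is recorded in c->times[BCH_TIME_btree_node_write]; passing 0 means nothing was submitted and no sample should be taken. The pattern, reduced to its essentials (my_write_bio is a hypothetical stand-in for struct btree_write_bio):

    /*
     * Sketch: capture a timestamp at submission, carry it in the bio
     * wrapper, report it exactly once at completion.
     */
    struct my_write_bio {
    	u64		start_time;
    	struct bio	bio;
    };

    static void my_write_submit(struct my_write_bio *wbio)
    {
    	wbio->start_time = local_clock();
    	/* ... build and submit wbio->bio ... */
    }

    static void my_write_done(struct bch_fs *c, u64 start_time)
    {
    	if (start_time)
    		bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write],
    				       start_time);
    }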
libbcachefs/btree_io.h:

@@ -52,6 +52,7 @@ struct btree_write_bio {
void *data;
unsigned data_bytes;
unsigned sector_offset;
u64 start_time;
struct bch_write_bio wbio;
};
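Several hunks in btree_io.c also replace percpu_ref_put(&ca->io_ref) with percpu_ref_put(&ca->io_ref[READ]), implying struct bch_dev now keeps one ioref per I/O direction; the XXX comment in btree_node_write_endio() notes the write path still drops the READ ref for now. A sketch of the implied layout (my_dev is a hypothetical name):

    /*
     * Sketch: per-device io refs split by direction, so reads can keep
     * a device pinned independently of writes being drained.
     */
    struct my_dev {
    	struct percpu_ref	io_ref[2];	/* indexed by READ/WRITE */
    };

    static void my_read_completed(struct my_dev *ca)
    {
    	percpu_ref_put(&ca->io_ref[READ]);
    }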
libbcachefs/btree_iter.c:

@@ -244,10 +244,8 @@ void bch2_trans_verify_paths(struct btree_trans *trans)
bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;

BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);

BUG_ON((iter->flags & BTREE_ITER_is_extents) &&

@@ -276,9 +274,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
bkey_gt(iter->pos, iter->k.p)));
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
static int bch2_btree_iter_verify_ret(struct btree_trans *trans,
struct btree_iter *iter, struct bkey_s_c k)
{
struct btree_trans *trans = iter->trans;
struct btree_iter copy;
struct bkey_s_c prev;
int ret = 0;

@@ -299,7 +297,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k
bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
BTREE_ITER_nopreserve|
BTREE_ITER_all_snapshots);
prev = bch2_btree_iter_prev(&copy);
prev = bch2_btree_iter_prev(trans, &copy);
if (!prev.k)
goto out;

@@ -365,9 +363,11 @@ static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify(struct btree_trans *trans,
struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k) { return 0; }

#endif

@@ -562,20 +562,6 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
struct btree_path *path,
struct btree_path_level *l,
struct bkey *u)
{
struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
bch2_btree_node_iter_peek(&l->iter, l->b));

path->pos = k.k ? k.k->p : l->b->key.k.p;
trans->paths_sorted = false;
bch2_btree_path_verify_level(trans, path, l - path->l);
return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
struct btree_path *path,
struct btree_path_level *l,

@@ -1501,22 +1487,14 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)

for (struct jset_entry *e = trans->journal_entries;
e != btree_trans_journal_entries_top(trans);
e = vstruct_next(e))
e = vstruct_next(e)) {
bch2_journal_entry_to_text(buf, trans->c, e);
prt_newline(buf);
}

printbuf_indent_sub(buf, 2);
}

noinline __cold
void bch2_dump_trans_updates(struct btree_trans *trans)
{
struct printbuf buf = PRINTBUF;

bch2_trans_updates_to_text(&buf, trans);
bch2_print_str(trans->c, buf.buf);
printbuf_exit(&buf);
}

static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
{
struct btree_path *path = trans->paths + path_idx;

@@ -1613,7 +1591,7 @@ void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
__bch2_trans_paths_to_text(&buf, trans, nosort);
bch2_trans_updates_to_text(&buf, trans);

bch2_print_str(trans->c, buf.buf);
bch2_print_str(trans->c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}

@@ -1877,10 +1855,8 @@ hole:
return (struct bkey_s_c) { u, NULL };
}

void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;

if (!iter->path || trans->restarted)
return;

@@ -1892,17 +1868,14 @@ void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
/* Btree iterators: */

int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
__bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
{
return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
return bch2_btree_path_traverse(trans, iter->path, iter->flags);
}

int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
int ret;

bch2_trans_verify_not_unlocked_or_in_restart(trans);

iter->path = bch2_btree_path_set_pos(trans, iter->path,

@@ -1910,7 +1883,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
iter->flags & BTREE_ITER_intent,
btree_iter_ip_allocated(iter));

ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
return ret;

@@ -1922,14 +1895,14 @@ bch2_btree_iter_traverse(struct btree_iter *iter)

/* Iterate across nodes (leaf and interior nodes) */

struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct btree *b = NULL;
int ret;

EBUG_ON(trans->paths[iter->path].cached);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)

@@ -1951,7 +1924,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

return b;
err:

@@ -1960,26 +1933,26 @@ err:
}

/* Only kept for -tools */
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree *b;

while (b = bch2_btree_iter_peek_node(iter),
while (b = bch2_btree_iter_peek_node(trans, iter),
bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
bch2_trans_begin(iter->trans);
bch2_trans_begin(trans);

return b;
}

struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct btree *b = NULL;
int ret;

EBUG_ON(trans->paths[iter->path].cached);
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)

@@ -2046,7 +2019,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
EBUG_ON(btree_iter_path(trans, iter)->uptodate);
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

return b;
err:

@@ -2056,7 +2029,7 @@ err:

/* Iterate across keys (in leaf nodes only) */

inline bool bch2_btree_iter_advance(struct btree_iter *iter)
inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter *iter)
{
struct bpos pos = iter->k.p;
bool ret = !(iter->flags & BTREE_ITER_all_snapshots

@@ -2065,11 +2038,11 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)

if (ret && !(iter->flags & BTREE_ITER_is_extents))
pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
return ret;
}

inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter *iter)
{
struct bpos pos = bkey_start_pos(&iter->k);
bool ret = !(iter->flags & BTREE_ITER_all_snapshots

@@ -2078,7 +2051,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)

if (ret && !(iter->flags & BTREE_ITER_is_extents))
pos = bkey_predecessor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
return ret;
}

@@ -2205,9 +2178,9 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
* bkey_s_c_null:
*/
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
struct bkey u;
struct bkey_s_c k;

@@ -2253,14 +2226,14 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
return k;
}

static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct btree_iter *iter,
struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
struct bkey_s_c k, k2;
int ret;

EBUG_ON(btree_iter_path(trans, iter)->cached);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

while (1) {
iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,

@@ -2270,7 +2243,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
break;
}

@@ -2280,7 +2253,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp

if (unlikely(!l->b)) {
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}

@@ -2291,10 +2264,10 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp

if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
break;
}
}

@@ -2327,27 +2300,28 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
search_key = bpos_successor(l->b->key.k.p);
} else {
/* End of btree: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}
}

bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
}

/**
* bch2_btree_iter_peek_max() - returns first key greater than or equal to
* iterator's current position
* @trans: btree transaction object
* @iter: iterator to peek from
* @end: search limit: returns keys less than or equal to @end
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end)
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key = btree_iter_search_key(iter);
struct bkey_s_c k;
struct bpos iter_pos = iter->pos;

@@ -2357,6 +2331,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));

ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
goto out_no_locked;
}

if (iter->update_path) {
bch2_path_put_nokeep(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);

@@ -2364,7 +2344,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
}

while (1) {
k = __bch2_btree_iter_peek(iter, search_key);
k = __bch2_btree_iter_peek(trans, iter, search_key);
if (unlikely(!k.k))
goto end;
if (unlikely(bkey_err(k)))

@@ -2478,9 +2458,9 @@ out_no_locked:
if (!(iter->flags & BTREE_ITER_all_snapshots))
iter->pos.snapshot = iter->snapshot;

ret = bch2_btree_iter_verify_ret(iter, k);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
}

@@ -2488,7 +2468,7 @@ out_no_locked:

return k;
end:
bch2_btree_iter_set_pos(iter, end);
bch2_btree_iter_set_pos(trans, iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}
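The bulk of the btree_iter.c diff is one mechanical change: every iterator entry point now takes the btree_trans explicitly instead of loading iter->trans, and the field itself is deleted further down, shrinking struct btree_iter and removing a pointer chase. Under the new convention a forward scan looks roughly like this sketch; real callers normally use the for_each_btree_key() macros, which also handle transaction restarts:

    /* Sketch of a scan with the explicit-trans API from this diff. */
    static int scan_btree(struct btree_trans *trans, enum btree_id btree)
    {
    	struct btree_iter iter;
    	int ret = 0;

    	bch2_trans_iter_init(trans, &iter, btree, POS_MIN, 0);

    	while (1) {
    		struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter);

    		ret = bkey_err(k);
    		if (ret || !k.k)
    			break;	/* error (possibly a restart) or end of btree */

    		/* ... process k ... */

    		bch2_btree_iter_advance(trans, &iter);
    	}

    	bch2_trans_iter_exit(trans, &iter);
    	return ret;
    }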
@@ -2496,24 +2476,25 @@ end:
/**
* bch2_btree_iter_next() - returns first key greater than iterator's current
* position
* @trans: btree transaction object
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_advance(iter))
if (!bch2_btree_iter_advance(trans, iter))
return bkey_s_c_null;

return bch2_btree_iter_peek(iter);
return bch2_btree_iter_peek(trans, iter);
}

static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key)
static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter,
struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
struct bkey_s_c k, k2;

bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);

while (1) {
iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,

@@ -2523,7 +2504,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
break;
}

@@ -2533,7 +2514,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru

if (unlikely(!l->b)) {
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}

@@ -2549,10 +2530,10 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru

if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k2)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
break;
}
}

@@ -2573,25 +2554,27 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
search_key = bpos_predecessor(path->l[0].b->data->min_key);
} else {
/* Start of btree: */
bch2_btree_iter_set_pos(iter, POS_MIN);
bch2_btree_iter_set_pos(trans, iter, POS_MIN);
k = bkey_s_c_null;
break;
}
}

bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
}

/**
* bch2_btree_iter_peek_prev_min() - returns first key less than or equal to
* iterator's current position
* @trans: btree transaction object
* @iter: iterator to peek from
* @end: search limit: returns keys greater than or equal to @end
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end)
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
!bkey_eq(iter->pos, POS_MAX)) {

@@ -2603,7 +2586,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
* real visible extents - easiest to just use peek_slot() (which
* internally uses peek() for extents)
*/
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
if (bkey_err(k))
return k;

@@ -2613,7 +2596,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
return k;
}

struct btree_trans *trans = iter->trans;
struct bpos search_key = iter->pos;
struct bkey_s_c k;
btree_path_idx_t saved_path = 0;

@@ -2622,8 +2604,14 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));

int ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
goto out_no_locked;
}

while (1) {
k = __bch2_btree_iter_peek_prev(iter, search_key);
k = __bch2_btree_iter_peek_prev(trans, iter, search_key);
if (unlikely(!k.k))
goto end;
if (unlikely(bkey_err(k)))

@@ -2714,10 +2702,10 @@ out_no_locked:
bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent);

bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
end:
bch2_btree_iter_set_pos(iter, end);
bch2_btree_iter_set_pos(trans, iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}

@@ -2725,37 +2713,43 @@ end:
/**
* bch2_btree_iter_prev() - returns first key less than iterator's current
* position
* @trans: btree transaction object
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_rewind(iter))
if (!bch2_btree_iter_rewind(trans, iter))
return bkey_s_c_null;

return bch2_btree_iter_peek_prev(iter);
return bch2_btree_iter_peek_prev(trans, iter);
}

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key;
struct bkey_s_c k;
int ret;

bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));

ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
goto out_no_locked;
}

/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;

bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
}

search_key = btree_iter_search_key(iter);

@@ -2789,7 +2783,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
goto out;

if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
(k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) {
if (!bkey_err(k))
iter->k = *k.k;
/* We're not returning a key from iter->path: */

@@ -2816,8 +2810,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->flags & BTREE_ITER_intent) {
struct btree_iter iter2;

bch2_trans_copy_iter(&iter2, iter);
k = bch2_btree_iter_peek_max(&iter2, end);
bch2_trans_copy_iter(trans, &iter2, iter);
k = bch2_btree_iter_peek_max(trans, &iter2, end);

if (k.k && !bkey_err(k)) {
swap(iter->key_cache_path, iter2.key_cache_path);

@@ -2828,9 +2822,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
} else {
struct bpos pos = iter->pos;

k = bch2_btree_iter_peek_max(iter, end);
k = bch2_btree_iter_peek_max(trans, iter, end);
if (unlikely(bkey_err(k)))
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
else
iter->pos = pos;
}

@@ -2861,39 +2855,39 @@ out:
btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_verify_ret(iter, k);
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret))
return bkey_s_c_err(ret);

return k;
}

struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_advance(iter))
if (!bch2_btree_iter_advance(trans, iter))
return bkey_s_c_null;

return bch2_btree_iter_peek_slot(iter);
return bch2_btree_iter_peek_slot(trans, iter);
}

struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_rewind(iter))
if (!bch2_btree_iter_rewind(trans, iter))
return bkey_s_c_null;

return bch2_btree_iter_peek_slot(iter);
return bch2_btree_iter_peek_slot(trans, iter);
}

/* Obsolete, but still used by rust wrapper in -tools */
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *trans, struct btree_iter *iter)
{
struct bkey_s_c k;

while (btree_trans_too_many_iters(iter->trans) ||
(k = bch2_btree_iter_peek_type(iter, iter->flags),
while (btree_trans_too_many_iters(trans) ||
(k = bch2_btree_iter_peek_type(trans, iter, iter->flags),
bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
bch2_trans_begin(iter->trans);
bch2_trans_begin(trans);

return k;
}

@@ -3039,7 +3033,6 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
iter->path = 0;
iter->update_path = 0;
iter->key_cache_path = 0;
iter->trans = NULL;
}

void bch2_trans_iter_init_outlined(struct btree_trans *trans,

@@ -3079,10 +3072,9 @@ void bch2_trans_node_iter_init(struct btree_trans *trans,
BUG_ON(iter->min_depth != depth);
}

void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
void bch2_trans_copy_iter(struct btree_trans *trans,
struct btree_iter *dst, struct btree_iter *src)
{
struct btree_trans *trans = src->trans;

*dst = *src;
#ifdef TRACK_PATH_ALLOCATED
dst->ip_allocated = _RET_IP_;

@@ -3094,7 +3086,19 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
dst->key_cache_path = 0;
}

void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
darray_trans_kmalloc_trace *trace)
{
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 60);

darray_for_each(*trace, i)
prt_printf(out, "%pS\t%zu\n", (void *) i->ip, i->bytes);
}
#endif

void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long ip)
{
struct bch_fs *c = trans->c;
unsigned new_top = trans->mem_top + size;

@@ -3104,10 +3108,33 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
void *new_mem;
void *p;

WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_DEBUG
struct printbuf buf = PRINTBUF;
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
#endif
}

ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (ret)
return ERR_PTR(ret);

struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
if (new_bytes > s->max_mem) {
#ifdef CONFIG_BCACHEFS_DEBUG
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
trans->trans_kmalloc_trace.nr);

memcpy(s->trans_kmalloc_trace.data,
trans->trans_kmalloc_trace.data,
sizeof(s->trans_kmalloc_trace.data[0]) *
s->trans_kmalloc_trace.nr);
#endif
s->max_mem = new_bytes;
}

if (trans->used_mempool) {
if (trans->mem_bytes >= new_bytes)

@@ -3163,9 +3190,12 @@ out_new_mem:

if (old_bytes) {
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
return ERR_PTR(btree_trans_restart_ip(trans,
BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
bch2_trans_kmalloc_trace(trans, size, ip);

p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);

@@ -3271,6 +3301,18 @@ u32 bch2_trans_begin(struct btree_trans *trans)

trans->last_begin_ip = _RET_IP_;

#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
if (trans->restarted) {
trans->restart_count_this_trans++;
} else {
trans->restart_count_this_trans = 0;
}
#endif

#ifdef CONFIG_BCACHEFS_DEBUG
trans->trans_kmalloc_trace.nr = 0;
#endif

trans_set_locked(trans, false);

if (trans->restarted) {

@@ -3439,6 +3481,7 @@ void bch2_trans_put(struct btree_trans *trans)

#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&trans->last_restarted_trace);
darray_exit(&trans->trans_kmalloc_trace);
#endif

unsigned long *paths_allocated = trans->paths_allocated;

@@ -3594,6 +3637,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
s++) {
#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&s->trans_kmalloc_trace);
#endif
kfree(s->max_paths_text);
bch2_time_stats_exit(&s->lock_hold_times);
}
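__bch2_trans_kmalloc() above is a bump allocator: an allocation just advances mem_top, and growing the arena mid-transaction forces a transaction restart. The new CONFIG_BCACHEFS_DEBUG tracing records an (ip, bytes) entry per allocation so the offenders can be printed when a transaction blows past BTREE_TRANS_MEM_MAX. The fast path reduces to roughly this sketch (growth and restart paths omitted):

    /* Sketch of the bump-allocation fast path with the debug trace. */
    static void *trans_bump_alloc(struct btree_trans *trans, size_t size,
    			      unsigned long ip)
    {
    	void *p;

    	if (trans->mem_top + size > trans->mem_bytes)
    		return NULL;	/* real code grows the arena, maybe restarting */

    	bch2_trans_kmalloc_trace(trans, size, ip);	/* debug-only record */

    	p = trans->mem + trans->mem_top;
    	trans->mem_top += size;
    	memset(p, 0, size);
    	return p;
    }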
@ -9,7 +9,6 @@
|
||||
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
|
||||
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
|
||||
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
|
||||
void bch2_dump_trans_updates(struct btree_trans *);
|
||||
void bch2_dump_trans_paths_updates(struct btree_trans *);
|
||||
|
||||
static inline int __bkey_err(const struct bkey *k)
|
||||
@ -335,13 +334,20 @@ static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_tra
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
|
||||
static int btree_trans_restart_foreign_task(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
BUG_ON(err <= 0);
|
||||
BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
|
||||
|
||||
trans->restarted = err;
|
||||
trans->last_restarted_ip = ip;
|
||||
return -err;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
btree_trans_restart_foreign_task(trans, err, ip);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
darray_exit(&trans->last_restarted_trace);
|
||||
bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT);
|
||||
@ -355,6 +361,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
|
||||
return btree_trans_restart_ip(trans, err, _THIS_IP_);
|
||||
}
|
||||
|
||||
static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
|
||||
trace_and_count(trans->c, trans_restart_injected, trans, ip);
|
||||
return btree_trans_restart_ip(trans,
|
||||
BCH_ERR_transaction_restart_fault_inject, ip);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}

bool bch2_btree_node_upgrade(struct btree_trans *,
			     struct btree_path *, unsigned);

@@ -375,36 +393,37 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);

int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);

struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *);
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *);

static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans,
						   struct btree_iter *iter)
{
	return bch2_btree_iter_peek_max(iter, SPOS_MAX);
	return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX);
}

struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos);

static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter)
{
	return bch2_btree_iter_peek_prev_min(iter, POS_MIN);
	return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN);
}

struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *);

bool bch2_btree_iter_advance(struct btree_iter *);
bool bch2_btree_iter_rewind(struct btree_iter *);
bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *);
bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *);

static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
@@ -415,10 +434,9 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo
	iter->k.size = 0;
}

static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
static inline void bch2_btree_iter_set_pos(struct btree_trans *trans,
					   struct btree_iter *iter, struct bpos new_pos)
{
	struct btree_trans *trans = iter->trans;

	if (unlikely(iter->update_path))
		bch2_path_put(trans, iter->update_path,
			      iter->flags & BTREE_ITER_intent);
@@ -436,13 +454,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
	iter->pos = bkey_start_pos(&iter->k);
}

static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans,
						struct btree_iter *iter, u32 snapshot)
{
	struct bpos pos = iter->pos;

	iter->snapshot = snapshot;
	pos.snapshot = snapshot;
	bch2_btree_iter_set_pos(iter, pos);
	bch2_btree_iter_set_pos(trans, iter, pos);
}

void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
@@ -484,7 +503,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
					       unsigned flags,
					       unsigned long ip)
{
	iter->trans = trans;
	iter->update_path = 0;
	iter->key_cache_path = 0;
	iter->btree_id = btree_id;
@@ -521,22 +539,50 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
			       enum btree_id, struct bpos,
			       unsigned, unsigned, unsigned);
void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *);

void bch2_set_btree_iter_dontneed(struct btree_iter *);
void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);

void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
				      darray_trans_kmalloc_trace *);
#endif

/**
 * bch2_trans_kmalloc - allocate memory for use by the current transaction
 *
 * Must be called after bch2_trans_begin, which on second and further calls
 * frees all memory allocated in this transaction
 */
static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);

static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
					    unsigned long ip)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	darray_push(&trans->trans_kmalloc_trace,
		    ((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
#endif
}

static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size,
							     unsigned long ip)
{
	size = roundup(size, 8);

	bch2_trans_kmalloc_trace(trans, size, ip);

	if (likely(trans->mem_top + size <= trans->mem_bytes)) {
		void *p = trans->mem + trans->mem_top;

		trans->mem_top += size;
		return p;
	} else {
		return __bch2_trans_kmalloc(trans, size, ip);
	}
}

static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size,
						   unsigned long ip)
{
	size = roundup(size, 8);

	bch2_trans_kmalloc_trace(trans, size, ip);

	if (likely(trans->mem_top + size <= trans->mem_bytes)) {
		void *p = trans->mem + trans->mem_top;

@@ -544,22 +590,24 @@ static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
		memset(p, 0, size);
		return p;
	} else {
		return __bch2_trans_kmalloc(trans, size);
		return __bch2_trans_kmalloc(trans, size, ip);
	}
}

static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
/**
 * bch2_trans_kmalloc - allocate memory for use by the current transaction
 *
 * Must be called after bch2_trans_begin, which on second and further calls
 * frees all memory allocated in this transaction
 */
static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	size = round_up(size, 8);
	return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_);
}

	if (likely(trans->mem_top + size <= trans->mem_bytes)) {
		void *p = trans->mem + trans->mem_top;

		trans->mem_top += size;
		return p;
	} else {
		return __bch2_trans_kmalloc(trans, size);
	}
static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
{
	return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_);
}
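bch2_trans_kmalloc() and its variants are a bump allocator over the transaction's memory arena: the common case is a pointer increment, and everything is freed at once on the next bch2_trans_begin(). A minimal userspace analogue (illustrative only; the real slow path grows the arena via __bch2_trans_kmalloc() and may restart the transaction):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Toy bump allocator mirroring trans->mem / mem_top / mem_bytes */
struct bump {
	char	mem[4096];
	size_t	top;
};

static void *bump_alloc(struct bump *b, size_t size, int zero)
{
	size = (size + 7) & ~(size_t)7;		/* roundup(size, 8), as above */

	if (b->top + size > sizeof(b->mem))
		return NULL;			/* real code: __bch2_trans_kmalloc() slow path */

	void *p = b->mem + b->top;
	b->top += size;
	return zero ? memset(p, 0, size) : p;	/* the _nomemzero variant skips the memset */
}

/* bch2_trans_begin() analogue: invalidates every prior allocation at once */
static void bump_reset(struct bump *b)
{
	b->top = 0;
}

int main(void)
{
	struct bump b = { .top = 0 };
	int *x = bump_alloc(&b, sizeof(*x), 1);
	int *y = bump_alloc(&b, sizeof(*y), 1);

	printf("x=%p y=%p (8 bytes apart)\n", (void *)x, (void *)y);
	bump_reset(&b);				/* x and y are now dangling */
	return 0;
}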

static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
@@ -570,7 +618,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
	struct bkey_s_c k;

	bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
	k = bch2_btree_iter_peek_slot(iter);
	k = bch2_btree_iter_peek_slot(trans, iter);

	if (!bkey_err(k) && type && k.k->type != type)
		k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
@@ -640,14 +688,14 @@ u32 bch2_trans_begin(struct btree_trans *);
	int _ret3 = 0;							\
	do {								\
		_ret3 = lockrestart_do((_trans), ({			\
			struct btree *_b = bch2_btree_iter_peek_node(&_iter); \
			struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\
			if (!_b)					\
				break;					\
									\
			PTR_ERR_OR_ZERO(_b) ?: (_do);			\
		})) ?:							\
		lockrestart_do((_trans),				\
			PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \
			PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\
	} while (!_ret3);						\
									\
	bch2_trans_iter_exit((_trans), &(_iter));			\
@@ -659,31 +707,34 @@ u32 bch2_trans_begin(struct btree_trans *);
	__for_each_btree_node(_trans, _iter, _btree_id, _start,		\
			      0, 0, _flags, _b, _do)

static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans,
							     struct btree_iter *iter,
							     unsigned flags)
{
	return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
					  bch2_btree_iter_peek_prev(iter);
	return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
					  bch2_btree_iter_peek_prev(trans, iter);
}

static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans,
							struct btree_iter *iter,
							unsigned flags)
{
	return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
					  bch2_btree_iter_peek(iter);
	return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
					  bch2_btree_iter_peek(trans, iter);
}

static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans,
							    struct btree_iter *iter,
							    struct bpos end,
							    unsigned flags)
{
	if (!(flags & BTREE_ITER_slots))
		return bch2_btree_iter_peek_max(iter, end);
		return bch2_btree_iter_peek_max(trans, iter, end);

	if (bkey_gt(iter->pos, end))
		return bkey_s_c_null;

	return bch2_btree_iter_peek_slot(iter);
	return bch2_btree_iter_peek_slot(trans, iter);
}

int __bch2_btree_trans_too_many_iters(struct btree_trans *);
@@ -739,7 +790,7 @@ transaction_restart:						\
	if (!_ret2)							\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
									\
	_ret2 ?: trans_was_restarted(_trans, _restart_count);		\
	_ret2 ?: trans_was_restarted(_trans, _orig_restart_count);	\
})

#define for_each_btree_key_max_continue(_trans, _iter,			\
@@ -750,14 +801,14 @@ transaction_restart:						\
									\
	do {								\
		_ret3 = lockrestart_do(_trans, ({			\
			(_k) = bch2_btree_iter_peek_max_type(&(_iter),	\
			(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter),	\
							     _end, (_flags));	\
			if (!(_k).k)					\
				break;					\
									\
			bkey_err(_k) ?: (_do);				\
		}));							\
	} while (!_ret3 && bch2_btree_iter_advance(&(_iter)));		\
	} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter)));	\
									\
	bch2_trans_iter_exit((_trans), &(_iter));			\
	_ret3;								\
@@ -795,14 +846,14 @@ transaction_restart:						\
									\
	do {								\
		_ret3 = lockrestart_do(_trans, ({			\
			(_k) = bch2_btree_iter_peek_prev_type(&(_iter),	\
			(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter),	\
							      (_flags));	\
			if (!(_k).k)					\
				break;					\
									\
			bkey_err(_k) ?: (_do);				\
		}));							\
	} while (!_ret3 && bch2_btree_iter_rewind(&(_iter)));		\
	} while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter)));	\
									\
	bch2_trans_iter_exit((_trans), &(_iter));			\
	_ret3;								\
@@ -832,21 +883,22 @@ transaction_restart:						\
			(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_commit_flags)))

struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
							  struct btree_iter *);

#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id,	\
			   _start, _end, _flags, _k, _ret)		\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\
	     (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))
	     bch2_btree_iter_advance(_trans, &(_iter)))

#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\
#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\
	for (;								\
	     (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),	\
	     (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),	\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))
	     bch2_btree_iter_advance(_trans, &(_iter)))

#define for_each_btree_key_norestart(_trans, _iter, _btree_id,		\
			   _start, _flags, _k, _ret)			\
@@ -857,12 +909,12 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
			   _start, _flags, _k, _ret)			\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags),	\
	     (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags),	\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_rewind(&(_iter)))
	     bch2_btree_iter_rewind(_trans, &(_iter)))

#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret)	\
	for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret)	\
	for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret)
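The theme of this whole hunk is mechanical: every iterator entry point now takes the btree_trans explicitly instead of recovering it from iter->trans (a field this series deletes from struct btree_iter; see the btree_types.h hunk further down). A hypothetical caller, written only to illustrate the new calling convention using the signatures shown above:

/* Illustrative only -- not from the patch. */
static int walk_btree(struct btree_trans *trans, enum btree_id btree)
{
	struct btree_iter iter;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, btree, POS_MIN, 0);

	while (1) {
		struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter);

		ret = bkey_err(k);
		if (ret || !k.k)
			break;

		/* ... use k ... */

		bch2_btree_iter_advance(trans, &iter);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}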

/*
 * This should not be used in a fastpath, without first trying _do in

--- a/libbcachefs/btree_journal_iter.c
+++ b/libbcachefs/btree_journal_iter.c
@@ -687,7 +687,8 @@ void bch2_journal_keys_put(struct bch_fs *c)

static void __journal_keys_sort(struct journal_keys *keys)
{
	sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
	sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
		       journal_sort_key_cmp, NULL);

	cond_resched();

--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -13,6 +13,7 @@
#include "trace.h"

#include <linux/sched/mm.h>
#include <linux/seq_buf.h>

static inline bool btree_uses_pcpu_readers(enum btree_id id)
{
@@ -156,7 +157,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
	}

	if (ck) {
		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
		ck->c.cached = true;
		goto lock;
	}
@@ -287,6 +288,19 @@ err:
	return ret;
}

static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans,
						       struct btree_path *ck_path,
						       struct bkey_s_c k)
{
	struct printbuf buf = PRINTBUF;

	bch2_bpos_to_text(&buf, ck_path->pos);
	prt_char(&buf, ' ');
	bch2_bkey_val_to_text(&buf, trans->c, k);
	trace_key_cache_fill(trans, buf.buf);
	printbuf_exit(&buf);
}
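Hoisting the tracepoint formatting into a noinline_for_stack helper keeps the printbuf and its formatting state out of the caller's stack frame; btree_key_cache_fill() below now only pays for a branch when the tracepoint is disabled. The general shape of the trick, as a plain-C sketch:

#include <stdio.h>

static int tracing_enabled;	/* stands in for trace_key_cache_fill_enabled() */

/* noinline confines the 256-byte scratch buffer to this frame;
 * the kernel spells this noinline_for_stack. */
__attribute__((noinline))
static void emit_trace(int key)
{
	char buf[256];

	snprintf(buf, sizeof(buf), "key=%d\n", key);
	fputs(buf, stderr);
}

static void hot_path(int key)
{
	if (tracing_enabled)	/* fast path: one predictable branch, no big frame */
		emit_trace(key);
}

int main(void)
{
	hot_path(1);
	tracing_enabled = 1;
	hot_path(2);
	return 0;
}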

static noinline int btree_key_cache_fill(struct btree_trans *trans,
					 struct btree_path *ck_path,
					 unsigned flags)
@@ -306,7 +320,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
			     BTREE_ITER_key_cache_fill|
			     BTREE_ITER_cached_nofill);
	iter.flags &= ~BTREE_ITER_with_journal;
	k = bch2_btree_iter_peek_slot(&iter);
	k = bch2_btree_iter_peek_slot(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;
@@ -320,18 +334,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
	if (ret)
		goto err;

	if (trace_key_cache_fill_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bpos_to_text(&buf, ck_path->pos);
		prt_char(&buf, ' ');
		bch2_bkey_val_to_text(&buf, trans->c, k);
		trace_key_cache_fill(trans, buf.buf);
		printbuf_exit(&buf);
	}
	if (trace_key_cache_fill_enabled())
		do_trace_key_cache_fill(trans, ck_path, k);
out:
	/* We're not likely to need this iterator again: */
	bch2_set_btree_iter_dontneed(&iter);
	bch2_set_btree_iter_dontneed(trans, &iter);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
@@ -412,7 +419,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
			     BTREE_ITER_intent);
	b_iter.flags &= ~BTREE_ITER_with_key_cache;

	ret = bch2_btree_iter_traverse(&c_iter);
	ret = bch2_btree_iter_traverse(trans, &c_iter);
	if (ret)
		goto out;

@@ -444,7 +451,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
	    !test_bit(JOURNAL_space_low, &c->journal.flags))
		commit_flags |= BCH_TRANS_COMMIT_no_journal_res;

	struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter);
	struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter);
	ret = bkey_err(btree_k);
	if (ret)
		goto err;
@@ -784,6 +791,18 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{
}

static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
{
	struct bch_fs *c = shrink->private_data;
	struct btree_key_cache *bc = &c->btree_key_cache;
	char *cbuf;
	size_t buflen = seq_buf_get_buf(s, &cbuf);
	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);

	bch2_btree_key_cache_to_text(&out, bc);
	seq_buf_commit(s, out.pos);
}

int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
@@ -808,6 +827,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
	bc->shrink = shrink;
	shrink->count_objects	= bch2_btree_key_cache_count;
	shrink->scan_objects	= bch2_btree_key_cache_scan;
	shrink->to_text		= bch2_btree_key_cache_shrinker_to_text;
	shrink->batch		= 1 << 14;
	shrink->seeks		= 0;
	shrink->private_data	= c;

--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -7,9 +7,10 @@
static struct lock_class_key bch2_btree_node_lock_key;

void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
			  enum six_lock_init_flags flags)
			  enum six_lock_init_flags flags,
			  gfp_t gfp)
{
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp);
	lockdep_set_notrack_class(&b->lock);
}

@@ -90,10 +91,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
	struct trans_waiting_for_lock *i;

	for (i = g->g; i != g->g + g->nr; i++) {
		struct task_struct *task = i->trans->locking_wait.task;
		struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
		if (i != g->g)
			prt_str(out, "<- ");
		prt_printf(out, "%u ", task ?task->pid : 0);
		prt_printf(out, "%u ", task ? task->pid : 0);
	}
	prt_newline(out);
}
@@ -171,7 +172,9 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
	if (i == g->g) {
		trace_would_deadlock(g, i->trans);
		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
		return btree_trans_restart_foreign_task(i->trans,
					BCH_ERR_transaction_restart_would_deadlock,
					_THIS_IP_);
	} else {
		i->trans->lock_must_abort = true;
		wake_up_process(i->trans->locking_wait.task);
@@ -233,7 +236,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle,
		prt_newline(&buf);
	}

	bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
	bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
	printbuf_exit(&buf);
	BUG();
}
@@ -796,13 +799,6 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)
	return __bch2_trans_relock(trans, false);
}

void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
	__bch2_trans_unlock(trans);

	trans_set_unlocked(trans);
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	__bch2_trans_unlock(trans);

--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -13,9 +13,8 @@
#include "btree_iter.h"
#include "six.h"

void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp);

void bch2_trans_unlock_noassert(struct btree_trans *);
void bch2_trans_unlock_write(struct btree_trans *);

static inline bool is_btree_node(struct btree_path *path, unsigned l)

--- a/libbcachefs/btree_node_scan.c
+++ b/libbcachefs/btree_node_scan.c
@@ -13,6 +13,7 @@

#include <linux/kthread.h>
#include <linux/min_heap.h>
#include <linux/sched/sysctl.h>
#include <linux/sort.h>

struct find_btree_nodes_worker {
@@ -166,17 +167,23 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
	bio->bi_iter.bi_sector	= offset;
	bch2_bio_map(bio, bn, PAGE_SIZE);

	u64 submit_time = local_clock();
	submit_bio_wait(bio);
	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,

	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);

	if (bio->bi_status) {
		bch_err_dev_ratelimited(ca,
				"IO error in try_read_btree_node() at %llu: %s",
				offset, bch2_blk_status_to_str(bio->bi_status)))
				offset, bch2_blk_status_to_str(bio->bi_status));
		return;
	}

	if (le64_to_cpu(bn->magic) != bset_magic(c))
		return;

	if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
		if (!c->chacha20)
		if (!c->chacha20_key_set)
			return;

		struct nonce nonce = btree_nonce(&bn->keys, 0);
@@ -264,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
	bio_put(bio);
	free_page((unsigned long) buf);
	percpu_ref_get(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[READ]);
	closure_put(w->cl);
	kfree(w);
	return 0;
@@ -283,32 +290,32 @@ static int read_btree_nodes(struct find_btree_nodes *f)
			continue;

		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
		struct task_struct *t;

		if (!w) {
			percpu_ref_put(&ca->io_ref);
			percpu_ref_put(&ca->io_ref[READ]);
			ret = -ENOMEM;
			goto err;
		}

		percpu_ref_get(&ca->io_ref);
		closure_get(&cl);
		w->cl		= &cl;
		w->f		= f;
		w->ca		= ca;

		t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		ret = PTR_ERR_OR_ZERO(t);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			closure_put(&cl);
			f->ret = ret;
			bch_err(c, "error starting kthread: %i", ret);
			percpu_ref_put(&ca->io_ref[READ]);
			kfree(w);
			bch_err_msg(c, ret, "starting kthread");
			break;
		}

		closure_get(&cl);
		percpu_ref_get(&ca->io_ref[READ]);
		wake_up_process(t);
	}
err:
	closure_sync(&cl);
	while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2))
		;
	return f->ret ?: ret;
}
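The read_btree_nodes() rework above splits kthread_run() into kthread_create() plus wake_up_process(): references (closure_get(), percpu_ref_get()) are now taken only after thread creation is known to have succeeded, so the error path has nothing extra to unwind and the worker can never run before its references exist. A userspace analogue of the same ordering, using a pthread parked behind a gate (illustrative only):

#include <pthread.h>
#include <stdio.h>

struct worker {
	pthread_mutex_t	lock;
	pthread_cond_t	go;
	int		ready;
	int		refs;	/* stands in for closure/percpu refs */
};

static void *worker_fn(void *p)
{
	struct worker *w = p;

	pthread_mutex_lock(&w->lock);
	while (!w->ready)		/* parked, like a created-but-not-woken kthread */
		pthread_cond_wait(&w->go, &w->lock);
	pthread_mutex_unlock(&w->lock);

	printf("worker running, %d refs held\n", w->refs);
	return NULL;
}

int main(void)
{
	struct worker w = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0,
	};
	pthread_t t;

	if (pthread_create(&t, NULL, worker_fn, &w))
		return 1;	/* creation failed: no refs taken, nothing to drop */

	w.refs++;		/* safe: the worker is parked until we say go */

	pthread_mutex_lock(&w.lock);
	w.ready = 1;		/* the wake_up_process() moment */
	pthread_cond_signal(&w.go);
	pthread_mutex_unlock(&w.lock);

	return pthread_join(t, NULL);
}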

@@ -388,10 +395,10 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes found:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
		bch2_print_str(c, KERN_INFO, buf.buf);
	}

	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
	sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);

	dst = 0;
	darray_for_each(f->nodes, i) {
@@ -411,13 +418,13 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
	}
	f->nodes.nr = dst;

	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
	sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);

	if (0 && c->opts.verbose) {
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
		bch2_print_str(c, KERN_INFO, buf.buf);
	}

	swap(nodes_heap, f->nodes);
@@ -463,7 +470,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
		bch2_print_str(c, KERN_INFO, buf.buf);
	} else {
		bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
	}
@@ -572,10 +579,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,

		found_btree_node_to_key(&tmp.k, &n);

		if (c->opts.verbose) {
			struct printbuf buf = PRINTBUF;
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
			bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
			printbuf_exit(&buf);
		}

		BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
					  (struct bkey_validate_context) {

--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -20,6 +20,7 @@
#include "snapshot.h"

#include <linux/prefetch.h>
#include <linux/string_helpers.h>

static const char * const trans_commit_flags_strs[] = {
#define x(n, ...) #n,
@@ -164,6 +165,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
	EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
	kmsan_check_memory(insert, bkey_bytes(&insert->k));

	k = bch2_btree_node_iter_peek_all(node_iter, b);
	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
@@ -336,6 +338,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
	BUG_ON(i->cached	!= path->cached);
	BUG_ON(i->level		!= path->level);
	BUG_ON(i->btree_id	!= path->btree_id);
	BUG_ON(i->bkey_type	!= __btree_node_type(path->level, path->btree_id));
	EBUG_ON(!i->level &&
		btree_type_has_snapshots(i->btree_id) &&
		!(i->flags & BTREE_UPDATE_internal_snapshot_node) &&
@@ -364,7 +367,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
	struct jset_entry_log *l =
		container_of(entry, struct jset_entry_log, entry);

	strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
	memcpy_and_pad(l->d, JSET_ENTRY_LOG_U64s * sizeof(u64),
		       trans->fn, strlen(trans->fn), 0);
}
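memcpy_and_pad(dst, dst_len, src, count, pad) copies count bytes and explicitly fills the remainder of the destination with the pad byte, which sidesteps strncpy's truncation warnings and makes the zero-padding of the journal-entry window explicit. A portable sketch of the kernel helper's semantics (the real one lives in include/linux/string.h):

#include <string.h>

/* Sketch of memcpy_and_pad() behaviour, for reference only */
static void memcpy_and_pad_sketch(void *dst, size_t dst_len,
				  const void *src, size_t count, int pad)
{
	if (dst_len > count) {
		memcpy(dst, src, count);
		memset((char *)dst + count, pad, dst_len - count);
	} else {
		memcpy(dst, src, dst_len);
	}
}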

static inline int btree_key_can_insert(struct btree_trans *trans,
@@ -517,23 +521,31 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
	}
}

static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
			      unsigned *btree_id_updates_start)
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
	unsigned sort_id_start = 0;

	while (sort_id_start < trans->nr_updates) {
		unsigned i, sort_id = trans->updates[sort_id_start].sort_order;
		bool trans_trigger_run;

		/*
		 * Running triggers will append more updates to the list of updates as
		 * we're walking it:
		 * For a given btree, this algorithm runs insert triggers before
		 * overwrite triggers: this is so that when extents are being
		 * moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop
		 * references before they are re-added.
		 *
		 * Running triggers will append more updates to the list of
		 * updates as we're walking it:
		 */
		do {
			trans_trigger_run = false;

			for (unsigned i = *btree_id_updates_start;
			     i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
			for (i = sort_id_start;
			     i < trans->nr_updates && trans->updates[i].sort_order <= sort_id;
			     i++) {
				if (trans->updates[i].btree_id < btree_id) {
					*btree_id_updates_start = i;
				if (trans->updates[i].sort_order < sort_id) {
					sort_id_start = i;
					continue;
				}

@@ -545,41 +557,9 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
			}
		} while (trans_trigger_run);

	trans_for_each_update(trans, i)
		BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
		       i->btree_id == btree_id &&
		       btree_node_type_has_trans_triggers(i->bkey_type) &&
		       (!i->insert_trigger_run || !i->overwrite_trigger_run));

	return 0;
}

static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
	unsigned btree_id = 0, btree_id_updates_start = 0;
	int ret = 0;

	/*
	 *
	 * For a given btree, this algorithm runs insert triggers before
	 * overwrite triggers: this is so that when extents are being moved
	 * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
	 * they are re-added.
	 */
	for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
		if (btree_id == BTREE_ID_alloc)
			continue;

		ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start);
		if (ret)
			return ret;
		sort_id_start = i;
	}

	btree_id_updates_start = 0;
	ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start);
	if (ret)
		return ret;

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
@@ -903,6 +883,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
	struct bch_fs *c = trans->c;
	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;

	if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) {
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
		    watermark < BCH_WATERMARK_reclaim) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			goto out;
		}

		ret = drop_locks_do(trans,
			bch2_trans_journal_res_get(trans,
					(flags & BCH_WATERMARK_MASK)|
					JOURNAL_RES_GET_CHECK));
		goto out;
	}

	switch (ret) {
	case -BCH_ERR_btree_insert_btree_node_full:
		ret = bch2_btree_split_leaf(trans, i->path, flags);
@@ -914,22 +912,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
		ret = drop_locks_do(trans,
			bch2_accounting_update_sb(trans));
		break;
	case -BCH_ERR_journal_res_get_blocked:
		/*
		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
		 * flag
		 */
		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
		    watermark < BCH_WATERMARK_reclaim) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			break;
		}

		ret = drop_locks_do(trans,
			bch2_trans_journal_res_get(trans,
					(flags & BCH_WATERMARK_MASK)|
					JOURNAL_RES_GET_CHECK));
		break;
	case -BCH_ERR_btree_insert_need_journal_reclaim:
		bch2_trans_unlock(trans);

@@ -950,7 +932,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
		BUG_ON(ret >= 0);
		break;
	}

out:
	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);

	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
@@ -999,6 +981,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)

	bch2_trans_verify_not_unlocked_or_in_restart(trans);

	ret = trans_maybe_inject_restart(trans, _RET_IP_);
	if (unlikely(ret))
		goto out_reset;

	if (!trans->nr_updates &&
	    !trans->journal_entries_u64s)
		goto out_reset;

--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -139,6 +139,7 @@ struct btree {
};

#define BCH_BTREE_CACHE_NOT_FREED_REASONS()	\
	x(cache_reserve)			\
	x(lock_intent)				\
	x(lock_write)				\
	x(dirty)				\
@@ -367,7 +368,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
 * @nodes_intent_locked	- bitmask indicating which locks are intent locks
 */
struct btree_iter {
	struct btree_trans	*trans;
	btree_path_idx_t	path;
	btree_path_idx_t	update_path;
	btree_path_idx_t	key_cache_path;
@@ -423,6 +423,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)

struct btree_insert_entry {
	unsigned		flags;
	u8			sort_order;
	u8			bkey_type;
	enum btree_id		btree_id:8;
	u8			level:4;
@@ -477,6 +478,12 @@ struct btree_trans_paths {
	struct btree_path	paths[];
};

struct trans_kmalloc_trace {
	unsigned long		ip;
	size_t			bytes;
};
typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;

struct btree_trans {
	struct bch_fs		*c;

@@ -488,6 +495,9 @@ struct btree_trans {
	void			*mem;
	unsigned		mem_top;
	unsigned		mem_bytes;
#ifdef CONFIG_BCACHEFS_DEBUG
	darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif

	btree_path_idx_t	nr_sorted;
	btree_path_idx_t	nr_paths;
@@ -509,6 +519,9 @@ struct btree_trans {
	bool			notrace_relock_fail:1;
	enum bch_errcode	restarted:16;
	u32			restart_count;
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
	u32			restart_count_this_trans;
#endif

	u64			last_begin_time;
	unsigned long		last_begin_ip;
@@ -644,13 +657,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b)
static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
	return (void *) ((u64 *) b->data + 1 + offset);
	return (void *) ((u64 *) b->data + offset);
}

static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
	u16 ret = (u64 *) p - 1 - (u64 *) b->data;
	u16 ret = (u64 *) p - (u64 *) b->data;

	EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
	return ret;
@@ -850,6 +863,18 @@ static inline bool btree_type_uses_write_buffer(enum btree_id btree)
	return BIT_ULL(btree) & mask;
}

static inline u8 btree_trigger_order(enum btree_id btree)
{
	switch (btree) {
	case BTREE_ID_alloc:
		return U8_MAX;
	case BTREE_ID_stripes:
		return U8_MAX - 1;
	default:
		return btree;
	}
}
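btree_trigger_order() is the key to the trigger-ordering rewrite in btree_trans_commit.c above: instead of special-casing BTREE_ID_alloc in a second pass, each update records a sort_order at insert time, and sorting by it pushes alloc (and now also stripes) triggers to the end. A self-contained demonstration of the resulting ordering:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Cut-down stand-in for enum btree_id; the real enum is much larger. */
enum btree_id { ID_extents, ID_alloc, ID_stripes, ID_inodes };

static const char *names[] = { "extents", "alloc", "stripes", "inodes" };

/* Mirrors btree_trigger_order(): alloc last, stripes second to last */
static uint8_t trigger_order(enum btree_id id)
{
	switch (id) {
	case ID_alloc:		return UINT8_MAX;
	case ID_stripes:	return UINT8_MAX - 1;
	default:		return (uint8_t)id;
	}
}

static int cmp(const void *l, const void *r)
{
	uint8_t a = trigger_order(*(const enum btree_id *)l);
	uint8_t b = trigger_order(*(const enum btree_id *)r);

	return (a > b) - (a < b);
}

int main(void)
{
	enum btree_id updates[] = { ID_alloc, ID_stripes, ID_extents, ID_inodes };

	qsort(updates, 4, sizeof(updates[0]), cmp);
	for (int i = 0; i < 4; i++)
		printf("%s\n", names[updates[i]]);	/* extents inodes stripes alloc */
	return 0;
}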

struct btree_root {
	struct btree		*b;

--- a/libbcachefs/btree_update.c
+++ b/libbcachefs/btree_update.c
@@ -14,10 +14,12 @@
#include "snapshot.h"
#include "trace.h"

#include <linux/string_helpers.h>

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id,	r->btree_id) ?:
	return   cmp_int(l->sort_order,	r->sort_order) ?:
		 cmp_int(l->cached,	r->cached) ?:
		 -cmp_int(l->level,	r->level) ?:
		 bpos_cmp(l->k->k.p,	r->k->k.p);
@@ -126,7 +128,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter old_iter, new_iter = { NULL };
	struct btree_iter old_iter, new_iter = {};
	struct bkey_s_c old_k, new_k;
	snapshot_id_list s;
	struct bkey_i *update;
@@ -140,7 +142,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
	bch2_trans_iter_init(trans, &old_iter, id, old_pos,
			     BTREE_ITER_not_extents|
			     BTREE_ITER_all_snapshots);
	while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
	while ((old_k = bch2_btree_iter_prev(trans, &old_iter)).k &&
	       !(ret = bkey_err(old_k)) &&
	       bkey_eq(old_pos, old_k.k->p)) {
		struct bpos whiteout_pos =
@@ -296,7 +298,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
			     BTREE_ITER_intent|
			     BTREE_ITER_with_updates|
			     BTREE_ITER_not_extents);
	k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
	k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
	if ((ret = bkey_err(k)))
		goto err;
	if (!k.k)
@@ -322,8 +324,8 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
		if (done)
			goto out;
next:
		bch2_btree_iter_advance(&iter);
		k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
		bch2_btree_iter_advance(trans, &iter);
		k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
		if ((ret = bkey_err(k)))
			goto err;
		if (!k.k)
@@ -397,6 +399,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.sort_order	= btree_trigger_order(path->btree_id),
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
@@ -511,6 +514,8 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
{
	kmsan_check_memory(k, bkey_bytes(&k->k));

	btree_path_idx_t path_idx = iter->update_path ?: iter->path;
	int ret;

@@ -589,13 +594,13 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter);
	int ret = bkey_err(k);
	if (ret)
		goto err;

	bch2_btree_iter_advance(iter);
	k = bch2_btree_iter_peek_slot(iter);
	bch2_btree_iter_advance(trans, iter);
	k = bch2_btree_iter_peek_slot(trans, iter);
	ret = bkey_err(k);
	if (ret)
		goto err;
@@ -631,7 +636,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
			     BTREE_ITER_cached|
			     BTREE_ITER_not_extents|
			     BTREE_ITER_intent);
	ret   = bch2_btree_iter_traverse(&iter) ?:
	ret   = bch2_btree_iter_traverse(trans, &iter) ?:
		bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
@@ -643,7 +648,7 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_intent|flags);
	int ret = bch2_btree_iter_traverse(&iter) ?:
	int ret = bch2_btree_iter_traverse(trans, &iter) ?:
		  bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
@@ -692,7 +697,7 @@ int bch2_btree_delete(struct btree_trans *trans,
	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_cached|
			     BTREE_ITER_intent);
	ret   = bch2_btree_iter_traverse(&iter) ?:
	ret   = bch2_btree_iter_traverse(trans, &iter) ?:
		bch2_btree_delete_at(trans, &iter, update_flags);
	bch2_trans_iter_exit(trans, &iter);

@@ -710,7 +715,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
	while ((k = bch2_btree_iter_peek_max(&iter, end)).k) {
	while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;
@@ -805,7 +810,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);

	int ret = bch2_btree_iter_traverse(&iter) ?:
	int ret = bch2_btree_iter_traverse(trans, &iter) ?:
		  bch2_btree_bit_mod_iter(trans, &iter, set);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
@@ -826,7 +831,6 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
{
	unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
	prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);

	int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
@@ -839,7 +843,20 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)

	struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
	journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
	memcpy(l->d, buf->buf, buf->pos);
	memcpy_and_pad(l->d, u64s * sizeof(u64), buf->buf, buf->pos, 0);
	return 0;
}

int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
			unsigned level, struct bkey_i *k)
{
	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
	int ret = PTR_ERR_OR_ZERO(e);
	if (ret)
		return ret;

	journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s);
	bkey_copy(e->start, k);
	return 0;
}

@@ -852,7 +869,6 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
	prt_vprintf(&buf, fmt, args);

	unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
	prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);

	int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
@@ -865,7 +881,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,

		struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
		journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
		memcpy(l->d, buf.buf, buf.pos);
		memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0);
		c->journal.early_journal_entries.nr += jset_u64s(u64s);
	} else {
		ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,

--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -126,10 +126,20 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)

int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);

int bch2_btree_write_buffer_insert_err(struct btree_trans *,
				       enum btree_id, struct bkey_i *);

static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
							  enum btree_id btree,
							  struct bkey_i *k)
{
	kmsan_check_memory(k, bkey_bytes(&k->k));

	if (unlikely(!btree_type_uses_write_buffer(btree))) {
		int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
		dump_stack();
		return ret;
	}
	/*
	 * Most updates skip the btree write buffer until journal replay is
	 * finished because synchronization with journal replay relies on having
@@ -160,6 +170,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
int __bch2_trans_commit(struct btree_trans *, unsigned);

int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *);

__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);

@@ -210,7 +222,7 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
	trans->extra_disk_res		= 0;
}

static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
						  unsigned type, unsigned min_bytes)
{
	unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
@@ -233,7 +245,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
	return mut;
}

static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
{
	return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
}

--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -35,6 +35,8 @@ static const char * const bch2_btree_update_modes[] = {
	NULL
};

static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);

static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
				  btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@@ -54,6 +56,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
	struct bkey_buf prev;
	int ret = 0;

	printbuf_indent_add_nextline(&buf, 2);

	BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	       !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
			b->data->min_key));
@@ -64,19 +68,20 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)

	if (b == btree_node_root(c, b)) {
		if (!bpos_eq(b->data->min_key, POS_MIN)) {
			printbuf_reset(&buf);
			ret = __bch2_topology_error(c, &buf);

			bch2_bpos_to_text(&buf, b->data->min_key);
			log_fsck_err(trans, btree_root_bad_min_key,
				     "btree root with incorrect min_key: %s", buf.buf);
			goto topology_repair;
			goto out;
		}

		if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
			printbuf_reset(&buf);
			ret = __bch2_topology_error(c, &buf);
			bch2_bpos_to_text(&buf, b->data->max_key);
			log_fsck_err(trans, btree_root_bad_max_key,
				     "btree root with incorrect max_key: %s", buf.buf);
			goto topology_repair;
			goto out;
		}
	}

@@ -94,20 +99,19 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
			: bpos_successor(prev.k->k.p);

		if (!bpos_eq(expected_min, bp.v->min_key)) {
			bch2_topology_error(c);
			ret = __bch2_topology_error(c, &buf);

			printbuf_reset(&buf);
			prt_str(&buf, "end of prev node doesn't match start of next node\n  in ");
			prt_str(&buf, "end of prev node doesn't match start of next node\nin ");
			bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
			prt_str(&buf, " node ");
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
			prt_str(&buf, "\n  prev ");
			prt_str(&buf, "\nprev ");
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
			prt_str(&buf, "\n  next ");
			prt_str(&buf, "\nnext ");
			bch2_bkey_val_to_text(&buf, c, k);

			log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf);
			goto topology_repair;
			goto out;
		}

		bch2_bkey_buf_reassemble(&prev, c, k);
@@ -115,29 +119,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
	}

	if (bkey_deleted(&prev.k->k)) {
		bch2_topology_error(c);
		ret = __bch2_topology_error(c, &buf);

		printbuf_reset(&buf);
		prt_str(&buf, "empty interior node\n  in ");
		prt_str(&buf, "empty interior node\nin ");
		bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
		prt_str(&buf, " node ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

		log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf);
		goto topology_repair;
	} else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
		bch2_topology_error(c);
		ret = __bch2_topology_error(c, &buf);

		printbuf_reset(&buf);
		prt_str(&buf, "last child node doesn't end at end of parent node\n  in ");
		prt_str(&buf, "last child node doesn't end at end of parent node\nin ");
		bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
		prt_str(&buf, " node ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, "\n  last key ");
		prt_str(&buf, "\nlast key ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));

		log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf);
		goto topology_repair;
	}
out:
fsck_err:
@@ -145,9 +145,6 @@ fsck_err:
	bch2_bkey_buf_exit(&prev, c);
	printbuf_exit(&buf);
	return ret;
topology_repair:
	ret = bch2_topology_error(c);
	goto out;
}

/* Calculate ideal packed bkey format for new btree nodes: */
@@ -287,6 +284,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
					     struct disk_reservation *res,
					     struct closure *cl,
					     bool interior_node,
					     unsigned target,
					     unsigned flags)
{
	struct bch_fs *c = trans->c;
@@ -320,6 +318,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
	mutex_unlock(&c->btree_reserve_cache_lock);
retry:
	ret = bch2_alloc_sectors_start_trans(trans,
				      target ?:
				      c->opts.metadata_target ?:
				      c->opts.foreground_target,
				      0,
@@ -328,7 +327,9 @@ retry:
				      res->nr_replicas,
				      min(res->nr_replicas,
					  c->opts.metadata_replicas_required),
				      watermark, 0, cl, &wp);
				      watermark,
				      target ? BCH_WRITE_only_specified_devs : 0,
				      cl, &wp);
	if (unlikely(ret))
		goto err;

@@ -508,6 +509,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
static int bch2_btree_reserve_get(struct btree_trans *trans,
				  struct btree_update *as,
				  unsigned nr_nodes[2],
				  unsigned target,
				  unsigned flags,
				  struct closure *cl)
{
@@ -530,7 +532,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,

		while (p->nr < nr_nodes[interior]) {
			b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
						    interior, flags);
						    interior, target, flags);
			if (IS_ERR(b)) {
				ret = PTR_ERR(b);
				goto err;
@@ -649,6 +651,14 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
	return 0;
}

/* If the node has been reused, we might be reading uninitialized memory - that's fine: */
static noinline __no_kmsan_checks bool btree_node_seq_matches(struct btree *b, __le64 seq)
{
	struct btree_node *b_data = READ_ONCE(b->data);

	return (b_data ? b_data->keys.seq : 0) == seq;
}
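btree_node_seq_matches() deliberately drops the locked read it replaces (visible in the following hunk): if the node has been freed and its memory reused, the worst case is a garbage sequence number, and a spurious match merely causes one harmless extra wait. READ_ONCE() keeps the compiler from re-fetching the pointer, and __no_kmsan_checks tells KMSAN the potentially uninitialized read is intentional (in the kernel, freed slab memory stays mapped, so the read itself cannot fault). The bare shape of the idiom, as a sketch:

#include <stdint.h>
#include <stddef.h>

struct node_data { uint64_t seq; };
struct node { struct node_data *data; };

/* Intentionally lockless: a racing free/reuse can only yield a stale or
 * garbage sequence number, never a fault (in-kernel), and the caller
 * tolerates a wrong answer. The volatile access plays the role of
 * READ_ONCE(). */
static int node_seq_matches(struct node *b, uint64_t seq)
{
	struct node_data *d = *(struct node_data * volatile *)&b->data;

	return (d ? d->seq : 0) == seq;
}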
|
||||
|
||||
static void btree_update_nodes_written(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
@ -677,15 +687,9 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
__le64 seq;
|
||||
|
||||
b = as->old_nodes[i];
|
||||
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
if (btree_node_seq_matches(b, as->old_nodes_seq[i]))
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
@ -1117,7 +1121,8 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
|
||||
|
||||
static struct btree_update *
|
||||
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
unsigned level_start, bool split, unsigned flags)
|
||||
unsigned level_start, bool split,
|
||||
unsigned target, unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_update *as;
|
||||
@ -1222,12 +1227,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
|
||||
ret = bch2_disk_reservation_get(c, &as->disk_res,
|
||||
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
|
||||
c->opts.metadata_replicas,
|
||||
READ_ONCE(c->opts.metadata_replicas),
|
||||
disk_res_flags);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL);
|
||||
if (bch2_err_matches(ret, ENOSPC) ||
|
||||
bch2_err_matches(ret, ENOMEM)) {
|
||||
struct closure cl;
|
||||
@ -1246,7 +1251,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
closure_init_stack(&cl);
|
||||
|
||||
do {
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_wait_on_allocator(c, &cl);
|
||||
@ -1269,7 +1274,8 @@ err:
|
||||
bch2_btree_update_free(as, trans);
|
||||
if (!bch2_err_matches(ret, ENOSPC) &&
|
||||
!bch2_err_matches(ret, EROFS) &&
|
||||
ret != -BCH_ERR_journal_reclaim_would_deadlock)
|
||||
ret != -BCH_ERR_journal_reclaim_would_deadlock &&
|
||||
ret != -BCH_ERR_journal_shutdown)
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -1780,11 +1786,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
BUG_ON(!btree_node_intent_locked(path, b->c.level));
|
||||
BUG_ON(!b->c.level);
|
||||
BUG_ON(!as || as->b);
|
||||
bch2_verify_keylist_sorted(keys);
|
||||
|
||||
if (!btree_node_intent_locked(path, b->c.level)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf, "%s(): node not locked at level %u\n",
|
||||
__func__, b->c.level);
|
||||
bch2_btree_update_to_text(&buf, as);
|
||||
bch2_btree_path_to_text(&buf, trans, path_idx);
|
||||
|
||||
bch2_print_str(c, KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
bch2_fs_emergency_read_only(c);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &b->c);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1850,7 +1869,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
|
||||
|
||||
as = bch2_btree_update_start(trans, trans->paths + path,
|
||||
trans->paths[path].level,
|
||||
true, flags);
|
||||
true, 0, flags);
|
||||
if (IS_ERR(as))
|
||||
return PTR_ERR(as);
|
||||
|
||||
@ -1920,7 +1939,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
|
||||
return bch2_btree_split_leaf(trans, path, flags);
|
||||
|
||||
struct btree_update *as =
|
||||
bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
|
||||
bch2_btree_update_start(trans, trans->paths + path, b->c.level,
|
||||
true, 0, flags);
|
||||
if (IS_ERR(as))
|
||||
return PTR_ERR(as);
|
||||
|
||||
@@ -2005,18 +2025,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	}
 
 	if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
-		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+		struct printbuf buf = PRINTBUF;
 
-		bch2_bpos_to_text(&buf1, prev->data->max_key);
-		bch2_bpos_to_text(&buf2, next->data->min_key);
-		bch_err(c,
-			"%s(): btree topology error:\n"
-			" prev ends at %s\n"
-			" next starts at %s",
-			__func__, buf1.buf, buf2.buf);
-		printbuf_exit(&buf1);
-		printbuf_exit(&buf2);
-		ret = bch2_topology_error(c);
+		printbuf_indent_add_nextline(&buf, 2);
+		prt_printf(&buf, "%s(): ", __func__);
+		ret = __bch2_topology_error(c, &buf);
+		prt_newline(&buf);
+
+		prt_printf(&buf, "prev ends at ");
+		bch2_bpos_to_text(&buf, prev->data->max_key);
+		prt_newline(&buf);
+
+		prt_printf(&buf, "next starts at ");
+		bch2_bpos_to_text(&buf, next->data->min_key);
+
+		bch_err(c, "%s", buf.buf);
+		printbuf_exit(&buf);
 		goto err;
 	}
 
@@ -2045,7 +2069,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
 	parent = btree_node_parent(trans->paths + path, b);
 	as = bch2_btree_update_start(trans, trans->paths + path, level, false,
-				     BCH_TRANS_COMMIT_no_enospc|flags);
+				     0, BCH_TRANS_COMMIT_no_enospc|flags);
 	ret = PTR_ERR_OR_ZERO(as);
 	if (ret)
 		goto err;
@@ -2124,9 +2148,35 @@ err_free_update:
 	goto out;
 }
 
+static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter,
+			    struct btree *b)
+{
+	bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p,
+				  BTREE_MAX_DEPTH, b->c.level,
+				  BTREE_ITER_intent);
+	int ret = bch2_btree_iter_traverse(trans, iter);
+	if (ret)
+		goto err;
+
+	/* has node been freed? */
+	if (btree_iter_path(trans, iter)->l[b->c.level].b != b) {
+		/* node has been freed: */
+		BUG_ON(!btree_node_dying(b));
+		ret = -BCH_ERR_btree_node_dying;
+		goto err;
+	}
+
+	BUG_ON(!btree_node_hashed(b));
+	return 0;
+err:
+	bch2_trans_iter_exit(trans, iter);
+	return ret;
+}
+
 int bch2_btree_node_rewrite(struct btree_trans *trans,
 			    struct btree_iter *iter,
 			    struct btree *b,
+			    unsigned target,
 			    unsigned flags)
 {
 	struct bch_fs *c = trans->c;
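get_iter_to_node() centralizes the dance of pointing an iterator at a node we already hold a pointer to, and detecting the case where the node was freed underneath us. Callers shown later in this diff treat -BCH_ERR_btree_node_dying as a benign race. A sketch of the caller pattern (illustrative only):

	static int example_use_node(struct btree_trans *trans, struct btree *b)
	{
		struct btree_iter iter;
		int ret = get_iter_to_node(trans, &iter, b);
		if (ret)
			return ret == -BCH_ERR_btree_node_dying ? 0 : ret;

		/* ... operate on b through iter ... */
		bch2_trans_iter_exit(trans, &iter);
		return 0;
	}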
@@ -2139,7 +2189,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
 	struct btree_path *path = btree_iter_path(trans, iter);
 	parent = btree_node_parent(path, b);
-	as = bch2_btree_update_start(trans, path, b->c.level, false, flags);
+	as = bch2_btree_update_start(trans, path, b->c.level,
+				     false, target, flags);
 	ret = PTR_ERR_OR_ZERO(as);
 	if (ret)
 		goto out;
@@ -2189,26 +2240,63 @@ err:
 	goto out;
 }
 
-int bch2_btree_node_rewrite_key(struct btree_trans *trans,
+static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
 				enum btree_id btree, unsigned level,
-				struct bpos pos, unsigned flags)
+				struct bkey_i *k, unsigned flags)
+{
+	struct btree_iter iter;
+	bch2_trans_node_iter_init(trans, &iter,
+				  btree, k->k.p,
+				  BTREE_MAX_DEPTH, level, 0);
+	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
+	int ret = PTR_ERR_OR_ZERO(b);
+	if (ret)
+		goto out;
+
+	bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k);
+	ret = found
+		? bch2_btree_node_rewrite(trans, &iter, b, 0, flags)
+		: -ENOENT;
+out:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
+				enum btree_id btree, unsigned level,
+				struct bpos pos,
+				unsigned target,
+				unsigned flags)
 {
 	BUG_ON(!level);
 
 	/* Traverse one depth lower to get a pointer to the node itself: */
 	struct btree_iter iter;
 	bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0);
-	struct btree *b = bch2_btree_iter_peek_node(&iter);
+	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
 	int ret = PTR_ERR_OR_ZERO(b);
 	if (ret)
 		goto err;
 
-	ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
+	ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags);
 err:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
 int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
 					 struct btree *b, unsigned flags)
 {
 	struct btree_iter iter;
 	int ret = get_iter_to_node(trans, &iter, b);
 	if (ret)
 		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
 
 	ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
 struct async_btree_rewrite {
 	struct bch_fs *c;
 	struct work_struct work;
@@ -2218,58 +2306,17 @@ struct async_btree_rewrite {
 	struct bkey_buf key;
 };
 
-static int async_btree_node_rewrite_trans(struct btree_trans *trans,
-					  struct async_btree_rewrite *a)
-{
-	struct btree_iter iter;
-	bch2_trans_node_iter_init(trans, &iter,
-				  a->btree_id, a->key.k->k.p,
-				  BTREE_MAX_DEPTH, a->level, 0);
-	struct btree *b = bch2_btree_iter_peek_node(&iter);
-	int ret = PTR_ERR_OR_ZERO(b);
-	if (ret)
-		goto out;
-
-	bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k);
-	ret = found
-		? bch2_btree_node_rewrite(trans, &iter, b, 0)
-		: -ENOENT;
-
-#if 0
-	/* Tracepoint... */
-	if (!ret || ret == -ENOENT) {
-		struct bch_fs *c = trans->c;
-		struct printbuf buf = PRINTBUF;
-
-		if (!ret) {
-			prt_printf(&buf, "rewrite node:\n ");
-			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k));
-		} else {
-			prt_printf(&buf, "node to rewrite not found:\n want: ");
-			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k));
-			prt_printf(&buf, "\n got: ");
-			if (b)
-				bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-			else
-				prt_str(&buf, "(null)");
-		}
-		bch_info(c, "%s", buf.buf);
-		printbuf_exit(&buf);
-	}
-#endif
-out:
-	bch2_trans_iter_exit(trans, &iter);
-	return ret;
-}
-
 static void async_btree_node_rewrite_work(struct work_struct *work)
 {
 	struct async_btree_rewrite *a =
 		container_of(work, struct async_btree_rewrite, work);
 	struct bch_fs *c = a->c;
 
-	int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a));
-	if (ret != -ENOENT)
+	int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans,
+					a->btree_id, a->level, a->key.k, 0));
+	if (ret != -ENOENT &&
+	    !bch2_err_matches(ret, EROFS) &&
+	    ret != -BCH_ERR_journal_shutdown)
 		bch_err_fn_ratelimited(c, ret);
 
 	spin_lock(&c->btree_node_rewrites_lock);
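The async worker now reuses bch2_btree_node_rewrite_key() instead of carrying its own transaction helper, and filters expected shutdown-path errors before ratelimited logging. bch2_trans_do(c, expr) is the usual wrapper here: it sets up a btree_trans, evaluates the expression (re-running it on transaction restart), and tears the transaction down. Sketch with a hypothetical callee (illustrative only):

	/* some_transactional_fn is a placeholder, not a real function */
	int ret = bch2_trans_do(c, some_transactional_fn(trans, arg));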
@@ -2370,7 +2417,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 					bool skip_triggers)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter2 = { NULL };
+	struct btree_iter iter2 = {};
 	struct btree *parent;
 	int ret;
 
@@ -2394,7 +2441,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 
 	parent = btree_node_parent(btree_iter_path(trans, iter), b);
 	if (parent) {
-		bch2_trans_copy_iter(&iter2, iter);
+		bch2_trans_copy_iter(trans, &iter2, iter);
 
 		iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
 				iter2.flags & BTREE_ITER_intent,
@@ -2408,7 +2455,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 
 		trans->paths_sorted = false;
 
-		ret = bch2_btree_iter_traverse(&iter2) ?:
+		ret = bch2_btree_iter_traverse(trans, &iter2) ?:
 			bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun);
 		if (ret)
 			goto err;
@@ -2512,30 +2559,15 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
 					unsigned commit_flags, bool skip_triggers)
 {
 	struct btree_iter iter;
-	int ret;
-
-	bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
-				  BTREE_MAX_DEPTH, b->c.level,
-				  BTREE_ITER_intent);
-	ret = bch2_btree_iter_traverse(&iter);
+	int ret = get_iter_to_node(trans, &iter, b);
 	if (ret)
-		goto out;
-
-	/* has node been freed? */
-	if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) {
-		/* node has been freed: */
-		BUG_ON(!btree_node_dying(b));
-		goto out;
-	}
-
-	BUG_ON(!btree_node_hashed(b));
+		return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
 
 	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
 			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
 
 	ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
 					 commit_flags, skip_triggers);
-out:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
libbcachefs/btree_update_interior.h

@@ -168,10 +168,13 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
 }
 
 int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
-			    struct btree *, unsigned);
-int bch2_btree_node_rewrite_key(struct btree_trans *,
+			    struct btree *, unsigned, unsigned);
+int bch2_btree_node_rewrite_pos(struct btree_trans *,
 				enum btree_id, unsigned,
-				struct bpos, unsigned);
+				struct bpos, unsigned, unsigned);
 int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *,
 					 struct btree *, unsigned);
 
 void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
 
 int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
@@ -282,12 +285,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
 {
 	struct bset_tree *t = bset_tree_last(b);
 	struct btree_node_entry *bne = max(write_block(b),
-			(void *) btree_bkey_last(b, bset_tree_last(b)));
+			(void *) btree_bkey_last(b, t));
 	ssize_t remaining_space =
 		__bch2_btree_u64s_remaining(b, bne->keys.start);
 
 	if (unlikely(bset_written(b, bset(b, t)))) {
-		if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
+		if (b->written + block_sectors(c) <= btree_sectors(c))
 			return bne;
 	} else {
 		if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
libbcachefs/btree_write_buffer.c

@@ -144,7 +144,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 	EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq);
 	EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
 
-	ret = bch2_btree_iter_traverse(iter);
+	ret = bch2_btree_iter_traverse(trans, iter);
 	if (ret)
 		return ret;
 
@@ -208,7 +208,7 @@ btree_write_buffered_insert(struct btree_trans *trans,
 
 	trans->journal_res.seq = wb->journal_seq;
 
-	ret = bch2_btree_iter_traverse(&iter) ?:
+	ret = bch2_btree_iter_traverse(trans, &iter) ?:
 		bch2_trans_update(trans, &iter, &wb->k,
 				  BTREE_UPDATE_internal_snapshot_node);
 	bch2_trans_iter_exit(trans, &iter);
@@ -264,12 +264,28 @@ out:
 	BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
 }
 
+int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+				       enum btree_id btree, struct bkey_i *k)
+{
+	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
+
+	prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
+	bch2_btree_id_to_text(&buf, btree);
+	prt_str(&buf, "\n");
+	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+	bch2_fs_inconsistent(c, "%s", buf.buf);
+	printbuf_exit(&buf);
+	return -EROFS;
+}
+
 static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
 	struct journal *j = &c->journal;
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
-	struct btree_iter iter = { NULL };
+	struct btree_iter iter = {};
 	size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
 	bool write_locked = false;
 	bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 	darray_for_each(wb->sorted, i) {
 		struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
 
-		BUG_ON(!btree_type_uses_write_buffer(k->btree));
+		if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
+			ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+			goto err;
+		}
 
 		for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
 			prefetch(&wb->flushing.keys.data[n->idx]);
@@ -349,7 +368,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 				write_locked = false;
 
 				ret = lockrestart_do(trans,
-					bch2_btree_iter_traverse(&iter) ?:
+					bch2_btree_iter_traverse(trans, &iter) ?:
 					bch2_foreground_maybe_merge(trans, iter.path, 0,
 							BCH_WATERMARK_reclaim|
 							BCH_TRANS_COMMIT_journal_reclaim|
@@ -366,7 +385,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 					       BTREE_ITER_intent|BTREE_ITER_all_snapshots);
 		}
 
-		bch2_btree_iter_set_pos(&iter, k->k.k.p);
+		bch2_btree_iter_set_pos(trans, &iter, k->k.k.p);
 		btree_iter_path(trans, &iter)->preserve = false;
 
 		bool accounting_accumulated = false;
@@ -409,7 +428,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 		 */
 		trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
 
-		sort(wb->flushing.keys.data,
+		sort_nonatomic(wb->flushing.keys.data,
 		     wb->flushing.keys.nr,
 		     sizeof(wb->flushing.keys.data[0]),
 		     wb_key_seq_cmp, NULL);
@@ -847,13 +866,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
 	darray_exit(&wb->inc.keys);
 }
 
-int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
+void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
 {
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
 	mutex_init(&wb->inc.lock);
 	mutex_init(&wb->flushing.lock);
 	INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
+}
+
+int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
 	/* Will be resized by journal as needed: */
 	unsigned initial_size = 1 << 16;
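Initialization is split into an early phase (locks and work items, which cannot fail and must exist before other subsystems poke the write buffer) and the later allocating phase. A sketch of the intended bringup order (illustrative; error handling elided):

	static int example_wb_bringup(struct bch_fs *c)
	{
		bch2_fs_btree_write_buffer_init_early(c);	/* locks + work item; cannot fail */
		return bch2_fs_btree_write_buffer_init(c);	/* allocations; may fail */
	}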
|
@ -101,6 +101,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t
|
||||
|
||||
int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
|
||||
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
|
||||
void bch2_fs_btree_write_buffer_init_early(struct bch_fs *);
|
||||
int bch2_fs_btree_write_buffer_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */
|
||||
|
libbcachefs/buckets.c

@@ -29,9 +29,16 @@
 #include <linux/preempt.h>
 
 void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
 {
+	for (unsigned i = 0; i < BCH_DATA_NR; i++)
+		usage->buckets[i] = percpu_u64_get(&ca->usage->d[i].buckets);
+}
+
+void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage)
+{
 	memset(usage, 0, sizeof(*usage));
-	acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
+	acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage,
+			sizeof(struct bch_dev_usage_full) / sizeof(u64));
 }
 
 static u64 reserve_factor(u64 r)
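bch2_dev_usage_read_fast() now fills only per-data-type bucket counts, one percpu sum per type, while the full breakdown (buckets, sectors, fragmented) moves to bch2_dev_usage_full_read_fast(). percpu_u64_get() is the usual sum-across-CPUs read; conceptually it does something like this sketch (not the actual implementation):

	static u64 example_percpu_sum(u64 __percpu *v)
	{
		u64 ret = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			ret += *per_cpu_ptr(v, cpu);
		return ret;
	}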
@@ -75,7 +82,7 @@ bch2_fs_usage_read_short(struct bch_fs *c)
 
 void bch2_dev_usage_to_text(struct printbuf *out,
 			    struct bch_dev *ca,
-			    struct bch_dev_usage *usage)
+			    struct bch_dev_usage_full *usage)
 {
 	if (out->nr_tabstops < 5) {
 		printbuf_tabstops_reset(out);
@@ -365,7 +372,7 @@ found:
 	struct btree_iter iter;
 	bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
 				  BTREE_ITER_intent|BTREE_ITER_all_snapshots);
-	ret = bch2_btree_iter_traverse(&iter) ?:
+	ret = bch2_btree_iter_traverse(trans, &iter) ?:
 		bch2_trans_update(trans, &iter, new,
 				  BTREE_UPDATE_internal_snapshot_node|
 				  BTREE_TRIGGER_norun);
@@ -381,6 +388,31 @@ err:
 	return ret;
 }
 
+static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf,
+				 struct bkey_s_c k, bool insert, enum bch_sb_error_id id)
+{
+	struct bch_fs *c = trans->c;
+
+	prt_printf(buf, "\nwhile marking ");
+	bch2_bkey_val_to_text(buf, c, k);
+	prt_newline(buf);
+
+	bool print = __bch2_count_fsck_err(c, id, buf);
+
+	int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf,
+					BCH_RECOVERY_PASS_check_allocations);
+
+	if (insert) {
+		bch2_trans_updates_to_text(buf, trans);
+		__bch2_inconsistent_error(c, buf);
+		ret = -BCH_ERR_bucket_ref_update;
+	}
+
+	if (print || insert)
+		bch2_print_str(c, KERN_ERR, buf->buf);
+	return ret;
+}
+
 int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
 			   struct bkey_s_c k,
 			   const struct bch_extent_ptr *ptr,
@@ -396,32 +428,29 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
 
 	BUG_ON(!sectors);
 
-	if (gen_after(ptr->gen, b_gen)) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, ptr_gen_newer_than_bucket_gen,
-			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
-			"while marking %s",
+	if (unlikely(gen_after(ptr->gen, b_gen))) {
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf,
+			   "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen",
 			   ptr->dev, bucket_nr, b_gen,
 			   bch2_data_type_str(bucket_data_type ?: ptr_data_type),
-			   ptr->gen,
-			   (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-		if (inserting)
-			goto err;
+			   ptr->gen);
+
+		ret = bucket_ref_update_err(trans, &buf, k, inserting,
+					    BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen);
 		goto out;
 	}
 
-	if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, ptr_too_stale,
-			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
-			"while marking %s",
+	if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) {
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf,
+			   "bucket %u:%zu gen %u data type %s: ptr gen %u too stale",
 			   ptr->dev, bucket_nr, b_gen,
 			   bch2_data_type_str(bucket_data_type ?: ptr_data_type),
-			   ptr->gen,
-			   (printbuf_reset(&buf),
-			    bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-		if (inserting)
-			goto err;
+			   ptr->gen);
+
+		ret = bucket_ref_update_err(trans, &buf, k, inserting,
+					    BCH_FSCK_ERR_ptr_too_stale);
 		goto out;
 	}
 
@@ -430,62 +459,50 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
 		goto out;
 	}
 
-	if (b_gen != ptr->gen) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, stale_dirty_ptr,
-			"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
-			"while marking %s",
+	if (unlikely(b_gen != ptr->gen)) {
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf,
+			   "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)",
 			   ptr->dev, bucket_nr, b_gen,
 			   bucket_gen_get(ca, bucket_nr),
 			   bch2_data_type_str(bucket_data_type ?: ptr_data_type),
-			   ptr->gen,
-			   (printbuf_reset(&buf),
-			    bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-		if (inserting)
-			goto err;
+			   ptr->gen);
+
+		ret = bucket_ref_update_err(trans, &buf, k, inserting,
+					    BCH_FSCK_ERR_stale_dirty_ptr);
 		goto out;
 	}
 
-	if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, ptr_bucket_data_type_mismatch,
-			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
-			"while marking %s",
+	if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) {
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
 			   ptr->dev, bucket_nr, b_gen,
 			   bch2_data_type_str(bucket_data_type),
-			   bch2_data_type_str(ptr_data_type),
-			   (printbuf_reset(&buf),
-			    bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-		if (inserting)
-			goto err;
+			   bch2_data_type_str(ptr_data_type));
+
+		ret = bucket_ref_update_err(trans, &buf, k, inserting,
+					    BCH_FSCK_ERR_ptr_bucket_data_type_mismatch);
 		goto out;
 	}
 
-	if ((u64) *bucket_sectors + sectors > U32_MAX) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, bucket_sector_count_overflow,
-			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
-			"while marking %s",
+	if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) {
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf,
+			   "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX",
 			   ptr->dev, bucket_nr, b_gen,
 			   bch2_data_type_str(bucket_data_type ?: ptr_data_type),
-			   *bucket_sectors, sectors,
-			   (printbuf_reset(&buf),
-			    bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-		if (inserting)
-			goto err;
+			   *bucket_sectors, sectors);
+
+		ret = bucket_ref_update_err(trans, &buf, k, inserting,
+					    BCH_FSCK_ERR_bucket_sector_count_overflow);
 		sectors = -*bucket_sectors;
 		goto out;
 	}
 
 	*bucket_sectors += sectors;
 out:
 	printbuf_exit(&buf);
 	return ret;
-err:
-fsck_err:
-	bch2_dump_trans_updates(trans);
-	bch2_inconsistent_error(c);
-	ret = -BCH_ERR_bucket_ref_update;
-	goto out;
 }
 
 void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
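All four consistency checks above now funnel through bucket_ref_update_err(), building their messages with the printbuf API rather than log_fsck_err() format strings. For reference, the printbuf lifecycle used throughout these hunks, as a sketch (err_code is a placeholder, not from this diff):

	static void example_report(struct bch_fs *c, unsigned err_code)
	{
		struct printbuf buf = PRINTBUF;		/* dynamically grown buffer */

		bch2_log_msg_start(c, &buf);		/* standard log prefix */
		prt_printf(&buf, "something went wrong: %u", err_code);
		bch2_print_str(c, KERN_ERR, buf.buf);
		printbuf_exit(&buf);			/* frees the allocation */
	}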
@@ -590,12 +607,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
 		if (ret)
 			goto err;
 
-		if (!p.ptr.cached) {
-			ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
-			if (ret)
-				goto err;
-		}
+		ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
+		if (ret)
+			goto err;
 	}
 
 	if (flags & BTREE_TRIGGER_gc) {
 		struct bucket *g = gc_bucket(ca, bucket.offset);
@@ -653,9 +668,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
 					       stripe_blockcount_get(&s->v, p.ec.block) +
 					       sectors);
 
-		struct disk_accounting_pos acc = {
-			.type = BCH_DISK_ACCOUNTING_replicas,
-		};
+		struct disk_accounting_pos acc;
+		memset(&acc, 0, sizeof(acc));
+		acc.type = BCH_DISK_ACCOUNTING_replicas;
 		bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
 		acc.replicas.data_type = data_type;
 		ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
@@ -674,26 +689,28 @@ err:
 		return -BCH_ERR_ENOMEM_mark_stripe_ptr;
 	}
 
-	mutex_lock(&c->ec_stripes_heap_lock);
+	gc_stripe_lock(m);
 
 	if (!m || !m->alive) {
-		mutex_unlock(&c->ec_stripes_heap_lock);
+		gc_stripe_unlock(m);
 		struct printbuf buf = PRINTBUF;
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
+			   (u64) p.ec.idx);
 		bch2_bkey_val_to_text(&buf, c, k);
-		bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
-				    (u64) p.ec.idx, buf.buf);
+		__bch2_inconsistent_error(c, &buf);
+		bch2_print_str(c, KERN_ERR, buf.buf);
 		printbuf_exit(&buf);
-		bch2_inconsistent_error(c);
 		return -BCH_ERR_trigger_stripe_pointer;
 	}
 
 	m->block_sectors[p.ec.block] += sectors;
 
-	struct disk_accounting_pos acc = {
-		.type = BCH_DISK_ACCOUNTING_replicas,
-	};
-	memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
-	mutex_unlock(&c->ec_stripes_heap_lock);
+	struct disk_accounting_pos acc;
+	memset(&acc, 0, sizeof(acc));
+	acc.type = BCH_DISK_ACCOUNTING_replicas;
+	unsafe_memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e), "VLA");
+	gc_stripe_unlock(m);
 
 	acc.replicas.data_type = data_type;
 	int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true);
@@ -719,16 +736,14 @@ static int __trigger_extent(struct btree_trans *trans,
 		: BCH_DATA_user;
 	int ret = 0;
 
-	struct disk_accounting_pos acc_replicas_key = {
-		.type			= BCH_DISK_ACCOUNTING_replicas,
-		.replicas.data_type	= data_type,
-		.replicas.nr_devs	= 0,
-		.replicas.nr_required	= 1,
-	};
+	struct disk_accounting_pos acc_replicas_key;
+	memset(&acc_replicas_key, 0, sizeof(acc_replicas_key));
+	acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas;
+	acc_replicas_key.replicas.data_type = data_type;
+	acc_replicas_key.replicas.nr_devs = 0;
+	acc_replicas_key.replicas.nr_required = 1;
 
-	struct disk_accounting_pos acct_compression_key = {
-		.type			= BCH_DISK_ACCOUNTING_compression,
-	};
+	unsigned cur_compression_type = 0;
 	u64 compression_acct[3] = { 1, 0, 0 };
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
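Throughout this file, designated initializers for struct disk_accounting_pos give way to memset() plus explicit assignments. A plausible motivation, not stated in the diff itself: these keys get converted to bkey positions and compared as raw bytes, and C does not guarantee that designated initialization zeroes padding bytes, while memset() does. A minimal standalone illustration of the pattern:

	#include <string.h>

	struct key { char type; long v; };	/* padding between members */

	void init_key(struct key *k, long v)
	{
		/* memset() zeroes the whole object, padding included,
		 * before the fields are assigned: */
		memset(k, 0, sizeof(*k));
		k->type = 1;
		k->v = v;
	}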
@@ -762,13 +777,13 @@ static int __trigger_extent(struct btree_trans *trans,
 			acc_replicas_key.replicas.nr_required = 0;
 		}
 
-		if (acct_compression_key.compression.type &&
-		    acct_compression_key.compression.type != p.crc.compression_type) {
+		if (cur_compression_type &&
+		    cur_compression_type != p.crc.compression_type) {
 			if (flags & BTREE_TRIGGER_overwrite)
 				bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
 
-			ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
-						       ARRAY_SIZE(compression_acct), gc);
+			ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
+							compression, cur_compression_type);
 			if (ret)
 				return ret;
 
@@ -777,7 +792,7 @@ static int __trigger_extent(struct btree_trans *trans,
 			compression_acct[2] = 0;
 		}
 
-		acct_compression_key.compression.type = p.crc.compression_type;
+		cur_compression_type = p.crc.compression_type;
 		if (p.crc.compression_type) {
 			compression_acct[1] += p.crc.uncompressed_size;
 			compression_acct[2] += p.crc.compressed_size;
@@ -791,45 +806,34 @@ static int __trigger_extent(struct btree_trans *trans,
 	}
 
 	if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
-		struct disk_accounting_pos acc_snapshot_key = {
-			.type		= BCH_DISK_ACCOUNTING_snapshot,
-			.snapshot.id	= k.k->p.snapshot,
-		};
-		ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
+		ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, snapshot, k.k->p.snapshot);
 		if (ret)
 			return ret;
 	}
 
-	if (acct_compression_key.compression.type) {
+	if (cur_compression_type) {
 		if (flags & BTREE_TRIGGER_overwrite)
 			bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
 
-		ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
-					       ARRAY_SIZE(compression_acct), gc);
+		ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
						compression, cur_compression_type);
 		if (ret)
 			return ret;
 	}
 
 	if (level) {
-		struct disk_accounting_pos acc_btree_key = {
-			.type		= BCH_DISK_ACCOUNTING_btree,
-			.btree.id	= btree_id,
-		};
-		ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
+		ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, btree, btree_id);
 		if (ret)
 			return ret;
 	} else {
 		bool insert = !(flags & BTREE_TRIGGER_overwrite);
-		struct disk_accounting_pos acc_inum_key = {
-			.type		= BCH_DISK_ACCOUNTING_inum,
-			.inum.inum	= k.k->p.inode,
-		};
 
 		s64 v[3] = {
 			insert ? 1 : -1,
 			insert ? k.k->size : -((s64) k.k->size),
 			*replicas_sectors,
 		};
-		ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
+		ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode);
 		if (ret)
 			return ret;
 	}
@@ -878,15 +882,15 @@ int bch2_trigger_extent(struct btree_trans *trans,
 	}
 
 	int need_rebalance_delta = 0;
-	s64 need_rebalance_sectors_delta = 0;
+	s64 need_rebalance_sectors_delta[1] = { 0 };
 
 	s64 s = bch2_bkey_sectors_need_rebalance(c, old);
 	need_rebalance_delta -= s != 0;
-	need_rebalance_sectors_delta -= s;
+	need_rebalance_sectors_delta[0] -= s;
 
 	s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
 	need_rebalance_delta += s != 0;
-	need_rebalance_sectors_delta += s;
+	need_rebalance_sectors_delta[0] += s;
 
 	if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
 		int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
@@ -895,12 +899,9 @@ int bch2_trigger_extent(struct btree_trans *trans,
 			return ret;
 	}
 
-	if (need_rebalance_sectors_delta) {
-		struct disk_accounting_pos acc = {
-			.type = BCH_DISK_ACCOUNTING_rebalance_work,
-		};
-		int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
-						   flags & BTREE_TRIGGER_gc);
+	if (need_rebalance_sectors_delta[0]) {
+		int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
+						    need_rebalance_sectors_delta, rebalance_work);
 		if (ret)
 			return ret;
 	}
@@ -916,17 +917,13 @@ static int __trigger_reservation(struct btree_trans *trans,
 				 enum btree_iter_update_trigger_flags flags)
 {
 	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
-		s64 sectors = k.k->size;
+		s64 sectors[1] = { k.k->size };
 
 		if (flags & BTREE_TRIGGER_overwrite)
-			sectors = -sectors;
+			sectors[0] = -sectors[0];
 
-		struct disk_accounting_pos acc = {
-			.type = BCH_DISK_ACCOUNTING_persistent_reserved,
-			.persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
-		};
-
-		return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, flags & BTREE_TRIGGER_gc);
+		return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, sectors,
+						 persistent_reserved, bkey_s_c_to_reservation(k).v->nr_replicas);
 	}
 
 	return 0;
@@ -957,14 +954,23 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
 		return PTR_ERR(a);
 
 	if (a->v.data_type && type && a->v.data_type != type) {
-		bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
-		log_fsck_err(trans, bucket_metadata_type_mismatch,
-			"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
-			"while marking %s",
+		struct printbuf buf = PRINTBUF;
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+			   "while marking %s\n",
 			   iter.pos.inode, iter.pos.offset, a->v.gen,
 			   bch2_data_type_str(a->v.data_type),
-			   bch2_data_type_str(type),
+			   bch2_data_type_str(type));
+
+		bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf);
+
+		bch2_run_explicit_recovery_pass_printbuf(c, &buf,
+					BCH_RECOVERY_PASS_check_allocations);
+
+		if (print)
+			bch2_print_str(c, KERN_ERR, buf.buf);
+		printbuf_exit(&buf);
 		ret = -BCH_ERR_metadata_bucket_inconsistency;
 		goto err;
 	}
@@ -976,7 +982,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
 		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
 	}
 err:
-fsck_err:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
@@ -1137,7 +1142,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
 	for_each_online_member(c, ca) {
 		int ret = bch2_trans_mark_dev_sb(c, ca, flags);
 		if (ret) {
-			percpu_ref_put(&ca->io_ref);
+			percpu_ref_put(&ca->io_ref[READ]);
 			return ret;
 		}
 	}
@@ -1336,7 +1341,7 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
 
 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
 {
-	ca->usage = alloc_percpu(struct bch_dev_usage);
+	ca->usage = alloc_percpu(struct bch_dev_usage_full);
 	if (!ca->usage)
 		return -BCH_ERR_ENOMEM_usage_init;
 
libbcachefs/buckets.h

@@ -39,33 +39,6 @@ static inline u64 sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t
 	for (_b = (_buckets)->b + (_buckets)->first_bucket;	\
 	     _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
 
-/*
- * Ugly hack alert:
- *
- * We need to cram a spinlock in a single byte, because that's what we have left
- * in struct bucket, and we care about the size of these - during fsck, we need
- * in memory state for every single bucket on every device.
- *
- * We used to do
- *	while (xchg(&b->lock, 1) cpu_relax();
- * but, it turns out not all architectures support xchg on a single byte.
- *
- * So now we use bit_spin_lock(), with fun games since we can't burn a whole
- * ulong for this - we just need to make sure the lock bit always ends up in the
- * first byte.
- */
-
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define BUCKET_LOCK_BITNR	0
-#else
-#define BUCKET_LOCK_BITNR	(BITS_PER_LONG - 1)
-#endif
-
-union ulong_byte_assert {
-	ulong	ulong;
-	u8	byte;
-};
-
 static inline void bucket_unlock(struct bucket *b)
 {
 	BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
@@ -167,9 +140,7 @@ static inline int gen_cmp(u8 a, u8 b)
 
 static inline int gen_after(u8 a, u8 b)
 {
-	int r = gen_cmp(a, b);
-
-	return r > 0 ? r : 0;
+	return max(0, gen_cmp(a, b));
 }
 
 static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
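gen_after() collapses to max(0, gen_cmp(a, b)). Bucket generations are 8-bit and wrap, so the comparison has to be a wrapping signed one; conceptually it works like this sketch (an illustration of the idea, not a copy of gen_cmp()):

	/* positive iff a is newer than b, modulo 256 */
	static inline int example_gen_cmp(unsigned char a, unsigned char b)
	{
		return (signed char) (a - b);
	}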
@@ -201,7 +172,16 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
 	return ret;
 }
 
-void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *);
+void bch2_dev_usage_full_read_fast(struct bch_dev *, struct bch_dev_usage_full *);
+static inline struct bch_dev_usage_full bch2_dev_usage_full_read(struct bch_dev *ca)
+{
+	struct bch_dev_usage_full ret;
+
+	bch2_dev_usage_full_read_fast(ca, &ret);
+	return ret;
+}
+
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage_full *);
 
 static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
 {
@@ -236,7 +216,7 @@ static inline u64 dev_buckets_free(struct bch_dev *ca,
 				   enum bch_watermark watermark)
 {
 	return max_t(s64, 0,
-		     usage.d[BCH_DATA_free].buckets -
+		     usage.buckets[BCH_DATA_free] -
 		     ca->nr_open_buckets -
 		     bch2_dev_buckets_reserved(ca, watermark));
 }
@@ -246,10 +226,10 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca,
 					  enum bch_watermark watermark)
 {
 	return max_t(s64, 0,
-		       usage.d[BCH_DATA_free].buckets
-		     + usage.d[BCH_DATA_cached].buckets
-		     + usage.d[BCH_DATA_need_gc_gens].buckets
-		     + usage.d[BCH_DATA_need_discard].buckets
+		       usage.buckets[BCH_DATA_free]
+		     + usage.buckets[BCH_DATA_cached]
+		     + usage.buckets[BCH_DATA_need_gc_gens]
+		     + usage.buckets[BCH_DATA_need_discard]
 		     - ca->nr_open_buckets
 		     - bch2_dev_buckets_reserved(ca, watermark));
 }
@@ -262,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca,
 
 /* Filesystem usage: */
 
-static inline unsigned dev_usage_u64s(void)
-{
-	return sizeof(struct bch_dev_usage) / sizeof(u64);
-}
-
 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *);
 
libbcachefs/buckets_types.h

@@ -7,6 +7,33 @@
 
 #define BUCKET_JOURNAL_SEQ_BITS		16
 
+/*
+ * Ugly hack alert:
+ *
+ * We need to cram a spinlock in a single byte, because that's what we have left
+ * in struct bucket, and we care about the size of these - during fsck, we need
+ * in memory state for every single bucket on every device.
+ *
+ * We used to do
+ *	while (xchg(&b->lock, 1) cpu_relax();
+ * but, it turns out not all architectures support xchg on a single byte.
+ *
+ * So now we use bit_spin_lock(), with fun games since we can't burn a whole
+ * ulong for this - we just need to make sure the lock bit always ends up in the
+ * first byte.
+ */
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BUCKET_LOCK_BITNR	0
+#else
+#define BUCKET_LOCK_BITNR	(BITS_PER_LONG - 1)
+#endif
+
+union ulong_byte_assert {
+	ulong	ulong;
+	u8	byte;
+};
+
 struct bucket {
 	u8			lock;
 	u8			gen_valid:1;
@@ -27,7 +54,12 @@ struct bucket_gens {
 	u8			b[] __counted_by(nbuckets);
 };
 
-struct bch_dev_usage {
+/* Only info on bucket counts: */
+struct bch_dev_usage {
+	u64			buckets[BCH_DATA_NR];
+};
+
+struct bch_dev_usage_full {
 	struct bch_dev_usage_type {
 		u64		buckets;
 		u64		sectors; /* _compressed_ sectors: */
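The single-byte bucket spinlock machinery moves here, next to struct bucket itself. Per the comment, the lock bit must land in the first byte of the struct; the lock side, assuming it mirrors the bucket_unlock() left behind in buckets.h, would look roughly like this sketch:

	/* sketch; assumes the BUCKET_LOCK_BITNR layout described above */
	static inline void example_bucket_lock(struct bucket *b)
	{
		bit_spin_lock(BUCKET_LOCK_BITNR, (void *) b);
	}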
libbcachefs/chardev.c

@@ -315,8 +315,10 @@ static int bch2_data_thread(void *arg)
 	ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
 	if (ctx->thr.ret == -BCH_ERR_device_offline)
 		ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
-	else
+	else {
 		ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
+		ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
+	}
 	return 0;
 }
 
@@ -348,8 +350,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
 	if (ctx->arg.op == BCH_DATA_OP_scrub) {
 		struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
 		if (ca) {
-			struct bch_dev_usage u;
-			bch2_dev_usage_read_fast(ca, &u);
+			struct bch_dev_usage_full u;
+			bch2_dev_usage_full_read_fast(ca, &u);
 			for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
 				if (ctx->arg.scrub.data_types & BIT(i))
 					e.p.sectors_total += u.d[i].sectors;
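Scrub needs per-type sector totals, hence the switch to the full read; callers that only care about bucket counts can stay on the cheaper slim read. A sketch of the distinction (illustrative only):

	static u64 example_free_buckets(struct bch_dev *ca)
	{
		struct bch_dev_usage u = bch2_dev_usage_read(ca);	/* counts only */

		return u.buckets[BCH_DATA_free];
	}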
@@ -424,10 +426,8 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
 	arg.replica_entries_bytes = replicas.nr;
 
 	for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
-		struct disk_accounting_pos k = {
-			.type = BCH_DISK_ACCOUNTING_persistent_reserved,
-			.persistent_reserved.nr_replicas = i,
-		};
+		struct disk_accounting_pos k;
+		disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i);
 
 		bch2_accounting_mem_read(c,
 					 disk_accounting_pos_to_bpos(&k),
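disk_accounting_key_init() packages the memset-then-assign pattern seen earlier in buckets.c. Its definition is not part of this diff; it plausibly expands along these lines (hypothetical sketch, not the actual macro):

	/* hypothetical expansion, for illustration only */
	#define example_acct_key_init(k, _type, ...)				\
	do {									\
		memset(&(k), 0, sizeof(k));					\
		(k).type = BCH_DISK_ACCOUNTING_##_type;				\
		(k)._type = (typeof((k)._type)) { __VA_ARGS__ };		\
	} while (0)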
@@ -473,7 +473,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
 				 struct bch_ioctl_dev_usage __user *user_arg)
 {
 	struct bch_ioctl_dev_usage arg;
-	struct bch_dev_usage src;
+	struct bch_dev_usage_full src;
 	struct bch_dev *ca;
 	unsigned i;
 
@@ -493,7 +493,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
 	if (IS_ERR(ca))
 		return PTR_ERR(ca);
 
-	src = bch2_dev_usage_read(ca);
+	src = bch2_dev_usage_full_read(ca);
 
 	arg.state		= ca->mi.state;
 	arg.bucket_size		= ca->mi.bucket_size;
@@ -514,7 +514,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
 				    struct bch_ioctl_dev_usage_v2 __user *user_arg)
 {
 	struct bch_ioctl_dev_usage_v2 arg;
-	struct bch_dev_usage src;
+	struct bch_dev_usage_full src;
 	struct bch_dev *ca;
 	int ret = 0;
 
@@ -534,7 +534,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
 	if (IS_ERR(ca))
 		return PTR_ERR(ca);
 
-	src = bch2_dev_usage_read(ca);
+	src = bch2_dev_usage_full_read(ca);
 
 	arg.state		= ca->mi.state;
 	arg.bucket_size		= ca->mi.bucket_size;
@@ -615,7 +615,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
 
 	for_each_online_member(c, ca)
 		if (ca->dev == dev) {
-			percpu_ref_put(&ca->io_ref);
+			percpu_ref_put(&ca->io_ref[READ]);
 			return ca->dev_idx;
 		}
 
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user