Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-04-01 00:00:03 +03:00
Compare commits
48 Commits
33483738c6
ea5bdadabd
66c943be6c
3d968816a4
76b3ec9d66
3e2d5b2b9a
7c47145f6c
396545c2ea
3bfe433d22
19d3a1778c
e54f288992
7c66e9fac5
2160e9a7bd
941dfd7a29
18b4914587
8af5c93e48
6657ce2de3
5504533986
a55c655158
b8435c5693
80d0a21ed3
a1080f2f6a
3bf9a19571
00dd889d33
1a8e684ae0
3d972489a8
197437be12
0a23a5ab1a
017c3cc66e
8b7810403b
ce0c2241a4
846f3398a2
e6aecdd3b8
62ea232b09
64ce740ac6
c0836924b1
f42ee45c6e
6cbadc946d
86cbeaf1c2
dd1a882d17
3e15e96cb9
d7f02b4ed5
1606364799
553e605886
55538d928b
7d30f895c6
8cc5bdede9
67c9b378c7
Files changed:

.bcachefs_revision
.github/workflows/nix-flake.yml
Cargo.lock
Cargo.toml
INSTALL.md
Makefile
bch_bindgen/src
c_src: bcachefs.c, cmd_device.c, cmd_format.c, cmd_fs.c, cmd_fsck.c, cmd_fusemount.c, cmd_list_journal.c, cmd_migrate.c, cmd_option.c, cmds.h, libbcachefs.c, libbcachefs.h, posix_to_bcachefs.c, posix_to_bcachefs.h
flake.lock
flake.nix
include: crypto, linux
libbcachefs: alloc_background.c, alloc_background.h, alloc_foreground.c, alloc_foreground.h, backpointers.c, backpointers.h, bcachefs.h, bcachefs_format.h, bcachefs_ioctl.h, btree_cache.c, btree_gc.c, btree_io.c, btree_io.h, btree_iter.c, btree_iter.h, btree_journal_iter.c, btree_key_cache.c, btree_locking.c, btree_locking.h, btree_node_scan.c, btree_trans_commit.c, btree_types.h, btree_update.c, btree_update.h, btree_update_interior.c, btree_update_interior.h, btree_write_buffer.c, buckets.c, buckets.h, buckets_types.h, chardev.c, checksum.c, checksum.h, compress.c, data_update.c, data_update.h, dirent.c, dirent.h, dirent_format.h, disk_accounting.c, disk_accounting.h, disk_accounting_format.h, ec.c, ec.h, ec_types.h, errcode.h, error.c, error.h, extents.c, extents.h, extents_format.h, extents_types.h, eytzinger.c, eytzinger.h, fs-io-buffered.c, fs-io.c, fs-ioctl.c, fs-ioctl.h, fs.c, fsck.c, inode.c, inode.h, inode_format.h, io_misc.c
.bcachefs_revision
@@ -1 +1 @@
-63bbe0ca416791095c994aba7bea388e947dd60a
+7fdc3fa3cb5fb561f5945b4de418d48d1a726a8d
.github/workflows/nix-flake.yml (vendored): 41 lines changed
@@ -1,22 +1,31 @@
-name: "Nix-Tests"
+name: Nix Flake actions

 on:
   pull_request:
   push:

 jobs:
-  nix-flake-check:
+  nix-matrix:
     runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
-      - uses: actions/checkout@v4
-      - uses: cachix/install-nix-action@v27
-        with:
-          extra_nix_config: |
-            experimental-features = nix-command flakes
-            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
-      - uses: cachix/cachix-action@v15
-        with:
-          name: bcachefs-tools
-          # If you chose API tokens for write access OR if you have a private cache
-          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-      - run: nix flake show
-      - run: nix flake check --print-build-logs
-      - run: nix build --print-build-logs
+      - uses: actions/checkout@v4
+      - uses: cachix/install-nix-action@v30
+      - id: set-matrix
+        name: Generate Nix Matrix
+        run: |
+          set -Eeu
+          matrix="$(nix eval --json '.#githubActions.matrix')"
+          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  nix-build:
+    name: ${{ matrix.name }} (${{ matrix.system }})
+    needs: nix-matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{fromJSON(needs.nix-matrix.outputs.matrix)}}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: cachix/install-nix-action@v30
+      - run: nix build -L '.#${{ matrix.attr }}'
Cargo.lock (generated): 2 lines changed
@@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"

 [[package]]
 name = "bcachefs-tools"
-version = "1.12.0"
+version = "1.25.0"
 dependencies = [
  "anyhow",
  "bch_bindgen",
Cargo.toml
@@ -1,9 +1,13 @@
 [workspace]
+resolver = "2"
+default-members = [".", "bch_bindgen"]

 [package]
 name = "bcachefs-tools"
-version = "1.20.0"
+version = "1.25.0"
 authors = ["Yuxuan Shui <yshuiv7@gmail.com>", "Kayla Firestack <dev@kaylafire.me>", "Kent Overstreet <kent.overstreet@linux.dev>" ]
 edition = "2021"
-rust-version = "1.77"
+rust-version = "1.77.0"

 [[bin]]
 name = "bcachefs"
INSTALL.md: 18 lines changed
@@ -39,20 +39,19 @@ Starting from Debian Trixie and Ubuntu 23.10, you will additionally need:
 apt install -y systemd-dev
 ```

-Fedora: install the "Development tools" group along with:
+Fedora: install build dependencies either with `dnf builddep bcachefs-tools` or with:
 ```shell
-dnf install -y libaio-devel libsodium-devel \
+dnf install -y @c-development libaio-devel libsodium-devel \
     libblkid-devel libzstd-devel zlib-devel userspace-rcu-devel \
     lz4-devel libuuid-devel valgrind-devel keyutils-libs-devel \
-    findutils udev systemd-devel llvm-devel
+    findutils systemd-devel clang-devel llvm-devel rust cargo
 ```

 openSUSE: install build dependencies with:
 ```shell
-zypper install -y libaio-devel libsodium-devel \
-    libblkid-devel liburcu-devel libzstd-devel zlib-devel \
-    liblz4-devel libuuid-devel valgrind-devel keyutils-devel \
-    findutils udev systemd-devel llvm-devel
+zypper in -y libaio-devel libsodium-devel libblkid-devel liburcu-devel \
+    libzstd-devel zlib-devel liblz4-devel libuuid-devel valgrind-devel \
+    keyutils-devel findutils udev systemd-devel llvm-devel
 ```

 Arch: install bcachefs-tools-git from the AUR.
@@ -88,6 +87,11 @@ Arch:
 pacman -S fuse3
 ```

+openSUSE:
+```shell
+zypper in -y fuse3-devel
+```
+
 Then, make using the `BCACHEFS_FUSE` environment variable (make clean first if
 previously built without fuse support):
Makefile: 11 lines changed
@@ -1,4 +1,4 @@
-VERSION=1.20.0
+VERSION=1.25.0

 PREFIX?=/usr/local
 LIBEXECDIR?=$(PREFIX)/libexec
@@ -22,6 +22,13 @@ else
 	CARGO_CLEAN_ARGS = --quiet
 endif

+# when cross compiling, cargo places the built binary in a different location
+ifdef CARGO_BUILD_TARGET
+	BUILT_BIN = target/$(CARGO_BUILD_TARGET)/release/bcachefs
+else
+	BUILT_BIN = target/release/bcachefs
+endif
+
 # Prevent recursive expansions of $(CFLAGS) to avoid repeatedly performing
 # compile tests
 CFLAGS:=$(CFLAGS)
@@ -195,7 +202,7 @@ cmd_version.o : .version
 install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
 install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
 install: bcachefs $(optional_install)
-	$(INSTALL) -m0755 -D target/release/bcachefs -t $(DESTDIR)$(ROOT_SBINDIR)
+	$(INSTALL) -m0755 -D $(BUILT_BIN) -t $(DESTDIR)$(ROOT_SBINDIR)
	$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
	$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
	$(INSTALL) -m0755 -D initramfs/hook $(DESTDIR)$(INITRAMFS_HOOK)
bch_bindgen/src/opts.rs
@@ -1,3 +1,7 @@
 use crate::c;
+use crate::fs::Fs;
+use std::ffi::{CString, c_char};

 #[macro_export]
 macro_rules! opt_set {
     ($opts:ident, $n:ident, $v:expr) => {
@@ -33,3 +37,29 @@ macro_rules! opt_get {
         }
     };
 }
+
+pub fn parse_mount_opts(fs: Option<&mut Fs>, optstr: Option<&str>, ignore_unknown: bool)
+    -> Result<c::bch_opts, c::bch_errcode> {
+    let mut opts: c::bch_opts = Default::default();
+
+    if let Some(optstr) = optstr {
+        let optstr = CString::new(optstr).unwrap();
+        let optstr_ptr = optstr.as_ptr();
+
+        let ret = unsafe {
+            c::bch2_parse_mount_opts(fs.map_or(std::ptr::null_mut(), |f| f.raw),
+                                     &mut opts as *mut c::bch_opts,
+                                     std::ptr::null_mut(),
+                                     optstr_ptr as *mut c_char,
+                                     ignore_unknown)
+        };
+
+        drop(optstr);
+
+        if ret != 0 {
+            let err: c::bch_errcode = unsafe { std::mem::transmute(-ret) };
+            return Err(err);
+        }
+    }
+    Ok(opts)
+}
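For orientation, here is a minimal C sketch of the same call the Rust wrapper above makes, matching the updated five-argument signature seen in the cmd_fsck.c hunk later in this changeset. The surrounding function is illustrative only, and the NULL third argument (the "parse later" buffer) follows the call sites shown here, not documented API:

/*
 * Sketch only: assumes the bcachefs-tools headers (libbcachefs/opts.h,
 * libbcachefs/errcode.h, tools-util.h) are available.
 */
#include <stdio.h>

static int parse_opts_example(struct bch_fs *fs, char *optstr)
{
    struct bch_opts opts = bch2_opts_empty();

    /* fifth argument is new: false = reject unknown options */
    int ret = bch2_parse_mount_opts(fs, &opts, NULL, optstr, false);
    if (ret) {
        fprintf(stderr, "error parsing options: %s\n", bch2_err_str(ret));
        return ret;
    }

    /* opts is now populated and can be passed to bch2_fs_open() etc. */
    return 0;
}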
c_src/bcachefs.c
@@ -33,6 +33,7 @@ void bcachefs_usage(void)
	     "Superblock commands:\n"
	     "  format                   Format a new filesystem\n"
	     "  show-super               Dump superblock information to stdout\n"
+	     "  recover-super            Attempt to recover overwritten superblock from backups\n"
	     "  set-fs-option            Set a filesystem option\n"
	     "  reset-counters           Reset all counters on an unmounted device\n"
	     "\n"
@@ -92,9 +93,11 @@ void bcachefs_usage(void)
	     "  list                     List filesystem metadata in textual form\n"
	     "  list_journal             List contents of journal\n"
	     "\n"
+#ifdef BCACHEFS_FUSE
	     "FUSE:\n"
	     "  fusemount                Mount a filesystem via FUSE\n"
	     "\n"
+#endif
	     "Miscellaneous:\n"
	     "  completions              Generate shell completions\n"
	     "  version                  Display the version of the invoked bcachefs tool\n");
@@ -115,16 +118,15 @@ int fs_cmds(int argc, char *argv[])
 {
	char *cmd = pop_cmd(&argc, argv);

-	if (argc < 1) {
-		bcachefs_usage();
-		exit(EXIT_FAILURE);
-	}
+	if (argc < 1)
+		return fs_usage();
	if (!strcmp(cmd, "usage"))
		return cmd_fs_usage(argc, argv);
	if (!strcmp(cmd, "top"))
		return cmd_fs_top(argc, argv);

-	return 0;
+	fs_usage();
+	return -EINVAL;
 }

 int device_cmds(int argc, char *argv[])
@@ -150,7 +152,8 @@ int device_cmds(int argc, char *argv[])
	if (!strcmp(cmd, "resize-journal"))
		return cmd_device_resize_journal(argc, argv);

-	return 0;
+	device_usage();
+	return -EINVAL;
 }

 int data_cmds(int argc, char *argv[])
@@ -166,5 +169,6 @@ int data_cmds(int argc, char *argv[])
	if (!strcmp(cmd, "job"))
		return cmd_data_job(argc, argv);

-	return 0;
+	data_usage();
+	return -EINVAL;
 }
c_src/cmd_device.c
@@ -47,11 +47,11 @@ static void device_add_usage(void)
	puts("bcachefs device add - add a device to an existing filesystem\n"
	     "Usage: bcachefs device add [OPTION]... filesystem device\n"
	     "\n"
-	     "Options:\n"
-	     "  -S, --fs_size=size          Size of filesystem on device\n"
-	     "  -B, --bucket=size           Bucket size\n"
-	     "  -D, --discard               Enable discards\n"
-	     "  -l, --label=label           Disk label\n"
+	     "Options:\n");
+
+	bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);
+
+	puts("  -l, --label=label           Disk label\n"
	     "  -f, --force                 Use device even if it appears to already be formatted\n"
	     "  -h, --help                  Display this help and exit\n"
	     "\n"
@@ -61,9 +61,6 @@ static void device_add_usage(void)
 int cmd_device_add(int argc, char *argv[])
 {
	static const struct option longopts[] = {
-		{ "fs_size",	required_argument,	NULL, 'S' },
-		{ "bucket",	required_argument,	NULL, 'B' },
-		{ "discard",	no_argument,		NULL, 'D' },
		{ "label",	required_argument,	NULL, 'l' },
		{ "force",	no_argument,		NULL, 'f' },
		{ "help",	no_argument,		NULL, 'h' },
@@ -72,22 +69,31 @@ int cmd_device_add(int argc, char *argv[])
	struct format_opts format_opts	= format_opts_default();
	struct dev_opts dev_opts	= dev_opts_default();
	bool force = false;
-	int opt;

-	while ((opt = getopt_long(argc, argv, "S:B:Dl:fh",
-				  longopts, NULL)) != -1)
-		switch (opt) {
-		case 'S':
-			if (bch2_strtoull_h(optarg, &dev_opts.size))
-				die("invalid filesystem size");
-			break;
-		case 'B':
-			if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-				die("bad bucket_size %s", optarg);
-			break;
-		case 'D':
-			dev_opts.discard = true;
+	while (true) {
+		const struct bch_option *opt =
+			bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_DEVICE);
+		if (opt) {
+			unsigned id = opt - bch2_opt_table;
+			u64 v;
+			struct printbuf err = PRINTBUF;
+			int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+			if (ret)
+				die("invalid %s: %s", opt->attr.name, err.buf);
+
+			if (opt->flags & OPT_DEVICE)
+				bch2_opt_set_by_id(&dev_opts.opts, id, v);
+			else
+				die("got bch_opt of wrong type %s", opt->attr.name);
+
+			continue;
+		}
+
+		int optid = getopt_long(argc, argv, "S:B:Dl:fh", longopts, NULL);
+		if (optid == -1)
+			break;
+
+		switch (optid) {
		case 'l':
			dev_opts.label = strdup(optarg);
			break;
@@ -97,7 +103,11 @@ int cmd_device_add(int argc, char *argv[])
		case 'h':
			device_add_usage();
			exit(EXIT_SUCCESS);
+		case '?':
+			exit(EXIT_FAILURE);
+			break;
		}
+	}
	args_shift(optind);

	char *fs_path = arg_pop();
@@ -127,10 +137,11 @@ int cmd_device_add(int argc, char *argv[])
	opt_set(fs_opts, btree_node_size,
		read_file_u64(fs.sysfs_fd, "options/btree_node_size"));

-	struct bch_sb *sb = bch2_format(fs_opt_strs,
-					fs_opts,
-					format_opts,
-					&dev_opts, 1);
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts);
+
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	darray_exit(&devs);
	free(sb);
	bchu_disk_add(fs, dev_opts.path);
	return 0;
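The rework above replaces fixed getopt cases with a two-phase loop: each iteration first asks bch2_cmdline_opt_parse() to consume a --option from the shared option table, and only then falls back to getopt_long(). A self-contained sketch of that pattern follows; the table contents and option names here are illustrative, not bcachefs-tools APIs:

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *opt_table[] = { "discard", "durability", "bucket_size" };

static int opt_lookup(const char *name)
{
    for (unsigned i = 0; i < sizeof(opt_table) / sizeof(opt_table[0]); i++)
        if (!strcmp(name, opt_table[i]))
            return (int) i;
    return -1;
}

int main(int argc, char *argv[])
{
    static const struct option longopts[] = {
        { "force", no_argument, NULL, 'f' },
        { NULL }
    };

    while (true) {
        /* phase 1: consume "--name=value" options found in the table;
         * the real code also accepts the value as a separate argument */
        if (optind < argc &&
            argv[optind][0] == '-' && argv[optind][1] == '-') {
            char *name = strdup(argv[optind] + 2);
            char *val  = strchr(name, '=');
            if (val)
                *val++ = '\0';

            int id = opt_lookup(name);
            if (id >= 0) {
                printf("table option %s = %s\n", opt_table[id], val ? val : "1");
                free(name);
                optind++;
                continue;
            }
            free(name);
        }

        /* phase 2: everything else goes through getopt_long() */
        int c = getopt_long(argc, argv, "f", longopts, NULL);
        if (c == -1)
            break;

        switch (c) {
        case 'f':
            printf("force\n");
            break;
        case '?':
            exit(EXIT_FAILURE);
        }
    }
    return 0;
}

Options that getopt_long() does not know about (such as --discard above) are intercepted before getopt_long() can reject them, which is what lets these commands accept the full shared option table without duplicating every entry in longopts.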
c_src/cmd_format.c
@@ -39,11 +39,7 @@ x('L', fs_label, required_argument) \
 x('U',	uuid,			required_argument)	\
 x(0,	fs_size,		required_argument)	\
 x(0,	superblock_size,	required_argument)	\
-x(0,	bucket_size,		required_argument)	\
 x('l',	label,			required_argument)	\
-x(0,	discard,		no_argument)		\
-x(0,	data_allowed,		required_argument)	\
-x(0,	durability,		required_argument)	\
 x(0,	version,		required_argument)	\
 x(0,	no_initialize,		no_argument)		\
 x(0,	source,			required_argument)	\
@@ -52,17 +48,16 @@ x('q', quiet, no_argument) \
 x('v',	verbose,		no_argument)		\
 x('h',	help,			no_argument)

-static void usage(void)
+static void format_usage(void)
 {
	puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
	     "Usage: bcachefs format [OPTION]... <devices>\n"
	     "\n"
	     "Options:");

-	bch2_opts_usage(OPT_FORMAT);
+	bch2_opts_usage(OPT_FORMAT|OPT_FS);

-	puts(
-	     "      --replicas=#            Sets both data and metadata replicas\n"
+	puts("      --replicas=#            Sets both data and metadata replicas\n"
	     "      --encrypted             Enable whole filesystem encryption (chacha20/poly1305)\n"
	     "      --no_passphrase         Don't encrypt master encryption key\n"
	     "  -L, --fs_label=label\n"
@@ -72,9 +67,10 @@ static void usage(void)
	     "\n"
	     "Device specific options:");

-	bch2_opts_usage(OPT_DEVICE);
+	bch2_opts_usage(OPT_FORMAT|OPT_DEVICE);

-	puts("  -l, --label=label           Disk label\n"
+	puts("      --fs_size=size          Size of filesystem on device\n"
+	     "  -l, --label=label           Disk label\n"
	     "\n"
	     "  -f, --force\n"
	     "  -q, --quiet                 Only print errors\n"
@@ -125,32 +121,60 @@ void build_fs(struct bch_fs *c, const char *src_path)
	if (!S_ISDIR(stat.st_mode))
		die("%s is not a directory", src_path);

-	copy_fs(c, src_fd, src_path, &s);
+	copy_fs(c, src_fd, src_path, &s, 0);
 }

 int cmd_format(int argc, char *argv[])
 {
-	DARRAY(struct dev_opts) devices = { 0 };
-	DARRAY(char *) device_paths = { 0 };
+	dev_opts_list devices = {};
+	darray_str device_paths = {};
	struct format_opts opts	= format_opts_default();
	struct dev_opts dev_opts = dev_opts_default();
	bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
	bool unconsumed_dev_option = false;
	unsigned v;
-	int opt;

-	struct bch_opt_strs fs_opt_strs =
-		bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
-	struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+	struct bch_opt_strs fs_opt_strs = {};
+	struct bch_opts fs_opts = bch2_opts_empty();

	if (getenv("BCACHEFS_KERNEL_ONLY"))
		initialize = false;

-	while ((opt = getopt_long(argc, argv,
-				  "-L:l:U:g:fqhv",
-				  format_opts,
-				  NULL)) != -1)
-		switch (opt) {
+	while (true) {
+		const struct bch_option *opt =
+			bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT|OPT_FS|OPT_DEVICE);
+		if (opt) {
+			unsigned id = opt - bch2_opt_table;
+			u64 v;
+			struct printbuf err = PRINTBUF;
+			int ret = bch2_opt_parse(NULL, opt, optarg, &v, &err);
+			if (ret == -BCH_ERR_option_needs_open_fs) {
+				fs_opt_strs.by_id[id] = strdup(optarg);
+				continue;
+			}
+			if (ret)
+				die("invalid option: %s", err.buf);
+
+			if (opt->flags & OPT_DEVICE) {
+				bch2_opt_set_by_id(&dev_opts.opts, id, v);
+				unconsumed_dev_option = true;
+			} else if (opt->flags & OPT_FS) {
+				bch2_opt_set_by_id(&fs_opts, id, v);
+			} else {
+				die("got bch_opt of wrong type %s", opt->attr.name);
+			}
+
+			continue;
+		}
+
+		int optid = getopt_long(argc, argv,
+					"-L:l:U:g:fqhv",
+					format_opts,
+					NULL);
+		if (optid == -1)
+			break;
+
+		switch (optid) {
		case O_replicas:
			if (kstrtouint(optarg, 10, &v) ||
			    !v ||
@@ -183,7 +207,7 @@ int cmd_format(int argc, char *argv[])
			force = true;
			break;
		case O_fs_size:
-			if (bch2_strtoull_h(optarg, &dev_opts.size))
+			if (bch2_strtoull_h(optarg, &dev_opts.fs_size))
				die("invalid filesystem size");
			unconsumed_dev_option = true;
			break;
@@ -193,32 +217,11 @@ int cmd_format(int argc, char *argv[])

			opts.superblock_size >>= 9;
			break;
-		case O_bucket_size:
-			if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
-				die("bad bucket_size %s", optarg);
-			unconsumed_dev_option = true;
-			break;
		case O_label:
		case 'l':
			dev_opts.label = optarg;
			unconsumed_dev_option = true;
			break;
-		case O_discard:
-			dev_opts.discard = true;
-			unconsumed_dev_option = true;
-			break;
-		case O_data_allowed:
-			dev_opts.data_allowed =
-				read_flag_list_or_die(optarg,
-						__bch2_data_types, "data type");
-			unconsumed_dev_option = true;
-			break;
-		case O_durability:
-			if (kstrtouint(optarg, 10, &dev_opts.durability) ||
-			    dev_opts.durability > BCH_REPLICAS_MAX)
-				die("invalid durability");
-			unconsumed_dev_option = true;
-			break;
		case O_version:
			opts.version = version_parse(optarg);
			break;
@@ -229,7 +232,7 @@ int cmd_format(int argc, char *argv[])
			darray_push(&device_paths, optarg);
			dev_opts.path = optarg;
			darray_push(&devices, dev_opts);
-			dev_opts.size = 0;
+			dev_opts.fs_size = 0;
			unconsumed_dev_option = false;
			break;
		case O_quiet:
@@ -241,13 +244,16 @@ int cmd_format(int argc, char *argv[])
			break;
		case O_help:
		case 'h':
-			usage();
+			format_usage();
			exit(EXIT_SUCCESS);
			break;
		case '?':
			exit(EXIT_FAILURE);
			break;
+		default:
+			die("getopt ret %i %c", optid, optid);
		}
+	}

	if (unconsumed_dev_option)
		die("Options for devices apply to subsequent devices; got a device option with no device");
@@ -269,11 +275,7 @@ int cmd_format(int argc, char *argv[])
		die("Error opening %s: %s", dev_opts.path, strerror(-ret));
	}

-	struct bch_sb *sb =
-		bch2_format(fs_opt_strs,
-			    fs_opts,
-			    opts,
-			    devices.data, devices.nr);
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, opts, devices);
	bch2_opt_strs_free(&fs_opt_strs);

	if (!quiet) {
@@ -433,3 +435,204 @@ int cmd_show_super(int argc, char *argv[])
	printbuf_exit(&buf);
	return 0;
 }
+
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/sb-members.h"
+
+typedef DARRAY(struct bch_sb *) probed_sb_list;
+
+static void probe_one_super(int dev_fd, unsigned sb_size, u64 offset,
+			    probed_sb_list *sbs, bool verbose)
+{
+	darray_char sb_buf = {};
+	darray_resize(&sb_buf, sb_size);
+
+	xpread(dev_fd, sb_buf.data, sb_buf.size, offset);
+
+	struct printbuf err = PRINTBUF;
+	int ret = bch2_sb_validate((void *) sb_buf.data, offset >> 9, 0, &err);
+	printbuf_exit(&err);
+
+	if (!ret) {
+		if (verbose) {
+			struct printbuf buf = PRINTBUF;
+			prt_human_readable_u64(&buf, offset);
+			printf("found superblock at %s\n", buf.buf);
+			printbuf_exit(&buf);
+		}
+
+		darray_push(sbs, (void *) sb_buf.data);
+		sb_buf.data = NULL;
+	}
+
+	darray_exit(&sb_buf);
+}
+
+static void probe_sb_range(int dev_fd, u64 start_offset, u64 end_offset,
+			   probed_sb_list *sbs, bool verbose)
+{
+	start_offset	&= ~((u64) 511);
+	end_offset	&= ~((u64) 511);
+
+	size_t buflen = end_offset - start_offset;
+	void *buf = malloc(buflen);
+	xpread(dev_fd, buf, buflen, start_offset);
+
+	for (u64 offset = 0; offset < buflen; offset += 512) {
+		struct bch_sb *sb = buf + offset;
+
+		if (!uuid_equal(&sb->magic, &BCACHE_MAGIC) &&
+		    !uuid_equal(&sb->magic, &BCHFS_MAGIC))
+			continue;
+
+		size_t bytes = vstruct_bytes(sb);
+		if (offset + bytes > buflen) {
+			fprintf(stderr, "found sb %llu size %zu that overran buffer\n",
+				start_offset + offset, bytes);
+			continue;
+		}
+		struct printbuf err = PRINTBUF;
+		int ret = bch2_sb_validate(sb, (start_offset + offset) >> 9, 0, &err);
+		if (ret)
+			fprintf(stderr, "found sb %llu that failed to validate: %s\n",
+				start_offset + offset, err.buf);
+		printbuf_exit(&err);
+
+		if (ret)
+			continue;
+
+		if (verbose) {
+			struct printbuf buf = PRINTBUF;
+			prt_human_readable_u64(&buf, start_offset + offset);
+			printf("found superblock at %s\n", buf.buf);
+			printbuf_exit(&buf);
+		}
+
+		void *sb_copy = malloc(bytes);
+		memcpy(sb_copy, sb, bytes);
+		darray_push(sbs, sb_copy);
+	}
+
+	free(buf);
+}
+
+static u64 bch2_sb_last_mount_time(struct bch_sb *sb)
+{
+	u64 ret = 0;
+	for (unsigned i = 0; i < sb->nr_devices; i++)
+		ret = max(ret, le64_to_cpu(bch2_sb_member_get(sb, i).last_mount));
+	return ret;
+}
+
+static int bch2_sb_time_cmp(struct bch_sb *l, struct bch_sb *r)
+{
+	return cmp_int(bch2_sb_last_mount_time(l),
+		       bch2_sb_last_mount_time(r));
+}
+
+static void recover_super_usage(void)
+{
+	puts("bcachefs recover-super\n"
+	     "Usage: bcachefs recover-super [OPTION].. device\n"
+	     "\n"
+	     "Attempt to recover a filesystem on a device that has had the main superblock\n"
+	     "and superblock layout overwritten.\n"
+	     "All options will be guessed if not provided\n"
+	     "\n"
+	     "Options:\n"
+	     "  -d, --dev_size     size of filesystem on device, in bytes\n"
+	     "  -o, --offset       offset to probe, in bytes\n"
+	     "  -y, --yes          Recover without prompting\n"
+	     "  -v, --verbose      Increase logging level\n"
+	     "  -h, --help         display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+int cmd_recover_super(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "dev_size",	1, NULL, 'd' },
+		{ "offset",	1, NULL, 'o' },
+		{ "yes",	0, NULL, 'y' },
+		{ "verbose",	0, NULL, 'v' },
+		{ "help",	0, NULL, 'h' },
+		{ NULL }
+	};
+	u64 dev_size = 0, offset = 0;
+	bool yes = false, verbose = false;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "d:o:yvh", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'd':
+			if (bch2_strtoull_h(optarg, &dev_size))
+				die("invalid offset");
+			break;
+		case 'o':
+			if (bch2_strtoull_h(optarg, &offset))
+				die("invalid offset");
+
+			if (offset & 511)
+				die("offset must be a multiple of 512");
+			break;
+		case 'y':
+			yes = true;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		case 'h':
+			recover_super_usage();
+			break;
+		}
+	args_shift(optind);
+
+	char *dev_path = arg_pop();
+	if (!dev_path)
+		die("please supply a device");
+	if (argc)
+		die("too many arguments");
+
+	int dev_fd = xopen(dev_path, O_RDWR);
+
+	if (!dev_size)
+		dev_size = get_size(dev_fd);
+
+	probed_sb_list sbs = {};
+
+	if (offset) {
+		probe_one_super(dev_fd, SUPERBLOCK_SIZE_DEFAULT, offset, &sbs, verbose);
+	} else {
+		unsigned scan_len = 16 << 20; /* 16MB, start and end of device */
+
+		probe_sb_range(dev_fd, 4096, scan_len, &sbs, verbose);
+		probe_sb_range(dev_fd, dev_size - scan_len, dev_size, &sbs, verbose);
+	}
+
+	if (!sbs.nr) {
+		printf("Found no bcachefs superblocks\n");
+		exit(EXIT_FAILURE);
+	}
+
+	struct bch_sb *best = NULL;
+	darray_for_each(sbs, sb)
+		if (!best || bch2_sb_time_cmp(best, *sb) < 0)
+			best = *sb;
+
+	struct printbuf buf = PRINTBUF;
+	bch2_sb_to_text(&buf, best, true, BIT_ULL(BCH_SB_FIELD_members_v2));
+
+	printf("Found superblock:\n%s", buf.buf);
+	printf("Recover?");
+
+	if (yes || ask_yn())
+		bch2_super_write(dev_fd, best);
+
+	printbuf_exit(&buf);
+	darray_for_each(sbs, sb)
+		kfree(*sb);
+	darray_exit(&sbs);
+
+	return 0;
+}
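Condensed, for orientation: the recovery flow that the new cmd_recover_super() implements, using the helpers defined above. This is a sketch derived from the code in this changeset, with option parsing and error handling elided:

static void recover_super_sketch(int dev_fd, u64 dev_size, bool yes, bool verbose)
{
    probed_sb_list sbs = {};
    unsigned scan_len = 16 << 20;   /* 16MB at the start and end of the device */

    /* scan both ends of the device for anything that validates as a superblock */
    probe_sb_range(dev_fd, 4096, scan_len, &sbs, verbose);
    probe_sb_range(dev_fd, dev_size - scan_len, dev_size, &sbs, verbose);

    /* keep the candidate with the newest last_mount time */
    struct bch_sb *best = NULL;
    darray_for_each(sbs, sb)
        if (!best || bch2_sb_time_cmp(best, *sb) < 0)
            best = *sb;

    /* rewrite the superblock and layout after confirmation */
    if (best && (yes || ask_yn()))
        bch2_super_write(dev_fd, best);
}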
c_src/cmd_fs.c
@@ -237,6 +237,18 @@ static void accounting_sort(darray_accounting_p *sorted,
	sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL);
 }

+static void accounting_swab_if_old(struct bch_ioctl_query_accounting *in)
+{
+	unsigned kernel_version = bcachefs_kernel_version();
+
+	if (kernel_version &&
+	    kernel_version < bcachefs_metadata_version_disk_accounting_big_endian)
+		for (struct bkey_i_accounting *a = in->accounting;
+		     a < (struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s);
+		     a = bkey_i_to_accounting(bkey_next(&a->k_i)))
+			bch2_bpos_swab(&a->k.p);
+}
+
 static int fs_usage_v1_to_text(struct printbuf *out,
				struct bchfs_handle fs,
				dev_names dev_names)
@@ -251,6 +263,8 @@ static int fs_usage_v1_to_text(struct printbuf *out,
	if (!a)
		return -1;

+	accounting_swab_if_old(a);
+
	darray_accounting_p a_sorted = {};

	accounting_sort(&a_sorted, a);
@@ -477,6 +491,19 @@ devs:
	bcache_fs_close(fs);
 }

+int fs_usage(void)
+{
+	puts("bcachefs fs - manage a running filesystem\n"
+	     "Usage: bcachefs fs <CMD> [OPTIONS]\n"
+	     "\n"
+	     "Commands:\n"
+	     "  usage      Display detailed filesystem usage\n"
+	     "  top        Show runtime performance information\n"
+	     "\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	return 0;
+}
+
 static void fs_usage_usage(void)
 {
	puts("bcachefs fs usage - display detailed filesystem usage\n"
c_src/cmd_fsck.c
@@ -21,7 +21,6 @@ static void fsck_usage(void)
	     "  -y                      Assume \"yes\" to all questions\n"
	     "  -f                      Force checking even if filesystem is marked clean\n"
	     "  -r, --ratelimit_errors  Don't display more than 10 errors of a given type\n"
-	     "  -R, --reconstruct_alloc Reconstruct the alloc btree\n"
	     "  -k, --kernel            Use the in-kernel fsck implementation\n"
	     "  -v                      Be verbose\n"
	     "  -h, --help              Display this help and exit\n"
@@ -117,9 +116,7 @@ static bool should_use_kernel_fsck(darray_str devs)
 {
	system("modprobe bcachefs");

-	unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK)
-	    ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
-	    : 0;
+	unsigned kernel_version = bcachefs_kernel_version();

	if (!kernel_version)
		return false;
@@ -205,7 +202,6 @@ int cmd_fsck(int argc, char *argv[])
 {
	static const struct option longopts[] = {
		{ "ratelimit_errors",	no_argument,	NULL, 'r' },
-		{ "reconstruct_alloc",	no_argument,	NULL, 'R' },
		{ "kernel",		no_argument,	NULL, 'k' },
		{ "no-kernel",		no_argument,	NULL, 'K' },
		{ "help",		no_argument,	NULL, 'h' },
@@ -224,10 +220,13 @@ int cmd_fsck(int argc, char *argv[])
	append_opt(&opts_str, "read_only");

	while ((opt = getopt_long(argc, argv,
-				  "apynfo:rRkKvh",
+				  "apynfo:rkKvh",
				  longopts, NULL)) != -1)
		switch (opt) {
-		case 'a': /* outdated alias for -p */
+		case 'a':
+			/* "automatic" run, called by the system, for us to do checks as needed.
+			 * we don't need checks here: */
+			exit(EXIT_SUCCESS);
		case 'p':
		case 'y':
			append_opt(&opts_str, "fix_errors=yes");
@@ -245,9 +244,6 @@ int cmd_fsck(int argc, char *argv[])
		case 'r':
			append_opt(&opts_str, "ratelimit_errors");
			break;
-		case 'R':
-			append_opt(&opts_str, "reconstruct_alloc");
-			break;
		case 'k':
			kernel = true;
			break;
@@ -323,7 +319,7 @@ kernel_fsck_err:
	} else {
userland_fsck:
		printf("Running userspace offline fsck\n");
-		ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
+		ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf, false);
		if (ret)
			return ret;
c_src/cmd_fusemount.c
@@ -19,7 +19,7 @@
 #include "libbcachefs/dirent.h"
 #include "libbcachefs/errcode.h"
 #include "libbcachefs/error.h"
-#include "libbcachefs/fs-common.h"
+#include "libbcachefs/namei.h"
 #include "libbcachefs/inode.h"
 #include "libbcachefs/io_read.h"
 #include "libbcachefs/io_write.h"
@@ -31,9 +31,6 @@

 #include <linux/dcache.h>

-/* XXX cut and pasted from fsck.c */
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
 /* used by write_aligned function for waiting on bch2_write closure */
 struct write_aligned_op_t {
	struct closure cl;
@@ -478,10 +475,9 @@ static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
	closure_init_stack(&cl);

	closure_get(&cl);
-	rbio.bio.bi_end_io	= bcachefs_fuse_read_endio;
-	rbio.bio.bi_private	= &cl;
+	rbio.bio.bi_private	= &cl;

-	bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+	bch2_read(c, rbio_init(&rbio.bio, c, io_opts, bcachefs_fuse_read_endio), inum);

	closure_sync(&cl);
c_src/cmd_list_journal.c
@@ -65,9 +65,21 @@
		struct bbpos k_start	= BBPOS(entry->btree_id, bkey_start_pos(&k->k));
		struct bbpos k_end	= BBPOS(entry->btree_id, k->k.p);

-		if (bbpos_cmp(k_start, i->start) >= 0 &&
-		    bbpos_cmp(k_end, i->end) <= 0)
-			return true;
+		if (!i->start.pos.snapshot &&
+		    !i->end.pos.snapshot) {
+			k_start.pos.snapshot = 0;
+			k_end.pos.snapshot = 0;
+		}
+
+		if (!k->k.size) {
+			if (bbpos_cmp(k_start, i->start) >= 0 &&
+			    bbpos_cmp(k_end, i->end) <= 0)
+				return true;
+		} else {
+			if (bbpos_cmp(i->start, k_end) <= 0 &&
+			    bbpos_cmp(i->end, k_start) >= 0)
+				return true;
+		}
	}
	return false;
 }
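The change above switches sized keys (extents) from a containment test to an interval-overlap test, while zero-size keys keep the old containment behaviour. A standalone illustration of the difference, using plain integers in place of struct bbpos:

#include <stdbool.h>

struct range { long start, end; };

/* old behaviour: the key must lie entirely inside the filter */
static bool contained(struct range key, struct range filter)
{
    return key.start >= filter.start && key.end <= filter.end;
}

/* new behaviour for extents: any intersection matches */
static bool overlaps(struct range key, struct range filter)
{
    return filter.start <= key.end && filter.end >= key.start;
}

With a filter of [50, 60], an extent covering [0, 100] now matches under overlaps() even though contained() rejects it, so filtering by a small key range no longer hides the large extents that cross it.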
@@ -75,9 +87,9 @@ static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry,
 static bool entry_matches_transaction_filter(struct jset_entry *entry,
					      d_bbpos_range filter)
 {
-	if (entry->type == BCH_JSET_ENTRY_btree_root ||
-	    entry->type == BCH_JSET_ENTRY_btree_keys ||
-	    entry->type == BCH_JSET_ENTRY_overwrite)
+	if (!entry->level &&
+	    (entry->type == BCH_JSET_ENTRY_btree_keys ||
+	     entry->type == BCH_JSET_ENTRY_overwrite))
		jset_entry_for_each_key(entry, k)
			if (bkey_matches_filter(filter, entry, k))
				return true;
@@ -90,6 +102,8 @@ static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
 {
	struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
	unsigned b = jset_entry_log_msg_bytes(l);
+	bool have_log_messages = false;
+	bool have_non_log_messages = false;

	darray_for_each(msg_filter, i)
		if (!strncmp(*i, l->d, b))
@@ -100,11 +114,19 @@ static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,

	for (entry = vstruct_next(entry);
	     entry != end && !entry_is_transaction_start(entry);
-	     entry = vstruct_next(entry))
-		if (entry_is_log_msg(entry) ||
-		    entry_matches_transaction_filter(entry, key_filter))
+	     entry = vstruct_next(entry)) {
+		if (entry_matches_transaction_filter(entry, key_filter))
			return true;

+		if (entry_is_log_msg(entry))
+			have_log_messages = true;
+		else
+			have_non_log_messages = true;
+	}
+
+	if (have_log_messages && !have_non_log_messages)
+		return true;
+
	return false;
 }

@@ -134,6 +156,7 @@ static void journal_entry_header_to_text(struct printbuf *out,
		prt_str(out, "blacklisted ");

	prt_printf(out,
+		   "\n"
		   "journal entry     %llu\n"
		   "  version         %u\n"
		   "  last seq        %llu\n"
c_src/cmd_migrate.c
@@ -159,9 +159,9 @@ static void find_superblock_space(ranges extents,
 {
	darray_for_each(extents, i) {
		u64 start = round_up(max(256ULL << 10, i->start),
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);
		u64 end = round_down(i->end,
-				     dev->bucket_size << 9);
+				     dev->opts.bucket_size << 9);

		/* Need space for two superblocks: */
		if (start + (opts.superblock_size << 9) * 2 <= end) {
@@ -209,38 +209,43 @@ static int migrate_fs(const char *fs_path,
	if (!S_ISDIR(stat.st_mode))
		die("%s is not a directory", fs_path);

-	struct dev_opts dev = dev_opts_default();
+	dev_opts_list devs = {};
+	darray_push(&devs, dev_opts_default());

-	dev.path = dev_t_to_path(stat.st_dev);
-	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+	struct dev_opts *dev = &devs.data[0];

-	int ret = PTR_ERR_OR_ZERO(dev.file);
+	dev->path = dev_t_to_path(stat.st_dev);
+	dev->file = bdev_file_open_by_path(dev->path, BLK_OPEN_READ|BLK_OPEN_WRITE, dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev->file);
	if (ret < 0)
-		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
-	dev.bdev = file_bdev(dev.file);
+		die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+	dev->bdev = file_bdev(dev->file);

-	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+	opt_set(fs_opts, block_size, get_blocksize(dev->bdev->bd_fd));

	char *file_path = mprintf("%s/bcachefs", fs_path);
	printf("Creating new filesystem on %s in space reserved at %s\n",
-	       dev.path, file_path);
+	       dev->path, file_path);

-	dev.size	= get_size(dev.bdev->bd_fd);
-	dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
-	dev.nbuckets	= dev.size / dev.bucket_size;
+	dev->fs_size = get_size(dev->bdev->bd_fd);
+	opt_set(dev->opts, bucket_size, bch2_pick_bucket_size(fs_opts, devs));

-	bch2_check_bucket_size(fs_opts, &dev);
+	dev->nbuckets = dev->fs_size / dev->opts.bucket_size;
+
+	bch2_check_bucket_size(fs_opts, dev);

	u64 bcachefs_inum;
	ranges extents = reserve_new_fs_space(file_path,
					      fs_opts.block_size >> 9,
-					      get_size(dev.bdev->bd_fd) / 5,
+					      get_size(dev->bdev->bd_fd) / 5,
					      &bcachefs_inum, stat.st_dev, force);

-	find_superblock_space(extents, format_opts, &dev);
+	find_superblock_space(extents, format_opts, dev);

-	struct bch_sb *sb = bch2_format(fs_opt_strs,
-					fs_opts, format_opts, &dev, 1);
+	struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs);
+	darray_exit(&devs);
+
	u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);

	if (format_opts.passphrase)
@@ -248,16 +253,14 @@ static int migrate_fs(const char *fs_path,

	free(sb);

-	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	char *path[1] = { dev.path };
+	char *path[1] = { dev->path };

+	struct bch_opts opts = bch2_opts_empty();
	opt_set(opts, sb,	sb_offset);
-	opt_set(opts, nostart,	true);
	opt_set(opts, noexcl,	true);
+	opt_set(opts, nostart,	true);

-	c = bch2_fs_open(path, 1, opts);
+	struct bch_fs *c = bch2_fs_open(path, 1, opts);
	if (IS_ERR(c))
		die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));

@@ -265,10 +268,6 @@ static int migrate_fs(const char *fs_path,
	if (ret)
		die("Error allocating buckets_nouse: %s", bch2_err_str(ret));

-	ret = bch2_fs_start(c);
-	if (IS_ERR(c))
-		die("Error starting new filesystem: %s", bch2_err_str(ret));
-
	mark_unreserved_space(c, extents);

	ret = bch2_fs_start(c);
@@ -282,7 +281,10 @@ static int migrate_fs(const char *fs_path,
		.type		= BCH_MIGRATE_migrate,
	};

-	copy_fs(c, fs_fd, fs_path, &s);
+	u64 reserve_start = round_up((format_opts.superblock_size * 2 + 8) << 9,
+				     dev->opts.bucket_size);
+
+	copy_fs(c, fs_fd, fs_path, &s, reserve_start);

	bch2_fs_stop(c);

@@ -310,7 +312,7 @@ static int migrate_fs(const char *fs_path,
	       "filesystem. That file can be deleted once the old filesystem is\n"
	       "no longer needed (and should be deleted prior to running\n"
	       "bcachefs migrate-superblock)\n",
-	       sb_offset, dev.path, dev.path, sb_offset);
+	       sb_offset, dev->path, dev->path, sb_offset);
	return 0;
 }

@@ -374,7 +376,7 @@ static void migrate_superblock_usage(void)
 int cmd_migrate_superblock(int argc, char *argv[])
 {
	char *dev		= NULL;
-	u64 offset		= 0;
+	u64 sb_offset		= 0;
	int opt, ret;

	while ((opt = getopt(argc, argv, "d:o:h")) != -1)
@@ -383,7 +385,7 @@ int cmd_migrate_superblock(int argc, char *argv[])
			dev = optarg;
			break;
		case 'o':
-			ret = kstrtou64(optarg, 10, &offset);
+			ret = kstrtou64(optarg, 10, &sb_offset);
			if (ret)
				die("Invalid offset");
			break;
@@ -395,29 +397,72 @@ int cmd_migrate_superblock(int argc, char *argv[])
	if (!dev)
		die("Please specify a device");

-	if (!offset)
+	if (!sb_offset)
		die("Please specify offset of existing superblock");

	int fd = xopen(dev, O_RDWR);
-	struct bch_sb *sb = __bch2_super_read(fd, offset);
+	struct bch_sb *sb = __bch2_super_read(fd, sb_offset);
+	unsigned sb_size = 1U << sb->layout.sb_max_size_bits;

	if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
		die("Can't add superblock: no space left in superblock layout");

-	unsigned i;
-	for (i = 0; i < sb->layout.nr_superblocks; i++)
-		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
-			die("Superblock layout already has default superblock");
+	for (unsigned i = 0; i < sb->layout.nr_superblocks; i++)
+		if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR ||
+		    le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size)
+			die("Superblock layout already has default superblocks");

-	memmove(&sb->layout.sb_offset[1],
+	memmove(&sb->layout.sb_offset[2],
		&sb->layout.sb_offset[0],
		sb->layout.nr_superblocks * sizeof(u64));
-	sb->layout.nr_superblocks++;

+	sb->layout.nr_superblocks += 2;
	sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+	sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size);

+	/* also write first 0-3.5k bytes with zeroes, ensure we blow away old
+	 * superblock */
+	static const char zeroes[BCH_SB_SECTOR << 9];
+	xpwrite(fd, zeroes, BCH_SB_SECTOR << 9, 0, "zeroing start of disk");
+
	bch2_super_write(fd, sb);
	close(fd);
+
+	/* mark new superblocks */
+
+	struct bch_opts opts = bch2_opts_empty();
+	opt_set(opts, nostart,	true);
+	opt_set(opts, sb,	sb_offset);
+
+	struct bch_fs *c = bch2_fs_open(&dev, 1, opts);
+	ret =   PTR_ERR_OR_ZERO(c) ?:
+		bch2_buckets_nouse_alloc(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+
+	struct bch_dev *ca = c->devs[0];
+	for (u64 b = 0; bucket_to_sector(ca, b) < BCH_SB_SECTOR + sb_size * 2; b++)
+		set_bit(b, ca->buckets_nouse);
+
+	ret = bch2_fs_start(c);
+	if (ret)
+		die("Error starting filesystem: %s", bch2_err_str(ret));
+
+	bch2_fs_stop(c);
+
+	opts = bch2_opts_empty();
+	opt_set(opts, fsck,		true);
+	opt_set(opts, fix_errors,	true);
+
+	/*
+	 * Hack: the free space counters are coming out wrong after marking the
+	 * new superblock, but it's just the device counters so it's
+	 * inconsequential:
+	 */
+
+	c = bch2_fs_open(&dev, 1, opts);
+	ret = PTR_ERR_OR_ZERO(c);
+	if (ret)
+		die("error opening filesystem: %s", bch2_err_str(ret));
+	bch2_fs_stop(c);
	return 0;
 }
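In short, the layout manipulation performed by the updated cmd_migrate_superblock() above: shift the existing offset table down by two slots and install both default superblock locations. A condensed sketch using the same fields as the code above (sb_size is in sectors here, matching sb_max_size_bits; helpers like cpu_to_le64 come from the tree):

static void add_default_superblocks(struct bch_sb *sb)
{
    unsigned sb_size = 1U << sb->layout.sb_max_size_bits;

    /* make room at the front of the offset table, preserving the
     * migration-time superblock entries */
    memmove(&sb->layout.sb_offset[2],
            &sb->layout.sb_offset[0],
            sb->layout.nr_superblocks * sizeof(u64));

    /* fill in both standard slots, so the device carries superblocks at
     * BCH_SB_SECTOR and BCH_SB_SECTOR + sb_size plus the original copy */
    sb->layout.nr_superblocks += 2;
    sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
    sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size);
}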
c_src/cmd_option.c
@@ -30,7 +30,7 @@ static void set_option_usage(void)
	     "Usage: bcachefs set-fs-option [OPTION].. device\n"
	     "\n"
	     "Options:\n");
-	bch2_opts_usage(OPT_MOUNT);
+	bch2_opts_usage(OPT_MOUNT|OPT_RUNTIME);
	puts("  -d, --dev-idx               index for device specific options\n"
	     "  -h, --help                  display this help and exit\n"
	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
@@ -111,16 +111,16 @@ int cmd_set_option(int argc, char *argv[])
		if (!bch2_opt_defined_by_id(&new_opts, i))
			continue;

-		ret = bch2_opt_check_may_set(c, i, v);
-		if (ret < 0) {
-			fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
-			continue;
-		}
-
		if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
			fprintf(stderr, "Can't set option %s\n", opt->attr.name);

		if (opt->flags & OPT_FS) {
+			ret = bch2_opt_check_may_set(c, NULL, i, v);
+			if (ret < 0) {
+				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+				continue;
+			}
+
			bch2_opt_set_sb(c, NULL, opt, v);
		}

@@ -133,6 +133,12 @@ int cmd_set_option(int argc, char *argv[])
				continue;
			}

+			ret = bch2_opt_check_may_set(c, ca, i, v);
+			if (ret < 0) {
+				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+				continue;
+			}
+
			bch2_opt_set_sb(c, ca, opt, v);
			bch2_dev_put(ca);
		}
c_src/cmds.h
@@ -11,9 +11,11 @@

 int cmd_format(int argc, char *argv[]);
 int cmd_show_super(int argc, char *argv[]);
+int cmd_recover_super(int argc, char *argv[]);
 int cmd_reset_counters(int argc, char *argv[]);
 int cmd_set_option(int argc, char *argv[]);

+int fs_usage(void);
 int cmd_fs_usage(int argc, char *argv[]);
 int cmd_fs_top(int argc, char *argv[]);
c_src/libbcachefs.c
@@ -16,6 +16,8 @@

 #include <uuid/uuid.h>

+#include <linux/mm.h>
+
 #include "libbcachefs.h"
 #include "crypto.h"
 #include "libbcachefs/bcachefs_format.h"
@@ -31,10 +33,10 @@

 #define NSEC_PER_SEC 1000000000L

-static void init_layout(struct bch_sb_layout *l,
-			unsigned block_size,
-			unsigned sb_size,
-			u64 sb_start, u64 sb_end)
+void bch2_sb_layout_init(struct bch_sb_layout *l,
+			 unsigned block_size,
+			 unsigned sb_size,
+			 u64 sb_start, u64 sb_end)
 {
	u64 sb_pos = sb_start;
	unsigned i;
@@ -60,83 +62,86 @@ static void init_layout(struct bch_sb_layout *l,
		sb_start, sb_pos, sb_end, sb_size);
 }

-/* minimum size filesystem we can create, given a bucket size: */
-static u64 min_size(unsigned bucket_size)
+static u64 dev_max_bucket_size(u64 dev_size)
 {
-	return BCH_MIN_NR_NBUCKETS * bucket_size;
+	return rounddown_pow_of_two(dev_size / (BCH_MIN_NR_NBUCKETS * 4));
 }

-u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
 {
-	u64 bucket_size;
-
-	if (dev->size < min_size(opts.block_size))
-		die("cannot format %s, too small (%llu bytes, min %llu)",
-		    dev->path, dev->size, min_size(opts.block_size));
-
	/* Bucket size must be >= block size: */
-	bucket_size = opts.block_size;
+	u64 bucket_size = opts.block_size;

	/* Bucket size must be >= btree node size: */
	if (opt_defined(opts, btree_node_size))
-		bucket_size = max_t(unsigned, bucket_size,
-				    opts.btree_node_size);
+		bucket_size = max_t(u64, bucket_size, opts.btree_node_size);

-	/* Want a bucket size of at least 128k, if possible: */
-	bucket_size = max(bucket_size, 128ULL << 10);
+	u64 min_dev_size = BCH_MIN_NR_NBUCKETS * bucket_size;
+	darray_for_each(devs, i)
+		if (i->fs_size < min_dev_size)
+			die("cannot format %s, too small (%llu bytes, min %llu)",
+			    i->path, i->fs_size, min_dev_size);

-	if (dev->size >= min_size(bucket_size)) {
-		unsigned scale = max(1,
-				     ilog2(dev->size / min_size(bucket_size)) / 4);
+	u64 total_fs_size = 0;
+	darray_for_each(devs, i)
+		total_fs_size += i->fs_size;

-		scale = rounddown_pow_of_two(scale);
+	struct sysinfo info;
+	si_meminfo(&info);

-		/* max bucket size 1 mb */
-		bucket_size = min(bucket_size * scale, 1ULL << 20);
-	} else {
-		do {
-			bucket_size /= 2;
-		} while (dev->size < min_size(bucket_size));
-	}
+	/*
+	 * Large fudge factor to allow for other fsck processes and devices
+	 * being added after creation
+	 */
+	u64 mem_available_for_fsck = info.totalram / 8;
+	u64 buckets_can_fsck = mem_available_for_fsck / (sizeof(struct bucket) * 1.5);
+	u64 mem_lower_bound = roundup_pow_of_two(total_fs_size / buckets_can_fsck);
+
+	/*
+	 * Lower bound to avoid fragmenting encoded (checksummed, compressed)
+	 * extents too much as they're moved:
+	 */
+	bucket_size = max(bucket_size, opt_get(opts, encoded_extent_max) * 4);
+
+	/* Lower bound to ensure we can fsck: */
+	bucket_size = max(bucket_size, mem_lower_bound);
+
+	u64 perf_lower_bound = min(2ULL << 20, total_fs_size / (1ULL << 20));
+
+	/* We also prefer larger buckets for performance, up to 2MB at 2T */
+	bucket_size = max(bucket_size, perf_lower_bound);

	return bucket_size;
 }

 void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
 {
-	if (dev->bucket_size < opts.block_size)
-		die("Bucket size (%llu) cannot be smaller than block size (%u)",
-		    dev->bucket_size, opts.block_size);
+	if (dev->opts.bucket_size < opts.block_size)
+		die("Bucket size (%u) cannot be smaller than block size (%u)",
+		    dev->opts.bucket_size, opts.block_size);

	if (opt_defined(opts, btree_node_size) &&
-	    dev->bucket_size < opts.btree_node_size)
-		die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
-		    dev->bucket_size, opts.btree_node_size);
+	    dev->opts.bucket_size < opts.btree_node_size)
+		die("Bucket size (%u) cannot be smaller than btree node size (%u)",
+		    dev->opts.bucket_size, opts.btree_node_size);

	if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
-		die("Not enough buckets: %llu, need %u (bucket size %llu)",
-		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
-
-	if (dev->bucket_size > (u32) U16_MAX << 9)
-		die("Bucket size (%llu) too big (max %u)",
-		    dev->bucket_size, (u32) U16_MAX << 9);
+		die("Not enough buckets: %llu, need %u (bucket size %u)",
+		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->opts.bucket_size);
 }
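To make the new fsck memory bound concrete, here is a worked example with assumed inputs: 16 GiB of RAM, a 10 TiB filesystem, and sizeof(struct bucket) taken as 40 bytes purely for illustration (the real value depends on the tree). With these numbers the bound comes out to 512 KiB:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t totalram      = 16ULL << 30;   /* 16 GiB, assumed */
    uint64_t total_fs_size = 10ULL << 40;   /* 10 TiB, assumed */
    uint64_t bucket_bytes  = 40 * 1.5;      /* assumed in-memory cost per bucket */

    /* large fudge factor, as in bch2_pick_bucket_size() above */
    uint64_t mem_available_for_fsck = totalram / 8;
    uint64_t buckets_can_fsck = mem_available_for_fsck / bucket_bytes;

    /* round up to a power of two, as roundup_pow_of_two() does */
    uint64_t lower = total_fs_size / buckets_can_fsck;
    uint64_t mem_lower_bound = 1;
    while (mem_lower_bound < lower)
        mem_lower_bound <<= 1;

    printf("min bucket size for fsck: %llu bytes\n",
           (unsigned long long) mem_lower_bound);
    return 0;
}

Under these assumptions, 2 GiB of fsck budget covers roughly 35.8 million buckets, so 10 TiB needs buckets of at least ~307 KB, rounded up to 512 KiB; the 2 MB performance lower bound then takes over on filesystems of 2 TB and up.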
 static unsigned parse_target(struct bch_sb_handle *sb,
-			     struct dev_opts *devs, size_t nr_devs,
+			     dev_opts_list devs,
			      const char *s)
 {
-	struct dev_opts *i;
-	int idx;
-
	if (!s)
		return 0;

-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
		if (!strcmp(s, i->path))
-			return dev_to_target(i - devs);
+			return dev_to_target(i - devs.data);

-	idx = bch2_disk_path_find(sb, s);
+	int idx = bch2_disk_path_find(sb, s);
	if (idx >= 0)
		return group_to_target(idx);

@@ -144,56 +149,61 @@ static unsigned parse_target(struct bch_sb_handle *sb,
	return 0;
 }

+static void bch2_opt_set_sb_all(struct bch_sb *sb, int dev_idx, struct bch_opts *opts)
+{
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
+		u64 v = bch2_opt_defined_by_id(opts, id)
+			? bch2_opt_get_by_id(opts, id)
+			: bch2_opt_get_by_id(&bch2_opts_default, id);
+
+		__bch2_opt_set_sb(sb, dev_idx, &bch2_opt_table[id], v);
+	}
+}
+
 struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
			    struct bch_opts	fs_opts,
			    struct format_opts	opts,
-			   struct dev_opts	*devs,
-			   size_t		nr_devs)
+			   dev_opts_list	devs)
 {
	struct bch_sb_handle sb = { NULL };
-	struct dev_opts *i;
	unsigned max_dev_block_size = 0;
-	unsigned opt_id;
-	u64 min_bucket_size = U64_MAX;

-	for (i = devs; i < devs + nr_devs; i++)
+	darray_for_each(devs, i)
		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));

	/* calculate block size: */
-	if (!opt_defined(fs_opts, block_size)) {
+	if (!opt_defined(fs_opts, block_size))
		opt_set(fs_opts, block_size, max_dev_block_size);
-	} else if (fs_opts.block_size < max_dev_block_size)
+
+	if (fs_opts.block_size < max_dev_block_size)
		die("blocksize too small: %u, must be greater than device blocksize %u",
		    fs_opts.block_size, max_dev_block_size);

	/* get device size, if it wasn't specified: */
-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->size)
-			i->size = get_size(i->bdev->bd_fd);
+	darray_for_each(devs, i)
+		if (!i->fs_size)
+			i->fs_size = get_size(i->bdev->bd_fd);

	/* calculate bucket sizes: */
-	for (i = devs; i < devs + nr_devs; i++)
-		min_bucket_size = min(min_bucket_size,
-			i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+	u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);

-	for (i = devs; i < devs + nr_devs; i++)
-		if (!i->bucket_size)
-			i->bucket_size = min_bucket_size;
+	darray_for_each(devs, i)
+		if (!opt_defined(i->opts, bucket_size))
+			opt_set(i->opts, bucket_size,
+				min(fs_bucket_size, dev_max_bucket_size(i->fs_size)));

-	for (i = devs; i < devs + nr_devs; i++) {
-		i->nbuckets = i->size / i->bucket_size;
+	darray_for_each(devs, i) {
+		i->nbuckets = i->fs_size / i->opts.bucket_size;
		bch2_check_bucket_size(fs_opts, i);
	}

	/* calculate btree node size: */
	if (!opt_defined(fs_opts, btree_node_size)) {
-		/* 256k default btree node size */
-		opt_set(fs_opts, btree_node_size, 256 << 10);
+		unsigned s = bch2_opts_default.btree_node_size;

-		for (i = devs; i < devs + nr_devs; i++)
-			fs_opts.btree_node_size =
-				min_t(unsigned, fs_opts.btree_node_size,
-				      i->bucket_size);
+		darray_for_each(devs, i)
+			s = min(s, i->opts.bucket_size);
+		opt_set(fs_opts, btree_node_size, s);
	}

	if (uuid_is_null(opts.uuid.b))
@@ -206,7 +216,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
	sb.sb->version_min	= le16_to_cpu(opts.version);
	sb.sb->magic		= BCHFS_MAGIC;
	sb.sb->user_uuid	= opts.uuid;
-	sb.sb->nr_devices	= nr_devs;
+	sb.sb->nr_devices	= devs.nr;
+	SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb.sb, opts.version);

	if (opts.version == bcachefs_metadata_version_current)
		sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
@@ -218,17 +229,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
	       opts.label,
	       min(strlen(opts.label), sizeof(sb.sb->label)));

-	for (opt_id = 0;
-	     opt_id < bch2_opts_nr;
-	     opt_id++) {
-		u64 v;
-
-		v = bch2_opt_defined_by_id(&fs_opts, opt_id)
-			? bch2_opt_get_by_id(&fs_opts, opt_id)
-			: bch2_opt_get_by_id(&bch2_opts_default, opt_id);
-
-		__bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v);
-	}
+	bch2_opt_set_sb_all(sb.sb, -1, &fs_opts);

	struct timespec now;
	if (clock_gettime(CLOCK_REALTIME, &now))
@@ -240,31 +241,25 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
	/* Member info: */
	struct bch_sb_field_members_v2 *mi =
		bch2_sb_field_resize(&sb, members_v2,
-				     (sizeof(*mi) + sizeof(struct bch_member) *
-				      nr_devs) / sizeof(u64));
+				     (sizeof(*mi) + sizeof(struct bch_member) * devs.nr) / sizeof(u64));

	mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+	darray_for_each(devs, i) {
+		unsigned idx = i - devs.data;
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, idx);

		uuid_generate(m->uuid.b);
		m->nbuckets	= cpu_to_le64(i->nbuckets);
		m->first_bucket	= 0;
-		m->bucket_size	= cpu_to_le16(i->bucket_size >> 9);
-
-		SET_BCH_MEMBER_DISCARD(m,	i->discard);
-		SET_BCH_MEMBER_DATA_ALLOWED(m,	i->data_allowed);
-		SET_BCH_MEMBER_DURABILITY(m,	i->durability + 1);
+		bch2_opt_set_sb_all(sb.sb, idx, &i->opts);
	}

	/* Disk labels*/
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m;
-		int idx;
-
+	darray_for_each(devs, i) {
		if (!i->label)
			continue;

-		idx = bch2_disk_path_find_or_create(&sb, i->label);
+		int idx = bch2_disk_path_find_or_create(&sb, i->label);
		if (idx < 0)
			die("error creating disk path: %s", strerror(-idx));

@@ -272,18 +267,18 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
		 * Recompute mi and m after each sb modification: its location
		 * in memory may have changed due to reallocation.
		 */
-		m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs.data));
		SET_BCH_MEMBER_GROUP(m,	idx + 1);
	}

	SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+		parse_target(&sb, devs, fs_opt_strs.foreground_target));
	SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+		parse_target(&sb, devs, fs_opt_strs.background_target));
	SET_BCH_SB_PROMOTE_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+		parse_target(&sb, devs, fs_opt_strs.promote_target));
	SET_BCH_SB_METADATA_TARGET(sb.sb,
-		parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
+		parse_target(&sb, devs, fs_opt_strs.metadata_target));

	/* Crypt: */
	if (opts.encrypted) {
@@ -296,19 +291,19 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,

	bch2_sb_members_cpy_v2_v1(&sb);

-	for (i = devs; i < devs + nr_devs; i++) {
-		u64 size_sectors = i->size >> 9;
+	darray_for_each(devs, i) {
+		u64 size_sectors = i->fs_size >> 9;

-		sb.sb->dev_idx = i - devs;
+		sb.sb->dev_idx = i - devs.data;

		if (!i->sb_offset) {
			i->sb_offset	= BCH_SB_SECTOR;
			i->sb_end	= size_sectors;
		}

-		init_layout(&sb.sb->layout, fs_opts.block_size,
-			    opts.superblock_size,
-			    i->sb_offset, i->sb_end);
+		bch2_sb_layout_init(&sb.sb->layout, fs_opts.block_size,
+				    opts.superblock_size,
+				    i->sb_offset, i->sb_end);

		/*
		 * Also create a backup superblock at the end of the disk:
@@ -321,7 +316,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
			struct bch_sb_layout *l = &sb.sb->layout;
			u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);

-			backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
+			backup_sb = rounddown(backup_sb, i->opts.bucket_size >> 9);
			l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
		}

@@ -352,9 +347,9 @@ void bch2_super_write(int fd, struct bch_sb *sb)
	if (sb->offset == BCH_SB_SECTOR) {
		/* Write backup layout */

-		BUG_ON(bs > 4096);
+		unsigned buflen = max(bs, 4096);

-		char *buf = aligned_alloc(bs, bs);
+		char *buf = aligned_alloc(buflen, buflen);
		xpread(fd, buf, bs, 4096 - bs);
		memcpy(buf + bs - sizeof(sb->layout),
		       &sb->layout,
@@ -618,6 +613,8 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)

 /* option parsing */

+#include <getopt.h>
+
 void bch2_opt_strs_free(struct bch_opt_strs *opts)
 {
	unsigned i;
@@ -628,6 +625,64 @@ void bch2_opt_strs_free(struct bch_opt_strs *opts)
	}
 }

+static bool opt_type_filter(const struct bch_option *opt, unsigned opt_types)
+{
+	if (!(opt->flags & opt_types))
+		return false;
+
+	if ((opt_types & OPT_FORMAT) &&
+	    !opt->set_sb && !opt->set_member)
+		return false;
+
+	return true;
+}
+
+const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
+						unsigned opt_types)
+{
+	if (optind >= argc)
+		return NULL;
+
+	if (argv[optind][0] != '-' ||
+	    argv[optind][1] != '-')
+		return NULL;
+
+	char *optstr = strdup(argv[optind] + 2);
+	optarg = argv[optind + 1];
+
+	char *eq = strchr(optstr, '=');
+	if (eq) {
+		*eq = '\0';
+		optarg = eq + 1;
+	}
+
+	if (!optarg)
+		optarg = "1";
+
+	int optid = bch2_opt_lookup(optstr);
+	if (optid < 0)
+		goto noopt;
+
+	const struct bch_option *opt = bch2_opt_table + optid;
|
||||
if (!opt_type_filter(opt, opt_types))
|
||||
goto noopt;
|
||||
|
||||
optind++;
|
||||
|
||||
if (opt->type != BCH_OPT_BOOL) {
|
||||
if (optarg == argv[optind])
|
||||
optind++;
|
||||
} else {
|
||||
optarg = NULL;
|
||||
}
|
||||
|
||||
return opt;
|
||||
noopt:
|
||||
free(optstr);
|
||||
return NULL;
|
||||
}
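
A minimal sketch of how bch2_cmdline_opt_parse() might be driven; only OPT_FORMAT and opt->attr.name appear in the patch itself, the loop is illustrative:

	/* Walk leading --options in argv, reporting each recognized one. */
	const struct bch_option *opt;

	while ((opt = bch2_cmdline_opt_parse(argc, argv, OPT_FORMAT))) {
		/*
		 * For non-boolean options optarg holds the value, taken from
		 * "--name=value" or the following argv entry; for booleans
		 * the parser resets optarg to NULL.
		 */
		printf("option %s value %s\n", opt->attr.name,
		       optarg ?: "(none)");
	}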

struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
unsigned opt_types)
{
@@ -715,19 +770,17 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
#define newline(c) \
do { \
printf("\n"); \
c = 0; \
c = 0; \
} while(0)
void bch2_opts_usage(unsigned opt_types)
{
const struct bch_option *opt;
unsigned i, c = 0, helpcol = 30;

for (opt = bch2_opt_table;
opt < bch2_opt_table + bch2_opts_nr;
opt++) {
if (!(opt->flags & opt_types))
if (!opt_type_filter(opt, opt_types))
continue;

c += printf(" --%s", opt->attr.name);

@@ -28,6 +28,9 @@ struct {
};

void bch2_opt_strs_free(struct bch_opt_strs *);

const struct bch_option *bch2_cmdline_opt_parse(int argc, char *argv[],
unsigned opt_types);
struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
struct bch_opts bch2_parse_opts(struct bch_opt_strs);
void bch2_opts_usage(unsigned);
@@ -42,14 +45,18 @@ struct format_opts {
char *source;
};

static inline unsigned bcachefs_kernel_version(void)
{
return !access("/sys/module/bcachefs/parameters/version", R_OK)
? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
: 0;
}

static inline struct format_opts format_opts_default()
{
unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK)
? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
: bcachefs_metadata_version_current;

return (struct format_opts) {
.version = version,
.version = bcachefs_kernel_version() ?:
bcachefs_metadata_version_current,
.superblock_size = SUPERBLOCK_SIZE_DEFAULT,
};
}
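
The new default relies on GNU C's ?: operator: bcachefs_kernel_version() ?: bcachefs_metadata_version_current evaluates the left-hand side once and substitutes the right-hand side only when it is zero, i.e. it is shorthand for:

	unsigned v = bcachefs_kernel_version();
	if (!v)
		v = bcachefs_metadata_version_current;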
@@ -58,33 +65,35 @@ struct dev_opts {
struct file *file;
struct block_device *bdev;
char *path;
u64 size; /* bytes*/
u64 bucket_size; /* bytes */
const char *label;
unsigned data_allowed;
unsigned durability;
bool discard;

u64 nbuckets;

u64 sb_offset;
u64 sb_end;

u64 nbuckets;
u64 fs_size;

const char *label; /* make this a bch_opt */

struct bch_opts opts;
};

typedef DARRAY(struct dev_opts) dev_opts_list;

static inline struct dev_opts dev_opts_default()
{
return (struct dev_opts) {
.data_allowed = ~0U << 2,
.durability = 1,
};
return (struct dev_opts) { .opts = bch2_opts_empty() };
}

u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
void bch2_sb_layout_init(struct bch_sb_layout *,
unsigned, unsigned, u64, u64);

u64 bch2_pick_bucket_size(struct bch_opts, dev_opts_list);
void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);

struct bch_sb *bch2_format(struct bch_opt_strs,
struct bch_opts,
struct format_opts, struct dev_opts *, size_t);
struct format_opts,
dev_opts_list devs);

void bch2_super_write(int, struct bch_sb *);
struct bch_sb *__bch2_super_read(int, u64);
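
For readers new to this codebase's darray type: dev_opts_list is DARRAY(struct dev_opts), a growable array with .data and .nr members, and darray_for_each() iterates it by element pointer. A small sketch of the pattern the refactor above depends on (darray_push()/darray_exit() are the usual helpers; the device values are made up):

	dev_opts_list devs = {};
	struct dev_opts d = dev_opts_default();

	d.path = "/dev/sda";
	if (darray_push(&devs, d))	/* append by value; may fail */
		die("out of memory");

	darray_for_each(devs, i)	/* i is a struct dev_opts * */
		printf("%s\n", i->path);

	darray_exit(&devs);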
@@ -6,8 +6,8 @@
#include "posix_to_bcachefs.h"
#include "libbcachefs/alloc_foreground.h"
#include "libbcachefs/buckets.h"
#include "libbcachefs/fs-common.h"
#include "libbcachefs/io_write.h"
#include "libbcachefs/namei.h"
#include "libbcachefs/str_hash.h"
#include "libbcachefs/xattr.h"

@@ -264,7 +264,8 @@ void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,

static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
int src_fd, u64 src_size,
char *src_path, struct copy_fs_state *s)
char *src_path, struct copy_fs_state *s,
u64 reserve_start)
{
struct fiemap_iter iter;
struct fiemap_extent e;
@@ -295,11 +296,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
continue;
}

/*
* if the data is below 1 MB, copy it so it doesn't conflict
* with bcachefs's potentially larger superblock:
*/
if (e.fe_physical < 1 << 20) {
/* If the data is in bcachefs's superblock region, copy it: */
if (e.fe_physical < reserve_start) {
copy_data(c, dst, src_fd, e.fe_logical,
e.fe_logical + min(src_size - e.fe_logical,
e.fe_length));
@@ -318,7 +316,8 @@ static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
static void copy_dir(struct copy_fs_state *s,
struct bch_fs *c,
struct bch_inode_unpacked *dst,
int src_fd, const char *src_path)
int src_fd, const char *src_path,
u64 reserve_start)
{
DIR *dir = fdopendir(src_fd);
struct dirent *d;
@@ -369,7 +368,7 @@ static void copy_dir(struct copy_fs_state *s,
switch (mode_to_type(stat.st_mode)) {
case DT_DIR:
fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
copy_dir(s, c, &inode, fd, child_path);
copy_dir(s, c, &inode, fd, child_path, reserve_start);
close(fd);
break;
case DT_REG:
@@ -377,7 +376,7 @@ static void copy_dir(struct copy_fs_state *s,

fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
copy_file(c, &inode, fd, stat.st_size,
child_path, s);
child_path, s, reserve_start);
close(fd);
break;
case DT_LNK:
@@ -409,7 +408,8 @@ next:

static void reserve_old_fs_space(struct bch_fs *c,
struct bch_inode_unpacked *root_inode,
ranges *extents)
ranges *extents,
u64 reserve_start)
{
struct bch_dev *ca = c->devs[0];
struct bch_inode_unpacked dst;
@@ -422,14 +422,20 @@ static void reserve_old_fs_space(struct bch_fs *c,

ranges_sort_merge(extents);

for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
link_data(c, &dst, i.start, i.start, i.end - i.start);
for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
if (i.end <= reserve_start)
continue;

u64 start = max(i.start, reserve_start);

link_data(c, &dst, start, start, i.end - start);
}

update_inode(c, &dst);
}

void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
struct copy_fs_state *s)
struct copy_fs_state *s, u64 reserve_start)
{
syncfs(src_fd);

@@ -448,10 +454,10 @@ void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,

/* now, copy: */
copy_dir(s, c, &root_inode, src_fd, src_path);
copy_dir(s, c, &root_inode, src_fd, src_path, reserve_start);

if (BCH_MIGRATE_migrate == s->type)
reserve_old_fs_space(c, &root_inode, &s->extents);
reserve_old_fs_space(c, &root_inode, &s->extents, reserve_start);

update_inode(c, &root_inode);

@@ -50,5 +50,5 @@ struct copy_fs_state {
* initialized (`hardlinks` is initialized with zeroes).
*/
void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
struct copy_fs_state *s);
struct copy_fs_state *s, u64);
#endif /* _LIBBCACHE_H */
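
The reserve_start parameter threaded through copy_fs() above clamps every reused hole so nothing below the superblock region is linked into the new filesystem. The clamping itself reduces to this check (a standalone restatement, not code from the patch):

	/* Keep only the part of hole [start, end) at or above reserve_start. */
	static bool clamp_hole(u64 *start, u64 end, u64 reserve_start)
	{
		if (end <= reserve_start)
			return false;	/* hole lies entirely in the reserved region */

		*start = max(*start, reserve_start);
		return true;
	}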
flake.lock (generated, 117 lines changed)

@@ -1,17 +1,12 @@
{
"nodes": {
"crane": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1721842668,
"narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=",
"lastModified": 1742394900,
"narHash": "sha256-vVOAp9ahvnU+fQoKd4SEXB2JG2wbENkpqcwlkIXgUC0=",
"owner": "ipetkov",
"repo": "crane",
"rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf",
"rev": "70947c1908108c0c551ddfd73d4f750ff2ea67cd",
"type": "github"
},
"original": {
@@ -20,35 +15,14 @@
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
],
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1722320953,
"narHash": "sha256-DfGaJtgrzcwPQYLTvjL1KaVIjpvi85b2MpM6yEGvJzM=",
"owner": "nix-community",
"repo": "fenix",
"rev": "483df76def3e5010d709aa3a0418ba2088503994",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"lastModified": 1733328505,
"narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
"type": "github"
},
"original": {
@@ -62,11 +36,11 @@
"nixpkgs-lib": "nixpkgs-lib"
},
"locked": {
"lastModified": 1719994518,
"narHash": "sha256-pQMhCCHyQGRzdfAkdJ4cIWiw+JNuWsTX7f0ZYSyz0VY=",
"lastModified": 1741352980,
"narHash": "sha256-+u2UunDA4Cl5Fci3m7S643HzKmIDAe+fiXrLqYsR2fs=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "9227223f6d922fee3c7b190b2cc238a99527bbb7",
"rev": "f4330d22f1c5d2ba72d3d22df5597d123fdb60a9",
"type": "github"
},
"original": {
@@ -75,13 +49,33 @@
"type": "github"
}
},
"nix-github-actions": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1737420293,
"narHash": "sha256-F1G5ifvqTpJq7fdkT34e/Jy9VCyzd5XfJ9TO8fHhJWE=",
"owner": "nix-community",
"repo": "nix-github-actions",
"rev": "f4158fa080ef4503c8f4c820967d946c2af31ec9",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "nix-github-actions",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1722185531,
"narHash": "sha256-veKR07psFoJjINLC8RK4DiLniGGMgF3QMlS4tb74S6k=",
"lastModified": 1742422364,
"narHash": "sha256-mNqIplmEohk5jRkqYqG19GA8MbQ/D4gQSK0Mu4LvfRQ=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "52ec9ac3b12395ad677e8b62106f0b98c1f8569d",
"rev": "a84ebe20c6bc2ecbcfb000a50776219f48d134cc",
"type": "github"
},
"original": {
@@ -93,40 +87,47 @@
},
"nixpkgs-lib": {
"locked": {
"lastModified": 1719876945,
"narHash": "sha256-Fm2rDDs86sHy0/1jxTOKB1118Q0O3Uc7EC0iXvXKpbI=",
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
"lastModified": 1740877520,
"narHash": "sha256-oiwv/ZK/2FhGxrCkQkB83i7GnWXPPLzoqFHpDD3uYpk=",
"owner": "nix-community",
"repo": "nixpkgs.lib",
"rev": "147dee35aab2193b174e4c0868bd80ead5ce755c",
"type": "github"
},
"original": {
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz"
"owner": "nix-community",
"repo": "nixpkgs.lib",
"type": "github"
}
},
"root": {
"inputs": {
"crane": "crane",
"fenix": "fenix",
"flake-compat": "flake-compat",
"flake-parts": "flake-parts",
"nix-github-actions": "nix-github-actions",
"nixpkgs": "nixpkgs",
"rust-overlay": "rust-overlay",
"treefmt-nix": "treefmt-nix"
}
},
"rust-analyzer-src": {
"flake": false,
"rust-overlay": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1722262053,
"narHash": "sha256-KxjkPVn9rQqYam6DhiN/V2NcMXtYW25maxkJoiVMpmE=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "a021b85be57d34b1eed687fcafd5d5ec64b2d853",
"lastModified": 1742524367,
"narHash": "sha256-KzTwk/5ETJavJZYV1DEWdCx05M4duFCxCpRbQSKWpng=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "70bf752d176b2ce07417e346d85486acea9040ef",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
@@ -137,11 +138,11 @@
]
},
"locked": {
"lastModified": 1722330636,
"narHash": "sha256-uru7JzOa33YlSRwf9sfXpJG+UAV+bnBEYMjrzKrQZFw=",
"lastModified": 1742370146,
"narHash": "sha256-XRE8hL4vKIQyVMDXykFh4ceo3KSpuJF3ts8GKwh5bIU=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "768acdb06968e53aa1ee8de207fd955335c754b7",
"rev": "adc195eef5da3606891cedf80c0d9ce2d3190808",
"type": "github"
},
"original": {
flake.nix (329 lines changed)

@@ -11,13 +11,10 @@
inputs.nixpkgs.follows = "nixpkgs";
};

crane = {
url = "github:ipetkov/crane";
inputs.nixpkgs.follows = "nixpkgs";
};
crane.url = "github:ipetkov/crane";

fenix = {
url = "github:nix-community/fenix";
rust-overlay = {
url = "github:oxalica/rust-overlay";
inputs.nixpkgs.follows = "nixpkgs";
};

@@ -25,6 +22,11 @@
url = "github:edolstra/flake-compat";
flake = false;
};

nix-github-actions = {
url = "github:nix-community/nix-github-actions";
inputs.nixpkgs.follows = "nixpkgs";
};
};

outputs =
@@ -33,27 +35,31 @@
nixpkgs,
flake-parts,
treefmt-nix,
fenix,
crane,
...
rust-overlay,
flake-compat,
nix-github-actions,
}:
let
systems = nixpkgs.lib.filter (s: nixpkgs.lib.hasSuffix "-linux" s) nixpkgs.lib.systems.flakeExposed;
in
flake-parts.lib.mkFlake { inherit inputs; } {
imports = [ inputs.treefmt-nix.flakeModule ];

# can be extended, but these have proper binary cache support in nixpkgs
# as of writing.
systems = [
"aarch64-linux"
"x86_64-linux"
"i686-linux"
];
flake = {
githubActions = nix-github-actions.lib.mkGithubMatrix {
# github actions supports fewer architectures
checks = nixpkgs.lib.getAttrs [ "aarch64-linux" "x86_64-linux" ] self.checks;
};
};

inherit systems;

perSystem =
{
self',
config,
lib,
pkgs,
system,
...
}:
@@ -62,119 +68,225 @@
inherit (lib.lists) findFirst;
inherit (lib.strings) hasPrefix removePrefix substring;

pkgs = import nixpkgs {
inherit system;
overlays = [ (import rust-overlay) ];
};

cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
rustfmtToml = builtins.fromTOML (builtins.readFile ./rustfmt.toml);

craneLib = crane.mkLib pkgs;

rev = self.shortRev or self.dirtyShortRev or (substring 0 8 self.lastModifiedDate);
makefileVersion = removePrefix "VERSION=" (
findFirst (line: hasPrefix "VERSION=" line) "VERSION=0.0.0" (split "\n" (readFile ./Makefile))
);
version = "${makefileVersion}+${rev}";

commonArgs = {
inherit version;
src = self;
mkCommon =
{
crane,
pkgs,
rustVersion ? "latest",

env = {
PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";
# build time
buildPackages,
pkg-config,
rustPlatform,
stdenv,

# run time
keyutils,
libaio,
libsodium,
liburcu,
libuuid,
lz4,
udev,
zlib,
zstd,
}:
let
inherit (stdenv) cc hostPlatform;

craneLib = (crane.mkLib pkgs).overrideToolchain (
p: p.rust-bin.stable."${rustVersion}".minimal.override { extensions = [ "clippy" ]; }
);

args = {
inherit version;
src = self;
strictDeps = true;

env = {
PKG_CONFIG_SYSTEMD_SYSTEMDSYSTEMUNITDIR = "${placeholder "out"}/lib/systemd/system";
PKG_CONFIG_UDEV_UDEVDIR = "${placeholder "out"}/lib/udev";

CARGO_BUILD_TARGET = hostPlatform.rust.rustcTargetSpec;
"CARGO_TARGET_${hostPlatform.rust.cargoEnvVarTarget}_LINKER" = "${cc.targetPrefix}cc";
HOST_CC = "${cc.nativePrefix}cc";
TARGET_CC = "${cc.targetPrefix}cc";
};

makeFlags = [
"INITRAMFS_DIR=${placeholder "out"}/etc/initramfs-tools"
"PREFIX=${placeholder "out"}"
"VERSION=${version}"
];

dontStrip = true;

depsBuildBuild = [
buildPackages.stdenv.cc
];

nativeBuildInputs = [
pkg-config
rustPlatform.bindgenHook
];

buildInputs = [
keyutils
libaio
libsodium
liburcu
libuuid
lz4
udev
zlib
zstd
];

meta = {
description = "Userspace tools for bcachefs";
license = lib.licenses.gpl2Only;
mainProgram = "bcachefs";
};
};

cargoArtifacts = craneLib.buildDepsOnly args;
in
{
inherit args cargoArtifacts craneLib;
};
common = pkgs.callPackage mkCommon { inherit crane; };

makeFlags = [
"INITRAMFS_DIR=${placeholder "out"}/etc/initramfs-tools"
"PREFIX=${placeholder "out"}"
"VERSION=${version}"
];
mkPackage =
{ common, name }:
common.craneLib.buildPackage (
common.args
// {
inherit (common) cargoArtifacts;
pname = name;

dontStrip = true;
enableParallelBuilding = true;
buildPhaseCargoCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
'';
doNotPostBuildInstallCargoBinaries = true;
installPhaseCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
'';

nativeBuildInputs = with pkgs; [
pkg-config
rustPlatform.bindgenHook
];
doInstallCheck = true;
installCheckPhase = ''
runHook preInstallCheck

buildInputs = with pkgs; [
attr
keyutils
libaio
libsodium
liburcu
libuuid
lz4
udev
zlib
zstd
];
test "$($out/bin/bcachefs version)" = "${version}"

meta = {
description = "Userspace tools for bcachefs";
license = lib.licenses.gpl2Only;
mainProgram = "bcachefs";
};
};
runHook postInstallCheck
'';
}
);

cargoArtifacts = craneLib.buildDepsOnly (commonArgs // { pname = cargoToml.package.name; });
mkPackages =
name: systems:
let
packagesForSystem =
crossSystem:
let
localSystem = system;
pkgs' = import nixpkgs {
inherit crossSystem localSystem;
overlays = [ (import rust-overlay) ];
};

common = pkgs'.callPackage mkCommon { inherit crane; };
package = pkgs'.callPackage mkPackage { inherit common name; };
packageFuse = package.overrideAttrs (
final: prev: {
makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
buildInputs = prev.buildInputs ++ [ pkgs'.fuse3 ];
}
);
in
[
(lib.nameValuePair "${name}-${crossSystem}" package)
(lib.nameValuePair "${name}-fuse-${crossSystem}" packageFuse)
];
in
lib.listToAttrs (lib.flatten (map packagesForSystem systems));
in
{
packages.default = config.packages.bcachefs-tools;
packages.bcachefs-tools = craneLib.buildPackage (
commonArgs
packages =
let
inherit (cargoToml.package) name;
in
(mkPackages name systems)
// {
inherit cargoArtifacts;
${name} = config.packages."${name}-${system}";
"${name}-fuse" = config.packages."${name}-fuse-${system}";
default = config.packages.${name};
};

enableParallelBuilding = true;
buildPhaseCargoCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags
'';
installPhaseCommand = ''
make ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}} $makeFlags install
'';
checks = {
inherit (config.packages)
bcachefs-tools
bcachefs-tools-fuse
bcachefs-tools-fuse-i686-linux
;

doInstallCheck = true;
installCheckPhase = ''
runHook preInstallCheck
cargo-clippy = common.craneLib.cargoClippy (
common.args
// {
inherit (common) cargoArtifacts;
cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
}
);

test "$($out/bin/bcachefs version)" = "${version}"
# we have to build our own `craneLib.cargoTest`
cargo-test = common.craneLib.mkCargoDerivation (
common.args
// {
inherit (common) cargoArtifacts;
doCheck = true;

runHook postInstallCheck
'';
}
);
enableParallelChecking = true;

packages.bcachefs-tools-fuse = config.packages.bcachefs-tools.overrideAttrs (
final: prev: {
makeFlags = prev.makeFlags ++ [ "BCACHEFS_FUSE=1" ];
buildInputs = prev.buildInputs ++ [ pkgs.fuse3 ];
}
);
pnameSuffix = "-test";
buildPhaseCargoCommand = "";
checkPhaseCargoCommand = ''
make ''${enableParallelChecking:+-j''${NIX_BUILD_CORES}} $makeFlags libbcachefs.a
cargo test --profile release -- --nocapture
'';
}
);

checks.cargo-clippy = craneLib.cargoClippy (
commonArgs
// {
inherit cargoArtifacts;
cargoClippyExtraArgs = "--all-targets -- --deny warnings";
}
);

# we have to build our own `craneLib.cargoTest`
checks.cargo-test = craneLib.mkCargoDerivation (
commonArgs
// {
inherit cargoArtifacts;
doCheck = true;

enableParallelChecking = true;

pnameSuffix = "-test";
buildPhaseCargoCommand = "";
checkPhaseCargoCommand = ''
make ''${enableParallelChecking:+-j''${NIX_BUILD_CORES}} $makeFlags libbcachefs.a
cargo test --profile release -- --nocapture
'';
}
);
# cargo clippy with the current minimum supported rust version
# according to Cargo.toml
msrv =
let
rustVersion = cargoToml.package.rust-version;
common = pkgs.callPackage mkCommon { inherit crane rustVersion; };
in
common.craneLib.cargoClippy (
common.args
// {
pname = "msrv";
inherit (common) cargoArtifacts;
cargoClippyExtraArgs = "--all-targets --all-features -- --deny warnings";
}
);
};

devShells.default = pkgs.mkShell {
inputsFrom = [
@@ -190,9 +302,12 @@
cargo-audit
cargo-outdated
clang-tools
clippy
rust-analyzer
rustc
(rust-bin.stable.latest.minimal.override {
extensions = [
"rust-analyzer"
"rust-src"
];
})
];
};

@@ -204,7 +319,7 @@
nixfmt.enable = true;
rustfmt.edition = rustfmtToml.edition;
rustfmt.enable = true;
rustfmt.package = fenix.packages.${system}.default.rustfmt;
rustfmt.package = pkgs.rust-bin.selectLatestNightlyWith (toolchain: toolchain.rustfmt);
};
};
};
@@ -7,6 +7,7 @@
#define _CRYPTO_SHA_H

#include <linux/types.h>
#include <sodium/crypto_hash_sha256.h>

#define SHA1_DIGEST_SIZE 20
#define SHA1_BLOCK_SIZE 64
@@ -112,4 +113,9 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,

extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash);

static inline void sha256(const u8 *data, unsigned int len, u8 *out)
{
crypto_hash_sha256(out, data, len);
}
#endif
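
The sha256() inline added above forwards to libsodium's crypto_hash_sha256(), which always writes a 32-byte digest. A minimal usage sketch:

	const u8 msg[] = "hello";
	u8 digest[32];

	sha256(msg, sizeof(msg) - 1, digest);	/* digest = SHA-256("hello") */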
@@ -16,53 +16,6 @@ typedef struct {
u64 counter;
} atomic64_t;

#ifndef C11_ATOMICS

#include <urcu/uatomic.h>

#if (CAA_BITS_PER_LONG != 64)
#define ATOMIC64_SPINLOCK
#endif

#define __ATOMIC_READ(p) uatomic_read(p)
#define __ATOMIC_SET(p, v) uatomic_set(p, v)
#define __ATOMIC_SET_RELEASE(p, v) uatomic_set(p, v)
#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v)
#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v)
#define __ATOMIC_ADD(v, p) uatomic_add(p, v)
#define __ATOMIC_SUB(v, p) uatomic_sub(p, v)
#define __ATOMIC_INC(p) uatomic_inc(p)
#define __ATOMIC_DEC(p) uatomic_dec(p)
#define __ATOMIC_AND(v, p) uatomic_and(p, v)
#define __ATOMIC_OR(v, p) uatomic_or(p, v)

#define xchg(p, v) uatomic_xchg(p, v)
#define xchg_acquire(p, v) uatomic_xchg(p, v)
#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new)

#define try_cmpxchg(p, _old, _new) \
({ \
typeof(*(_old)) _v = cmpxchg(p, *(_old), _new); \
bool _ret = _v == *(_old); \
*(_old) = _v; \
_ret; \
})

#define try_cmpxchg_acquire(p, _old, _new) \
try_cmpxchg(p, _old, _new)

#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add()
#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add()
#define smp_wmb() cmm_smp_wmb()
#define smp_rmb() cmm_smp_rmb()
#define smp_mb() cmm_smp_mb()
#define smp_read_barrier_depends() cmm_smp_read_barrier_depends()
#define smp_acquire__after_ctrl_dep() cmm_smp_mb()

#else /* C11_ATOMICS */

#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#define __ATOMIC_SET_RELEASE(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
@@ -83,6 +36,11 @@ typedef struct {
__ATOMIC_SEQ_CST, \
__ATOMIC_SEQ_CST)

#define try_cmpxchg_acquire(p, old, new) \
__atomic_compare_exchange_n((p), old, new, false, \
__ATOMIC_ACQUIRE, \
__ATOMIC_RELAXED)

#define cmpxchg(p, old, new) \
({ \
typeof(*(p)) __old = (old); \
@@ -109,7 +67,7 @@ typedef struct {
\
__atomic_compare_exchange_n((p), &__old, new, false, \
__ATOMIC_RELEASE, \
__ATOMIC_RELEASE); \
__ATOMIC_RELAXED); \
__old; \
})
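
Both the urcu and C11 paths provide the same try_cmpxchg() contract as the kernel: it attempts the swap, and on failure writes the observed value back through the second argument and returns false, so the caller's copy is already refreshed for the next attempt. The canonical retry loop built on it, as a sketch:

	/* Atomically double *p; illustrative only. */
	static void atomic_double(unsigned long *p)
	{
		unsigned long old = *p;

		while (!try_cmpxchg(p, &old, old * 2))
			;	/* old now holds the value that was observed */
	}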

@@ -119,9 +77,7 @@ typedef struct {
#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_read_barrier_depends()

#endif

#define smp_acquire__after_ctrl_dep() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_load_acquire(p) \

@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>

struct bio_set;
struct bio;
@@ -63,6 +65,8 @@ struct block_device {
struct gendisk * bd_disk;
struct gendisk __bd_disk;
int bd_fd;

struct mutex bd_holder_lock;
};

#define bdev_kobj(_bdev) (&((_bdev)->kobj))

@@ -65,7 +65,10 @@ unsigned bdev_logical_block_size(struct block_device *bdev);
sector_t get_capacity(struct gendisk *disk);

struct blk_holder_ops {
void (*mark_dead)(struct block_device *bdev);
void (*mark_dead)(struct block_device *bdev, bool surprise);
void (*sync)(struct block_device *bdev);
int (*freeze)(struct block_device *bdev);
int (*thaw)(struct block_device *bdev);
};

static inline struct block_device *file_bdev(struct file *file)
@@ -80,8 +83,12 @@ int lookup_bdev(const char *path, dev_t *);

struct super_block {
void *s_fs_info;
struct rw_semaphore s_umount;
};

static inline void evict_inodes(struct super_block *sb) {}
static inline int sync_filesystem(struct super_block *) { return 0; }

/*
* File types
*

@@ -67,6 +67,7 @@
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#define fallthrough __attribute__((__fallthrough__))
#define __noreturn __attribute__((__noreturn__))
#define __no_kmsan_checks

#ifndef __counted_by
#define __counted_by(nr)

@@ -9,6 +9,8 @@ struct dentry {
struct inode *d_inode;
};

static inline void shrink_dcache_sb(struct super_block *) {}

#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n))

@@ -12,6 +12,7 @@
#include <linux/byteorder.h>
#include <linux/compiler.h>
#include <linux/dcache.h>
#include <linux/kmsan-checks.h>
#include <linux/math.h>
#include <linux/minmax.h>

include/linux/kmsan-checks.h (new file, 98 lines)

@@ -0,0 +1,98 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KMSAN checks to be used for one-off annotations in subsystems.
*
* Copyright (C) 2017-2022 Google LLC
* Author: Alexander Potapenko <glider@google.com>
*
*/

#ifndef _LINUX_KMSAN_CHECKS_H
#define _LINUX_KMSAN_CHECKS_H

#include <linux/types.h>

#ifdef CONFIG_KMSAN

/**
* kmsan_poison_memory() - Mark the memory range as uninitialized.
* @address: address to start with.
* @size: size of buffer to poison.
* @flags: GFP flags for allocations done by this function.
*
* Until other data is written to this range, KMSAN will treat it as
* uninitialized. Error reports for this memory will reference the call site of
* kmsan_poison_memory() as origin.
*/
void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);

/**
* kmsan_unpoison_memory() - Mark the memory range as initialized.
* @address: address to start with.
* @size: size of buffer to unpoison.
*
* Until other data is written to this range, KMSAN will treat it as
* initialized.
*/
void kmsan_unpoison_memory(const void *address, size_t size);

/**
* kmsan_check_memory() - Check the memory range for being initialized.
* @address: address to start with.
* @size: size of buffer to check.
*
* If any piece of the given range is marked as uninitialized, KMSAN will report
* an error.
*/
void kmsan_check_memory(const void *address, size_t size);

/**
* kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
* @to: destination address in the userspace.
* @from: source address in the kernel.
* @to_copy: number of bytes to copy.
* @left: number of bytes not copied.
*
* If this is a real userspace data transfer, KMSAN checks the bytes that were
* actually copied to ensure there was no information leak. If @to belongs to
* the kernel space (which is possible for compat syscalls), KMSAN just copies
* the metadata.
*/
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
size_t left);

/**
* kmsan_memmove() - Notify KMSAN about a data copy within kernel.
* @to: destination address in the kernel.
* @from: source address in the kernel.
* @size: number of bytes to copy.
*
* Invoked after non-instrumented version (e.g. implemented using assembly
* code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
*/
void kmsan_memmove(void *to, const void *from, size_t to_copy);

#else

static inline void kmsan_poison_memory(const void *address, size_t size,
gfp_t flags)
{
}
static inline void kmsan_unpoison_memory(const void *address, size_t size)
{
}
static inline void kmsan_check_memory(const void *address, size_t size)
{
}
static inline void kmsan_copy_to_user(void __user *to, const void *from,
size_t to_copy, size_t left)
{
}

static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
{
}

#endif

#endif /* _LINUX_KMSAN_CHECKS_H */
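
In this userspace build the CONFIG_KMSAN branch is never taken, so all of these are no-op stubs and callers may annotate unconditionally. A sketch of the intended pattern around a copy KMSAN cannot observe (hardware_dma_copy() is hypothetical):

	static void copy_opaque(void *dst, const void *src, size_t len)
	{
		hardware_dma_copy(dst, src, len);	/* not instrumented */

		/* propagate shadow state so dst isn't reported uninitialized: */
		kmsan_memmove(dst, src, len);
	}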

@@ -82,4 +82,71 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
return div_s64_rem(dividend, divisor, &remainder);
}

#ifndef mul_u32_u32
/*
* Many a GCC version messes this up and generates a 64x64 mult :-(
*/
static inline u64 mul_u32_u32(u32 a, u32 b)
{
return (u64)a * b;
}
#endif

#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)

#ifndef mul_u64_u64_shr
static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
{
return (u64)(((unsigned __int128)a * mul) >> shift);
}
#endif /* mul_u64_u64_shr */

#else

#ifndef mul_u64_u64_shr
static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
{
union {
u64 ll;
struct {
#ifdef __BIG_ENDIAN
u32 high, low;
#else
u32 low, high;
#endif
} l;
} rl, rm, rn, rh, a0, b0;
u64 c;

a0.ll = a;
b0.ll = b;

rl.ll = mul_u32_u32(a0.l.low, b0.l.low);
rm.ll = mul_u32_u32(a0.l.low, b0.l.high);
rn.ll = mul_u32_u32(a0.l.high, b0.l.low);
rh.ll = mul_u32_u32(a0.l.high, b0.l.high);

/*
* Each of these lines computes a 64-bit intermediate result into "c",
* starting at bits 32-95. The low 32-bits go into the result of the
* multiplication, the high 32-bits are carried into the next step.
*/
rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
rh.l.high = (c >> 32) + rh.l.high;

/*
* The 128-bit result of the multiplication is in rl.ll and rh.ll,
* shift it right and throw away the high part of the result.
*/
if (shift == 0)
return rl.ll;
if (shift < 64)
return (rl.ll >> shift) | (rh.ll << (64 - shift));
return rh.ll >> (shift & 63);
}
#endif /* mul_u64_u64_shr */

#endif

#endif /* _LINUX_MATH64_H */
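
A concrete check of the arithmetic: squaring 2^64 - 1 gives 0xfffffffffffffffe0000000000000001, so both implementations must recover the two halves of the 128-bit product as follows:

	/* shift 0 returns the low 64 bits, shift 64 the high 64 bits: */
	BUG_ON(mul_u64_u64_shr(~0ULL, ~0ULL, 0)  != 1);
	BUG_ON(mul_u64_u64_shr(~0ULL, ~0ULL, 64) != 0xfffffffffffffffeULL);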

@@ -4,6 +4,7 @@

#include <sys/syscall.h>
#include <unistd.h>
#include <linux/bug.h>
#include <linux/types.h>

struct sysinfo {

@@ -9,7 +9,9 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/math64.h>

#ifdef SYS_getrandom
static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
@@ -67,4 +69,24 @@ static inline u32 get_random_u32_below(u32 ceil)
}
}

static inline u32 __get_random_u32_below(u32 ceil)
{
return get_random_u32_below(ceil);
}

static inline u64 get_random_u64_below(u64 ceil)
{
if (ceil <= 1)
return 0;
if (ceil <= U32_MAX)
return get_random_u32_below(ceil);

for (;;) {
u64 rand = get_random_u64();
u64 mult = ceil * rand;
if (likely(mult >= -ceil % ceil))
return mul_u64_u64_shr(ceil, rand, 64);
}
}

#endif /* _LINUX_RANDOM_H */
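
get_random_u64_below() is the multiply-and-reject method for unbiased bounded integers: treating rand as a fraction of 2^64, the high half of ceil * rand lands in [0, ceil), and a draw is rejected when the product's low half falls below 2^64 mod ceil, which -ceil % ceil computes in two's complement. The same shape at 32 bits, written with plain 64-bit arithmetic, may make this easier to see (a restatement, not code from this tree):

	static u32 random_u32_below(u32 ceil)
	{
		if (ceil <= 1)
			return 0;

		for (;;) {
			u32 rand = get_random_u32();
			u64 mult = (u64)ceil * rand;

			/* reject if the low half is under 2^32 % ceil: */
			if ((u32)mult >= -ceil % ceil)
				return mult >> 32;
		}
	}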
|
||||
|
6
include/linux/sched/sysctl.h
Normal file
6
include/linux/sched/sysctl.h
Normal file
@ -0,0 +1,6 @@
|
||||
#ifndef __TOOLS_LINUX_SCHED_SYSCTL_H
|
||||
#define __TOOLS_LINUX_SCHED_SYSCTL_H
|
||||
|
||||
#define sysctl_hung_task_timeout_secs (HZ * 10)
|
||||
|
||||
#endif /* __TOOLS_LINUX_SCHED_SYSCTL_H */
|
0
include/linux/unicode.h
Normal file
0
include/linux/unicode.h
Normal file
@ -232,7 +232,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
int ret = 0;
|
||||
|
||||
bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k),
|
||||
c, alloc_v2_unpack_error,
|
||||
c, alloc_v3_unpack_error,
|
||||
"unpack error");
|
||||
fsck_err:
|
||||
return ret;
|
||||
@ -589,6 +589,8 @@ iter_err:
|
||||
|
||||
int bch2_alloc_read(struct bch_fs *c)
|
||||
{
|
||||
down_read(&c->state_lock);
|
||||
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct bch_dev *ca = NULL;
|
||||
int ret;
|
||||
@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c)
|
||||
bch2_dev_put(ca);
|
||||
bch2_trans_put(trans);
|
||||
|
||||
up_read(&c->state_lock);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
@ -673,8 +676,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k);
|
||||
|
||||
int ret = __bch2_fsck_err(NULL, trans, flags, err_id,
|
||||
"bucket incorrectly %sset in %s btree\n"
|
||||
" %s",
|
||||
"bucket incorrectly %sset in %s btree\n%s",
|
||||
set ? "" : "un",
|
||||
bch2_btree_id_str(btree),
|
||||
buf.buf);
|
||||
@ -777,14 +779,12 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s
|
||||
s64 delta_sectors,
|
||||
s64 delta_fragmented, unsigned flags)
|
||||
{
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_dev_data_type,
|
||||
.dev_data_type.dev = ca->dev_idx,
|
||||
.dev_data_type.data_type = data_type,
|
||||
};
|
||||
s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented };
|
||||
|
||||
return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc);
|
||||
return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
|
||||
d, dev_data_type,
|
||||
.dev = ca->dev_idx,
|
||||
.data_type = data_type);
|
||||
}
|
||||
|
||||
int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
|
||||
@ -837,7 +837,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
|
||||
|
||||
struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
|
||||
if (!ca)
|
||||
return -EIO;
|
||||
return -BCH_ERR_trigger_alloc;
|
||||
|
||||
struct bch_alloc_v4 old_a_convert;
|
||||
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
|
||||
@ -871,6 +871,9 @@ int bch2_trigger_alloc(struct btree_trans *trans,
|
||||
if (data_type_is_empty(new_a->data_type) &&
|
||||
BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
|
||||
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
|
||||
if (new_a->oldest_gen == new_a->gen &&
|
||||
!bch2_bucket_sectors_total(*new_a))
|
||||
new_a->oldest_gen++;
|
||||
new_a->gen++;
|
||||
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
|
||||
alloc_data_type_set(new_a, new_a->data_type);
|
||||
@ -889,26 +892,20 @@ int bch2_trigger_alloc(struct btree_trans *trans,
|
||||
!new_a->io_time[READ])
|
||||
new_a->io_time[READ] = bch2_current_io_time(c, READ);
|
||||
|
||||
u64 old_lru = alloc_lru_idx_read(*old_a);
|
||||
u64 new_lru = alloc_lru_idx_read(*new_a);
|
||||
if (old_lru != new_lru) {
|
||||
ret = bch2_lru_change(trans, new.k->p.inode,
|
||||
bucket_to_u64(new.k->p),
|
||||
old_lru, new_lru);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
ret = bch2_lru_change(trans, new.k->p.inode,
|
||||
bucket_to_u64(new.k->p),
|
||||
alloc_lru_idx_read(*old_a),
|
||||
alloc_lru_idx_read(*new_a));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
old_lru = alloc_lru_idx_fragmentation(*old_a, ca);
|
||||
new_lru = alloc_lru_idx_fragmentation(*new_a, ca);
|
||||
if (old_lru != new_lru) {
|
||||
ret = bch2_lru_change(trans,
|
||||
BCH_LRU_FRAGMENTATION_START,
|
||||
bucket_to_u64(new.k->p),
|
||||
old_lru, new_lru);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
ret = bch2_lru_change(trans,
|
||||
BCH_LRU_BUCKET_FRAGMENTATION,
|
||||
bucket_to_u64(new.k->p),
|
||||
alloc_lru_idx_fragmentation(*old_a, ca),
|
||||
alloc_lru_idx_fragmentation(*new_a, ca));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (old_a->gen != new_a->gen) {
|
||||
ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
|
||||
@ -1032,9 +1029,9 @@ fsck_err:
|
||||
bch2_dev_put(ca);
|
||||
return ret;
|
||||
invalid_bucket:
|
||||
bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
|
||||
bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
|
||||
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_trigger_alloc;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1206,8 +1203,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
|
||||
|
||||
if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
|
||||
trans, bucket_gens_key_wrong,
|
||||
"incorrect gen in bucket_gens btree (got %u should be %u)\n"
|
||||
" %s",
|
||||
"incorrect gen in bucket_gens btree (got %u should be %u)\n%s",
|
||||
alloc_gen(k, gens_offset), a->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
@ -1265,7 +1261,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
|
||||
if (fsck_err_on(k.k->type != KEY_TYPE_set,
|
||||
trans, freespace_hole_missing,
|
||||
"hole in alloc btree missing in freespace btree\n"
|
||||
" device %llu buckets %llu-%llu",
|
||||
"device %llu buckets %llu-%llu",
|
||||
freespace_iter->pos.inode,
|
||||
freespace_iter->pos.offset,
|
||||
end->offset)) {
|
||||
@ -1424,7 +1420,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
|
||||
(state == BCH_DATA_free &&
|
||||
genbits != alloc_freespace_genbits(*a))) {
|
||||
if (fsck_err(trans, need_discard_freespace_key_bad,
|
||||
"%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
|
||||
"%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
|
||||
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
|
||||
bch2_btree_id_str(iter->btree_id),
|
||||
iter->pos.inode,
|
||||
@ -1505,7 +1501,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
|
||||
struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
|
||||
if (!ca) {
|
||||
if (fsck_err(trans, bucket_gens_to_invalid_dev,
|
||||
"bucket_gens key for invalid device:\n %s",
|
||||
"bucket_gens key for invalid device:\n%s",
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
|
||||
ret = bch2_btree_delete_at(trans, iter, 0);
|
||||
goto out;
|
||||
@ -1514,7 +1510,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
|
||||
if (fsck_err_on(end <= ca->mi.first_bucket ||
|
||||
start >= ca->mi.nbuckets,
|
||||
trans, bucket_gens_to_invalid_buckets,
|
||||
"bucket_gens key for invalid buckets:\n %s",
|
||||
"bucket_gens key for invalid buckets:\n%s",
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
ret = bch2_btree_delete_at(trans, iter, 0);
|
||||
goto out;
|
||||
@ -1705,7 +1701,8 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
|
||||
u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
|
||||
if (lru_idx) {
|
||||
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
|
||||
ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
lru_idx, alloc_k, last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1716,8 +1713,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
|
||||
if (fsck_err_on(!a->io_time[READ],
|
||||
trans, alloc_key_cached_but_read_time_zero,
|
||||
"cached bucket with read_time 0\n"
|
||||
" %s",
|
||||
"cached bucket with read_time 0\n%s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
struct bkey_i_alloc_v4 *a_mut =
|
||||
@ -1735,7 +1731,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
a = &a_mut->v;
|
||||
}
|
||||
|
||||
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
|
||||
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
a->io_time[READ],
|
||||
alloc_k, last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1757,7 +1755,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
|
||||
bch2_check_stripe_to_lru_refs(c);
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch_err_fn(c, ret);
|
||||
@ -1805,6 +1804,19 @@ struct discard_buckets_state {
|
||||
u64 discarded;
|
||||
};
|
||||
|
||||
/*
|
||||
* This is needed because discard is both a filesystem option and a device
|
||||
* option, and mount options are supposed to apply to that mount and not be
|
||||
* persisted, i.e. if it's set as a mount option we can't propagate it to the
|
||||
* device.
|
||||
*/
|
||||
static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
|
||||
? c->opts.discard
|
||||
: ca->mi.discard;
|
||||
}
|
||||
|
||||
static int bch2_discard_one_bucket(struct btree_trans *trans,
|
||||
struct bch_dev *ca,
|
||||
struct btree_iter *need_discard_iter,
|
||||
@ -1868,7 +1880,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
|
||||
s->discarded++;
|
||||
*discard_pos_done = iter.pos;
|
||||
|
||||
if (ca->mi.discard && !c->opts.nochanges) {
|
||||
if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
|
||||
/*
|
||||
* This works without any other locks because this is the only
|
||||
* thread that removes items from the need_discard tree
|
||||
@ -2058,16 +2070,71 @@ put_ref:
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
|
||||
}
|
||||
|
||||
static int invalidate_one_bp(struct btree_trans *trans,
|
||||
struct bch_dev *ca,
|
||||
struct bkey_s_c_backpointer bp,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct btree_iter extent_iter;
|
||||
struct bkey_s_c extent_k =
|
||||
bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed);
|
||||
int ret = bkey_err(extent_k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct bkey_i *n =
|
||||
bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
|
||||
BTREE_UPDATE_internal_snapshot_node);
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &extent_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
|
||||
struct bch_dev *ca,
|
||||
struct bpos bucket,
|
||||
u8 gen,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bpos bp_start = bucket_pos_to_bp_start(ca, bucket);
|
||||
struct bpos bp_end = bucket_pos_to_bp_end(ca, bucket);
|
||||
|
||||
return for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
|
||||
bp_start, bp_end, 0, k,
|
||||
NULL, NULL,
|
||||
BCH_WATERMARK_btree|
|
||||
BCH_TRANS_COMMIT_no_enospc, ({
|
||||
if (k.k->type != KEY_TYPE_backpointer)
|
||||
continue;
|
||||
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
|
||||
|
||||
if (bp.v->bucket_gen != gen)
|
||||
continue;
|
||||
|
||||
/* filter out bps with gens that don't match */
|
||||
|
||||
invalidate_one_bp(trans, ca, bp, last_flushed);
|
||||
}));
|
||||
}

 noinline_for_stack
 static int invalidate_one_bucket(struct btree_trans *trans,
+				 struct bch_dev *ca,
 				 struct btree_iter *lru_iter,
 				 struct bkey_s_c lru_k,
+				 struct bkey_buf *last_flushed,
 				 s64 *nr_to_invalidate)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_i_alloc_v4 *a = NULL;
 	struct printbuf buf = PRINTBUF;
 	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
-	unsigned cached_sectors;
+	struct btree_iter alloc_iter = {};
 	int ret = 0;

 	if (*nr_to_invalidate <= 0)
@@ -2084,35 +2151,37 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
 		return 0;

-	a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
-	ret = PTR_ERR_OR_ZERO(a);
+	struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
+						     BTREE_ID_alloc, bucket,
+						     BTREE_ITER_cached);
+	ret = bkey_err(alloc_k);
 	if (ret)
-		goto out;
+		return ret;

+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);

 	/* We expect harmless races here due to the btree write buffer: */
-	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
+	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a))
 		goto out;

-	BUG_ON(a->v.data_type != BCH_DATA_cached);
-	BUG_ON(a->v.dirty_sectors);
+	/*
+	 * Impossible since alloc_lru_idx_read() only returns nonzero if the
+	 * bucket is supposed to be on the cached bucket LRU (i.e.
+	 * BCH_DATA_cached)
+	 *
+	 * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
+	 */
+	BUG_ON(a->data_type != BCH_DATA_cached);
+	BUG_ON(a->dirty_sectors);

-	if (!a->v.cached_sectors)
+	if (!a->cached_sectors)
 		bch_err(c, "invalidating empty bucket, confused");

-	cached_sectors = a->v.cached_sectors;
+	unsigned cached_sectors = a->cached_sectors;
+	u8 gen = a->gen;

-	SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
-	a->v.gen++;
-	a->v.data_type		= 0;
-	a->v.dirty_sectors	= 0;
-	a->v.stripe_sectors	= 0;
-	a->v.cached_sectors	= 0;
-	a->v.io_time[READ]	= bch2_current_io_time(c, READ);
-	a->v.io_time[WRITE]	= bch2_current_io_time(c, WRITE);
-
-	ret = bch2_trans_commit(trans, NULL, NULL,
-				BCH_WATERMARK_btree|
-				BCH_TRANS_COMMIT_no_enospc);
+	ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed);
 	if (ret)
 		goto out;

@@ -2120,6 +2189,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	--*nr_to_invalidate;
 out:
 fsck_err:
+	bch2_trans_iter_exit(trans, &alloc_iter);
 	printbuf_exit(&buf);
 	return ret;
 }
@@ -2146,6 +2216,10 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 	struct btree_trans *trans = bch2_trans_get(c);
 	int ret = 0;

+	struct bkey_buf last_flushed;
+	bch2_bkey_buf_init(&last_flushed);
+	bkey_init(&last_flushed.k->k);
+
 	ret = bch2_btree_write_buffer_tryflush(trans);
 	if (ret)
 		goto err;
@@ -2170,7 +2244,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 		if (!k.k)
 			break;

-		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+		ret = invalidate_one_bucket(trans, ca, &iter, k, &last_flushed, &nr_to_invalidate);
restart_err:
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			continue;
@@ -2183,6 +2257,7 @@ restart_err:
err:
 	bch2_trans_put(trans);
 	percpu_ref_put(&ca->io_ref);
+	bch2_bkey_buf_exit(&last_flushed, c);
 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
 }
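A struct bkey_buf named last_flushed now threads through the invalidate path (and the backpointer checks later in this series) so the btree write buffer is not repeatedly flushed for keys already known to be flushed. Its lifecycle, assembled only from the calls visible in this hunk, as a minimal sketch:

	struct bkey_buf last_flushed;

	bch2_bkey_buf_init(&last_flushed);	/* small inline buffer to start */
	bkey_init(&last_flushed.k->k);		/* empty key: nothing flushed yet */

	/* ... hand &last_flushed to each invalidate_one_bucket() call ... */

	bch2_bkey_buf_exit(&last_flushed, c);	/* frees any heap allocation it grew */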
libbcachefs/alloc_background.h
@@ -131,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
 	if (a.stripe)
 		return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
 	if (bch2_bucket_sectors_dirty(a))
-		return data_type;
+		return bucket_data_type(data_type);
 	if (a.cached_sectors)
 		return BCH_DATA_cached;
 	if (BCH_ALLOC_V4_NEED_DISCARD(&a))
libbcachefs/alloc_foreground.c
@@ -127,14 +127,14 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)

 void bch2_open_bucket_write_error(struct bch_fs *c,
 				  struct open_buckets *obs,
-				  unsigned dev)
+				  unsigned dev, int err)
 {
 	struct open_bucket *ob;
 	unsigned i;

 	open_bucket_for_each(c, obs, ob, i)
 		if (ob->dev == dev && ob->ec)
-			bch2_ec_bucket_cancel(c, ob);
+			bch2_ec_bucket_cancel(c, ob, err);
 }

 static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
@@ -631,7 +631,7 @@ static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
 					       struct bch_dev_usage *usage)
 {
 	u64 *v = stripe->next_alloc + ca->dev_idx;
-	u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal);
+	u64 free_space = __dev_buckets_available(ca, *usage, BCH_WATERMARK_normal);
 	u64 free_space_inv = free_space
 		? div64_u64(1ULL << 48, free_space)
 		: 1ULL << 48;
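bch2_dev_stripe_increment_inlined() biases device rotation by advancing each device's position by a 48-bit fixed-point inverse of its free space, so devices with more room advance more slowly and are chosen more often. A self-contained demonstration of the weighting, assuming only that allocation scans for the lowest position (a simplification of the real selection logic):

	#include <stdint.h>
	#include <stdio.h>

	/* Advance a device's round-robin position by ~2^48 / free_space. */
	static void stripe_increment(uint64_t *next_alloc, uint64_t free_space)
	{
		uint64_t inv = free_space ? (1ULL << 48) / free_space : 1ULL << 48;
		*next_alloc += inv;
	}

	int main(void)
	{
		uint64_t pos[2] = {0, 0};	/* dev 0 has 4x the free space of dev 1 */

		for (int i = 0; i < 8; i++) {
			int pick = pos[0] <= pos[1] ? 0 : 1;	/* lowest position wins */
			stripe_increment(&pos[pick], pick == 0 ? 1000 : 250);
			printf("picked dev %d\n", pick);
		}
		return 0;	/* dev 0 wins roughly 4 of every 5 rounds */
	}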
libbcachefs/alloc_foreground.h
@@ -82,7 +82,7 @@ static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
 }

 void bch2_open_bucket_write_error(struct bch_fs *,
-				  struct open_buckets *, unsigned);
+				  struct open_buckets *, unsigned, int);

 void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
libbcachefs/backpointers.c
@@ -11,6 +11,7 @@
 #include "checksum.h"
 #include "disk_accounting.h"
 #include "error.h"
+#include "progress.h"

 #include <linux/mm.h>

@@ -49,6 +50,8 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke
 	}

 	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
+	prt_str(out, " data_type=");
+	bch2_prt_data_type(out, bp.v->data_type);
 	prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
 		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
 		   bp.v->bucket_len,
@@ -93,6 +96,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
+	int ret = 0;

 	if (insert) {
 		prt_printf(&buf, "existing backpointer found when inserting ");
@@ -122,17 +126,15 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,

 		prt_printf(&buf, "for ");
 		bch2_bkey_val_to_text(&buf, c, orig_k);
-
-		bch_err(c, "%s", buf.buf);
 	}

+	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
+	    __bch2_inconsistent_error(c, &buf))
+		ret = -BCH_ERR_erofs_unfixed_errors;
+
+	bch_err(c, "%s", buf.buf);
 	printbuf_exit(&buf);
-
-	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
-	} else {
-		return 0;
-	}
+	return ret;
 }

 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
@@ -207,11 +209,11 @@ static int backpointer_target_not_found(struct btree_trans *trans,
 	if (ret)
 		return ret;

-	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
+	prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
 		   bp.v->level ? "btree node" : "extent");
 	bch2_bkey_val_to_text(&buf, c, bp.s_c);

-	prt_printf(&buf, "\n ");
+	prt_newline(&buf);
 	bch2_bkey_val_to_text(&buf, c, target_k);

 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
@@ -219,7 +221,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
 	struct extent_ptr_decoded p;
 	bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
 		if (p.ptr.dev == bp.k->p.inode) {
-			prt_printf(&buf, "\n ");
+			prt_newline(&buf);
 			struct bkey_i_backpointer bp2;
 			bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
 			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
@@ -440,12 +442,11 @@ found:
 	if (ret)
 		goto err;

-	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
-	prt_printf(&buf, "\n ");
+	prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
 	bch2_btree_id_to_text(&buf, btree);
 	prt_str(&buf, " ");
 	bch2_bkey_val_to_text(&buf, c, extent);
-	prt_printf(&buf, "\n ");
+	prt_newline(&buf);
 	bch2_btree_id_to_text(&buf, o_btree);
 	prt_str(&buf, " ");
 	bch2_bkey_val_to_text(&buf, c, extent2);
@@ -518,11 +519,27 @@ check_existing_bp:
 	if (!other_extent.k)
 		goto missing;

+	rcu_read_lock();
+	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
+	if (ca) {
+		struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
+		bkey_for_each_ptr(other_extent_ptrs, ptr)
+			if (ptr->dev == bp->k.p.inode &&
+			    dev_ptr_stale_rcu(ca, ptr)) {
+				ret = drop_dev_and_update(trans, other_bp.v->btree_id,
+							  other_extent, bp->k.p.inode);
+				if (ret)
+					goto err;
+				goto out;
+			}
+	}
+	rcu_read_unlock();
+
 	if (bch2_extents_match(orig_k, other_extent)) {
 		printbuf_reset(&buf);
-		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
+		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
 		bch2_bkey_val_to_text(&buf, c, orig_k);
-		prt_str(&buf, "\n ");
+		prt_newline(&buf);
 		bch2_bkey_val_to_text(&buf, c, other_extent);
 		bch_err(c, "%s", buf.buf);

@@ -561,20 +578,20 @@ check_existing_bp:
 	}

 	printbuf_reset(&buf);
-	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode);
+	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
 	bch2_bkey_val_to_text(&buf, c, orig_k);
-	prt_str(&buf, "\n ");
+	prt_newline(&buf);
 	bch2_bkey_val_to_text(&buf, c, other_extent);
 	bch_err(c, "%s", buf.buf);
 	ret = -BCH_ERR_fsck_repair_unimplemented;
 	goto err;
missing:
 	printbuf_reset(&buf);
-	prt_str(&buf, "missing backpointer\n for: ");
+	prt_str(&buf, "missing backpointer\nfor: ");
 	bch2_bkey_val_to_text(&buf, c, orig_k);
-	prt_printf(&buf, "\n want: ");
+	prt_printf(&buf, "\nwant: ");
 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
-	prt_printf(&buf, "\n got: ");
+	prt_printf(&buf, "\ngot: ");
 	bch2_bkey_val_to_text(&buf, c, bp_k);

 	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
@@ -594,9 +611,6 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 	struct extent_ptr_decoded p;

 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-		if (p.ptr.cached)
-			continue;
-
 		if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
 			continue;

@@ -604,9 +618,11 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
 		bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
 		bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
+
+		bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
 		rcu_read_unlock();

-		if (check || empty) {
+		if ((check || empty) && !stale) {
 			struct bkey_i_backpointer bp;
 			bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);

@@ -719,71 +735,6 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
 	return ret;
 }

-struct progress_indicator_state {
-	unsigned long		next_print;
-	u64			nodes_seen;
-	u64			nodes_total;
-	struct btree		*last_node;
-};
-
-static inline void progress_init(struct progress_indicator_state *s,
-				 struct bch_fs *c,
-				 u64 btree_id_mask)
-{
-	memset(s, 0, sizeof(*s));
-
-	s->next_print = jiffies + HZ * 10;
-
-	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
-		if (!(btree_id_mask & BIT_ULL(i)))
-			continue;
-
-		struct disk_accounting_pos acc = {
-			.type		= BCH_DISK_ACCOUNTING_btree,
-			.btree.id	= i,
-		};
-
-		u64 v;
-		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
-		s->nodes_total += div64_ul(v, btree_sectors(c));
-	}
-}
-
-static inline bool progress_update_p(struct progress_indicator_state *s)
-{
-	bool ret = time_after_eq(jiffies, s->next_print);
-
-	if (ret)
-		s->next_print = jiffies + HZ * 10;
-	return ret;
-}
-
-static void progress_update_iter(struct btree_trans *trans,
-				 struct progress_indicator_state *s,
-				 struct btree_iter *iter,
-				 const char *msg)
-{
-	struct bch_fs *c = trans->c;
-	struct btree *b = path_l(btree_iter_path(trans, iter))->b;
-
-	s->nodes_seen += b != s->last_node;
-	s->last_node = b;
-
-	if (progress_update_p(s)) {
-		struct printbuf buf = PRINTBUF;
-		unsigned percent = s->nodes_total
-			? div64_u64(s->nodes_seen * 100, s->nodes_total)
-			: 0;
-
-		prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
-			   msg, percent, s->nodes_seen, s->nodes_total);
-		bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
-
-		bch_info(c, "%s", buf.buf);
-		printbuf_exit(&buf);
-	}
-}
-
 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 						   struct extents_to_bp_state *s)
 {
@@ -791,7 +742,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 	struct progress_indicator_state progress;
 	int ret = 0;

-	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
+	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));

 	for (enum btree_id btree_id = 0;
 	     btree_id < btree_id_nr_alive(c);
@@ -810,7 +761,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 					  BTREE_ITER_prefetch);

 		ret = for_each_btree_key_continue(trans, iter, 0, k, ({
-			progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
+			bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
 			check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
 			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 		}));
@@ -831,7 +782,7 @@ enum alloc_sector_counter {
 	ALLOC_SECTORS_NR
 };

-static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
+static int data_type_to_alloc_counter(enum bch_data_type t)
 {
 	switch (t) {
 	case BCH_DATA_btree:
@@ -840,9 +791,10 @@ static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t
 	case BCH_DATA_cached:
 		return ALLOC_cached;
 	case BCH_DATA_stripe:
+	case BCH_DATA_parity:
 		return ALLOC_stripe;
 	default:
-		BUG();
+		return -1;
 	}
 }

@@ -893,7 +845,11 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 		if (bp.v->bucket_gen != a->gen)
 			continue;

-		sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
+		int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
+		if (alloc_counter < 0)
+			continue;
+
+		sectors[alloc_counter] += bp.v->bucket_len;
 	};
 	bch2_trans_iter_exit(trans, &iter);
 	if (ret)
@@ -905,9 +861,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 		goto err;
 	}

-	/* Cached pointers don't have backpointers: */
-
 	if (sectors[ALLOC_dirty] != a->dirty_sectors ||
+	    sectors[ALLOC_cached] != a->cached_sectors ||
 	    sectors[ALLOC_stripe] != a->stripe_sectors) {
 		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
 			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
@@ -916,6 +871,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 		}

 		if (sectors[ALLOC_dirty] > a->dirty_sectors ||
+		    sectors[ALLOC_cached] > a->cached_sectors ||
 		    sectors[ALLOC_stripe] > a->stripe_sectors) {
 			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
 				-BCH_ERR_transaction_restart_nested;
@@ -923,7 +879,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 	}

 	if (!sectors[ALLOC_dirty] &&
-	    !sectors[ALLOC_stripe])
+	    !sectors[ALLOC_stripe] &&
+	    !sectors[ALLOC_cached])
 		__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
 	else
 		__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
@@ -1064,7 +1021,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 	 * Can't allow devices to come/go/resize while we have bucket bitmaps
 	 * allocated
 	 */
-	lockdep_assert_held(&c->state_lock);
+	down_read(&c->state_lock);

 	for_each_member_device(c, ca) {
 		BUG_ON(ca->bucket_backpointer_mismatches);
@@ -1149,6 +1106,7 @@ err_free_bitmaps:
 		ca->bucket_backpointer_mismatches = NULL;
 	}

+	up_read(&c->state_lock);
 	bch_err_fn(c, ret);
 	return ret;
 }
@@ -1210,11 +1168,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,

 	bch2_bkey_buf_init(&last_flushed);
 	bkey_init(&last_flushed.k->k);
-	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
+	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

 	int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
 				     POS_MIN, BTREE_ITER_prefetch, k, ({
-		progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
+		bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
 		check_one_backpointer(trans, start, end, k, &last_flushed);
 	}));
libbcachefs/backpointers.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
-#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+#ifndef _BCACHEFS_BACKPOINTERS_H
+#define _BCACHEFS_BACKPOINTERS_H

 #include "btree_cache.h"
 #include "btree_iter.h"
@@ -123,7 +128,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
 		return BCH_DATA_btree;
 	case KEY_TYPE_extent:
 	case KEY_TYPE_reflink_v:
-		return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+		if (p.has_ec)
+			return BCH_DATA_stripe;
+		if (p.ptr.cached)
+			return BCH_DATA_cached;
+		else
+			return BCH_DATA_user;
 	case KEY_TYPE_stripe: {
 		const struct bch_extent_ptr *ptr = &entry->ptr;
 		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
@@ -147,7 +152,20 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
 					 struct bkey_i_backpointer *bp)
 {
 	bkey_backpointer_init(&bp->k_i);
-	bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset);
+	bp->k.p.inode = p.ptr.dev;
+
+	if (k.k->type != KEY_TYPE_stripe)
+		bp->k.p.offset = ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
+	else {
+		/*
+		 * Put stripe backpointers where they won't collide with the
+		 * extent backpointers within the stripe:
+		 */
+		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+		bp->k.p.offset = ((u64) (p.ptr.offset + le16_to_cpu(s.v->sectors)) <<
+				  MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;
+	}
+
 	bp->v	= (struct bch_backpointer) {
 		.btree_id	= btree_id,
 		.level		= level,
libbcachefs/bcachefs.h
@@ -203,6 +203,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/zstd.h>
+#include <linux/unicode.h>

 #include "bcachefs_format.h"
 #include "btree_journal_iter_types.h"
@@ -444,6 +445,7 @@ BCH_DEBUG_PARAMS_DEBUG()
 	x(btree_node_sort)			\
 	x(btree_node_read)			\
+	x(btree_node_read_done)			\
 	x(btree_node_write)			\
 	x(btree_interior_update_foreground)	\
 	x(btree_interior_update_total)		\
 	x(btree_gc)				\
@@ -456,6 +458,7 @@ BCH_DEBUG_PARAMS_DEBUG()
 	x(blocked_journal_low_on_space)		\
 	x(blocked_journal_low_on_pin)		\
 	x(blocked_journal_max_in_flight)	\
+	x(blocked_journal_max_open)		\
 	x(blocked_key_cache_flush)		\
 	x(blocked_allocate)			\
 	x(blocked_allocate_open_bucket)		\
@@ -533,6 +536,7 @@ struct bch_dev {
 	 */
 	struct bch_member_cpu	mi;
 	atomic64_t		errors[BCH_MEMBER_ERROR_NR];
+	unsigned long		write_errors_start;

 	__uuid_t		uuid;
 	char			name[BDEVNAME_SIZE];
@@ -623,7 +627,8 @@ struct bch_dev {
 	x(topology_error)		\
 	x(errors_fixed)			\
 	x(errors_not_fixed)		\
-	x(no_invalid_checks)
+	x(no_invalid_checks)		\
+	x(discard_mount_opt_set)	\

 enum bch_fs_flags {
 #define x(n)		BCH_FS_##n,
@@ -697,6 +702,8 @@ enum bch_write_ref {
 	BCH_WRITE_REF_NR,
 };

+#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
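BCH_FS_DEFAULT_UTF8_ENCODING pins the Unicode version (12.1.0) used for the new casefolding support. A hedged sketch of how a filesystem typically loads the matching table through the kernel's unicode layer; whether bcachefs calls utf8_load() exactly this way is not shown in these hunks and is an assumption:

	#include <linux/unicode.h>

	/* Sketch: load casefolding tables for the pinned Unicode version. */
	struct unicode_map *um = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
	if (IS_ERR(um))
		return PTR_ERR(um);
	c->cf_encoding = um;	/* the bch_fs field added below */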

 struct bch_fs {
 	struct closure		cl;

@@ -781,6 +788,9 @@ struct bch_fs {
 		u64		btrees_lost_data;
 	}			sb;

+#ifdef CONFIG_UNICODE
+	struct unicode_map	*cf_encoding;
+#endif
+
 	struct bch_sb_handle	disk_sb;

@@ -970,7 +980,6 @@ struct bch_fs {
 	mempool_t		compress_workspace[BCH_COMPRESSION_OPT_NR];
 	size_t			zstd_workspace_size;

-	struct crypto_shash	*sha256;
 	struct crypto_sync_skcipher *chacha20;
 	struct crypto_shash	*poly1305;

@@ -994,15 +1003,11 @@ struct bch_fs {
 	wait_queue_head_t	copygc_running_wq;

 	/* STRIPES: */
-	GENRADIX(struct stripe) stripes;
 	GENRADIX(struct gc_stripe) gc_stripes;

 	struct hlist_head	ec_stripes_new[32];
 	spinlock_t		ec_stripes_new_lock;

-	ec_stripes_heap		ec_stripes_heap;
-	struct mutex		ec_stripes_heap_lock;
-
 	/* ERASURE CODING */
 	struct list_head	ec_stripe_head_list;
 	struct mutex		ec_stripe_head_lock;
libbcachefs/bcachefs_format.h
@@ -686,7 +686,12 @@ struct bch_sb_field_ext {
 	x(inode_depth,			BCH_VERSION(1, 17))		\
 	x(persistent_inode_cursors,	BCH_VERSION(1, 18))		\
 	x(autofix_errors,		BCH_VERSION(1, 19))		\
-	x(directory_size,		BCH_VERSION(1, 20))
+	x(directory_size,		BCH_VERSION(1, 20))		\
+	x(cached_backpointers,		BCH_VERSION(1, 21))		\
+	x(stripe_backpointers,		BCH_VERSION(1, 22))		\
+	x(stripe_lru,			BCH_VERSION(1, 23))		\
+	x(casefolding,			BCH_VERSION(1, 24))		\
+	x(extent_flags,			BCH_VERSION(1, 25))

 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
@@ -837,6 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS,	struct bch_sb, flags[3], 28, 29);
 LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
+/* one free bit */
 LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
 LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
 LE64_BITMASK(BCH_SB_NOCOW,		struct bch_sb, flags[4], 33, 34);
@@ -855,6 +861,8 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT,	struct bch_sb, flags[5], 32, 48);
 LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
 					struct bch_sb, flags[5], 48, 64);
 LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS,	struct bch_sb, flags[6], 0, 4);
+LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
+LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR,	struct bch_sb, flags[6], 14, 20);

 static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
 {
@@ -908,7 +916,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
 	x(journal_no_flush,		16)	\
 	x(alloc_v2,			17)	\
 	x(extents_across_btree_nodes,	18)	\
-	x(incompat_version_field,	19)
+	x(incompat_version_field,	19)	\
+	x(casefolding,			20)

 #define BCH_SB_FEATURES_ALWAYS				\
 	(BIT_ULL(BCH_FEATURE_new_extent_overwrite)|	\
@@ -922,7 +931,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
 	 BIT_ULL(BCH_FEATURE_new_siphash)|		\
 	 BIT_ULL(BCH_FEATURE_btree_ptr_v2)|		\
 	 BIT_ULL(BCH_FEATURE_new_varint)|		\
-	 BIT_ULL(BCH_FEATURE_journal_no_flush))
+	 BIT_ULL(BCH_FEATURE_journal_no_flush)|		\
+	 BIT_ULL(BCH_FEATURE_incompat_version_field))

 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
@@ -1133,7 +1143,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(log,			9)		\
 	x(overwrite,		10)		\
 	x(write_buffer_keys,	11)		\
-	x(datetime,		12)
+	x(datetime,		12)		\
+	x(log_bkey,		13)

 enum bch_jset_entry_type {
 #define x(f, nr) BCH_JSET_ENTRY_##f = nr,
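All of these lists rely on the same x-macro idiom: a list macro expanded several times with different local definitions of x(). A minimal self-contained sketch of how a list like the jset entry types above yields both an enum and a name table:

	#define SKETCH_ENTRY_TYPES()		\
		x(btree_keys,	0)		\
		x(datetime,	12)		\
		x(log_bkey,	13)

	enum sketch_entry_type {
	#define x(f, nr) SKETCH_ENTRY_##f = nr,
		SKETCH_ENTRY_TYPES()
	#undef x
	};

	static const char * const sketch_entry_names[] = {
	#define x(f, nr) [nr] = #f,
		SKETCH_ENTRY_TYPES()
	#undef x
	};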
libbcachefs/bcachefs_ioctl.h
@@ -234,6 +234,11 @@ enum bch_data_event {
 	BCH_DATA_EVENT_NR	= 1,
 };

+enum data_progress_data_type_special {
+	DATA_PROGRESS_DATA_TYPE_phys	= 254,
+	DATA_PROGRESS_DATA_TYPE_done	= 255,
+};
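The data-job progress ioctl reports position in a one-byte data_type; the two sentinel values added here mark progress by physical offset and completion. A sketch of how userspace might interpret the field (names taken from this header):

	static const char *progress_kind(__u8 data_type)
	{
		switch (data_type) {
		case DATA_PROGRESS_DATA_TYPE_phys:	return "physical offset";
		case DATA_PROGRESS_DATA_TYPE_done:	return "done";
		default:				return "btree position";
		}
	}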

 struct bch_ioctl_data_progress {
 	__u8			data_type;
 	__u8			btree_id;
libbcachefs/btree_cache.c
@@ -203,7 +203,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
 		return NULL;
 	}

-	bch2_btree_lock_init(&b->c, 0);
+	bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);

 	__bch2_btree_node_to_freelist(bc, b);
 	return b;
@@ -610,6 +610,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 			btree_node_write_in_flight(b));

 		btree_node_data_free(bc, b);
+		cond_resched();
 	}

 	BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -795,17 +796,18 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 	}

 	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
-	if (!b) {
+	if (b) {
+		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
+	} else {
 		mutex_unlock(&bc->lock);
 		bch2_trans_unlock(trans);
 		b = __btree_node_mem_alloc(c, GFP_KERNEL);
 		if (!b)
 			goto err;
+		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
 		mutex_lock(&bc->lock);
 	}

-	bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
-
 	BUG_ON(!six_trylock_intent(&b->c.lock));
 	BUG_ON(!six_trylock_write(&b->c.lock));
@@ -1415,7 +1417,7 @@ void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
 		prt_printf(out, "%u", r->level);
 	else
 		prt_printf(out, "(unknown)");
-	prt_printf(out, "\n ");
+	prt_newline(out);

 	bch2_bkey_val_to_text(out, c, k);
 }
libbcachefs/btree_gc.c
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "progress.h"
 #include "recovery_passes.h"
 #include "reflink.h"
 #include "recovery.h"
@@ -212,15 +213,15 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *

 	prt_printf(&buf, " at ");
 	bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
-	prt_printf(&buf, ":\n parent: ");
+	prt_printf(&buf, ":\nparent: ");
 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

 	if (prev) {
-		prt_printf(&buf, "\n prev: ");
+		prt_printf(&buf, "\nprev: ");
 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
 	}

-	prt_str(&buf, "\n next: ");
+	prt_str(&buf, "\nnext: ");
 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));

 	if (bpos_lt(expected_start, cur->data->min_key)) {	/* gap */
@@ -279,12 +280,12 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
 	if (bpos_eq(child->key.k.p, b->key.k.p))
 		return 0;

-	prt_printf(&buf, " at ");
+	prt_printf(&buf, "\nat: ");
 	bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
-	prt_printf(&buf, ":\n parent: ");
+	prt_printf(&buf, "\nparent: ");
 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

-	prt_str(&buf, "\n child: ");
+	prt_str(&buf, "\nchild: ");
 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));

 	if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,
@@ -350,8 +351,7 @@ again:

 		if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
 				trans, btree_node_read_error,
-				"Topology repair: unreadable btree node at\n"
-				" %s",
+				"Topology repair: unreadable btree node at\n%s",
 				buf.buf)) {
 			bch2_btree_node_evict(trans, cur_k.k);
 			cur = NULL;
@@ -611,7 +611,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
 	if (fsck_err_on(btree_id != BTREE_ID_accounting &&
 			k.k->bversion.lo > atomic64_read(&c->key_version),
 			trans, bkey_version_in_future,
-			"key version number higher than recorded %llu\n %s",
+			"key version number higher than recorded %llu\n%s",
 			atomic64_read(&c->key_version),
 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
 		atomic64_set(&c->key_version, k.k->bversion.lo);
@@ -619,7 +619,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,

 	if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
 				trans, btree_bitmap_not_marked,
-				"btree ptr not marked in member info btree allocated bitmap\n %s",
+				"btree ptr not marked in member info btree allocated bitmap\n%s",
 				(printbuf_reset(&buf),
 				 bch2_bkey_val_to_text(&buf, c, k),
 				 buf.buf))) {
@@ -656,7 +656,9 @@ fsck_err:
 	return ret;
 }

-static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
+static int bch2_gc_btree(struct btree_trans *trans,
+			 struct progress_indicator_state *progress,
+			 enum btree_id btree, bool initial)
 {
 	struct bch_fs *c = trans->c;
 	unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
@@ -673,6 +675,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
 				  BTREE_ITER_prefetch);

 		ret = for_each_btree_key_continue(trans, iter, 0, k, ({
+			bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
 			gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
 			bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
 		}));
@@ -717,22 +720,24 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 static int bch2_gc_btrees(struct bch_fs *c)
 {
 	struct btree_trans *trans = bch2_trans_get(c);
-	enum btree_id ids[BTREE_ID_NR];
 	struct printbuf buf = PRINTBUF;
-	unsigned i;
 	int ret = 0;

-	for (i = 0; i < BTREE_ID_NR; i++)
+	struct progress_indicator_state progress;
+	bch2_progress_init(&progress, c, ~0ULL);
+
+	enum btree_id ids[BTREE_ID_NR];
+	for (unsigned i = 0; i < BTREE_ID_NR; i++)
 		ids[i] = i;
 	bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

-	for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
 		unsigned btree = i < BTREE_ID_NR ? ids[i] : i;

 		if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
 			continue;

-		ret = bch2_gc_btree(trans, btree, true);
+		ret = bch2_gc_btree(trans, &progress, btree, true);
 	}

 	printbuf_exit(&buf);
@@ -1015,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c)
 {
 	int ret;

-	lockdep_assert_held(&c->state_lock);
-
+	down_read(&c->state_lock);
 	down_write(&c->gc_lock);

 	bch2_btree_interior_updates_flush(c);
@@ -1054,6 +1058,7 @@ out:
 	percpu_up_write(&c->mark_lock);

 	up_write(&c->gc_lock);
+	up_read(&c->state_lock);

 	/*
 	 * At startup, allocations can happen directly instead of via the
libbcachefs/btree_io.c
@@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 	prt_printf(out, "at btree ");
 	bch2_btree_pos_to_text(out, c, b);

-	printbuf_indent_add(out, 2);
-
 	prt_printf(out, "\nnode offset %u/%u",
 		   b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
 	if (i)
@@ -550,23 +548,7 @@ static int __btree_err(int ret,
 		       enum bch_sb_error_id err_type,
 		       const char *fmt, ...)
 {
-	struct printbuf out = PRINTBUF;
 	bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
-	va_list args;
-
-	btree_err_msg(&out, c, ca, b, i, k, b->written, write);
-
-	va_start(args, fmt);
-	prt_vprintf(&out, fmt, args);
-	va_end(args);
-
-	if (write == WRITE) {
-		bch2_print_string_as_lines(KERN_ERR, out.buf);
-		ret = c->opts.errors == BCH_ON_ERROR_continue
-			? 0
-			: -BCH_ERR_fsck_errors_not_fixed;
-		goto out;
-	}

 	if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
 		ret = -BCH_ERR_btree_node_read_err_fixable;
@@ -576,6 +558,29 @@ static int __btree_err(int ret,
 	if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
 		bch2_sb_error_count(c, err_type);

+	struct printbuf out = PRINTBUF;
+	if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
+		printbuf_indent_add_nextline(&out, 2);
+#ifdef BCACHEFS_LOG_PREFIX
+		prt_printf(&out, bch2_log_msg(c, ""));
+#endif
+	}
+
+	btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+
+	va_list args;
+	va_start(args, fmt);
+	prt_vprintf(&out, fmt, args);
+	va_end(args);
+
+	if (write == WRITE) {
+		prt_str(&out, ", ");
+		ret = __bch2_inconsistent_error(c, &out)
+			? -BCH_ERR_fsck_errors_not_fixed
+			: 0;
+		silent = false;
+	}
+
 	switch (ret) {
 	case -BCH_ERR_btree_node_read_err_fixable:
 		ret = !silent
@@ -585,25 +590,21 @@ static int __btree_err(int ret,
 		    ret != -BCH_ERR_fsck_ignore)
 			goto fsck_err;
 		ret = -BCH_ERR_fsck_fix;
 		break;
 	case -BCH_ERR_btree_node_read_err_want_retry:
 	case -BCH_ERR_btree_node_read_err_must_retry:
-		if (!silent)
-			bch2_print_string_as_lines(KERN_ERR, out.buf);
-		break;
+		goto out;
 	case -BCH_ERR_btree_node_read_err_bad_node:
-		if (!silent)
-			bch2_print_string_as_lines(KERN_ERR, out.buf);
-		ret = bch2_topology_error(c);
+		prt_str(&out, ", ");
+		ret = __bch2_topology_error(c, &out);
+		if (ret)
+			silent = false;
 		break;
 	case -BCH_ERR_btree_node_read_err_incompatible:
-		if (!silent)
-			bch2_print_string_as_lines(KERN_ERR, out.buf);
 		ret = -BCH_ERR_fsck_errors_not_fixed;
+		silent = false;
 		break;
 	default:
 		BUG();
 	}

+	if (!silent)
+		bch2_print_string_as_lines(KERN_ERR, out.buf);
+out:
fsck_err:
 	printbuf_exit(&out);
@@ -817,7 +818,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 			 -BCH_ERR_btree_node_read_err_bad_node,
 			 c, ca, b, i, NULL,
 			 btree_node_bad_format,
-			 "invalid bkey format: %s\n %s", buf1.buf,
+			 "invalid bkey format: %s\n%s", buf1.buf,
 			 (printbuf_reset(&buf2),
 			  bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
 		printbuf_reset(&buf1);
@@ -997,7 +998,7 @@ drop_this_key:
 		}
got_good_key:
 		le16_add_cpu(&i->u64s, -next_good_key);
-		memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
+		memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
 		set_btree_node_need_rewrite(b);
 	}
fsck_err:
@@ -1187,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			 le64_to_cpu(i->journal_seq),
 			 b->written, b->written + sectors, ptr_written);

-		b->written += sectors;
+		b->written = min(b->written + sectors, btree_sectors(c));

 		if (blacklisted && !first)
 			continue;
@@ -1329,6 +1330,7 @@ static void btree_node_read_work(struct work_struct *work)
 			bch_info(c, "retrying read");
 		ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
 		rb->have_ioref		= ca != NULL;
+		rb->start_time		= local_clock();
 		bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
 		bio->bi_iter.bi_sector	= rb->pick.ptr.offset;
 		bio->bi_iter.bi_size	= btree_buf_bytes(b);
@@ -1339,17 +1341,22 @@ static void btree_node_read_work(struct work_struct *work)
 		} else {
 			bio->bi_status = BLK_STS_REMOVED;
 		}
+
+		bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
+					   rb->start_time, !bio->bi_status);
start:
 		printbuf_reset(&buf);
 		bch2_btree_pos_to_text(&buf, c, b);
-		bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-				   "btree read error %s for %s",
-				   bch2_blk_status_to_str(bio->bi_status), buf.buf);
+
+		if (ca && bio->bi_status)
+			bch_err_dev_ratelimited(ca,
+					"btree read error %s for %s",
+					bch2_blk_status_to_str(bio->bi_status), buf.buf);
 		if (rb->have_ioref)
 			percpu_ref_put(&ca->io_ref);
 		rb->have_ioref = false;

-		bch2_mark_io_failure(&failed, &rb->pick);
+		bch2_mark_io_failure(&failed, &rb->pick, false);

 		can_retry = bch2_bkey_pick_read_device(c,
 					bkey_i_to_s_c(&b->key),
@@ -1401,12 +1408,11 @@ static void btree_node_read_endio(struct bio *bio)
 	struct btree_read_bio *rb =
 		container_of(bio, struct btree_read_bio, bio);
 	struct bch_fs *c	= rb->c;
+	struct bch_dev *ca	= rb->have_ioref
+		? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL;

-	if (rb->have_ioref) {
-		struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
-
+	if (rb->have_ioref)
 		bch2_latency_acct(ca, rb->start_time, READ);
-	}
+	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read,
+				   rb->start_time, !bio->bi_status);

 	queue_work(c->btree_read_complete_wq, &rb->work);
 }
@@ -2016,7 +2022,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
 	bch2_journal_pin_drop(&c->journal, &w->journal);
 }

-static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
+static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
 {
 	struct btree_write *w = btree_prev_write(b);
 	unsigned long old, new;
@@ -2024,6 +2030,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)

 	bch2_btree_complete_write(c, b, w);

+	if (start_time)
+		bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);
+
 	old = READ_ONCE(b->flags);
 	do {
 		new = old;
@@ -2054,7 +2063,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
 	wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
 }

-static void btree_node_write_done(struct bch_fs *c, struct btree *b)
+static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
 {
 	struct btree_trans *trans = bch2_trans_get(c);

@@ -2062,7 +2071,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)

 	/* we don't need transaction context anymore after we got the lock. */
 	bch2_trans_put(trans);
-	__btree_node_write_done(c, b);
+	__btree_node_write_done(c, b, start_time);
 	six_unlock_read(&b->c.lock);
 }

@@ -2072,6 +2081,7 @@ static void btree_node_write_work(struct work_struct *work)
 		container_of(work, struct btree_write_bio, work);
 	struct bch_fs *c	= wbio->wbio.c;
 	struct btree *b		= wbio->wbio.bio.bi_private;
+	u64 start_time		= wbio->start_time;
 	int ret = 0;

 	btree_bounce_free(c,
@@ -2104,12 +2114,18 @@ static void btree_node_write_work(struct work_struct *work)
 	}
out:
 	bio_put(&wbio->wbio.bio);
-	btree_node_write_done(c, b);
+	btree_node_write_done(c, b, start_time);
 	return;
err:
 	set_btree_node_noevict(b);
-	bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
-			     "writing btree node: %s", bch2_err_str(ret));
+
+	if (!bch2_err_matches(ret, EROFS)) {
+		struct printbuf buf = PRINTBUF;
+		prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
+		bch2_btree_pos_to_text(&buf, c, b);
+		bch2_fs_fatal_error(c, "%s", buf.buf);
+		printbuf_exit(&buf);
+	}
 	goto out;
 }

@@ -2122,16 +2138,21 @@ static void btree_node_write_endio(struct bio *bio)
 	struct bch_fs *c	= wbio->c;
 	struct btree *b		= wbio->bio.bi_private;
 	struct bch_dev *ca	= wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL;
-	unsigned long flags;

 	if (wbio->have_ioref)
 		bch2_latency_acct(ca, wbio->submit_time, WRITE);
+	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
+				   wbio->submit_time, !bio->bi_status);

-	if (!ca ||
-	    bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
-			       "btree write error: %s",
-			       bch2_blk_status_to_str(bio->bi_status)) ||
-	    bch2_meta_write_fault("btree")) {
+	if (ca && bio->bi_status) {
+		struct printbuf buf = PRINTBUF;
+		prt_printf(&buf, "btree write error: %s\n ",
+			   bch2_blk_status_to_str(bio->bi_status));
+		bch2_btree_pos_to_text(&buf, c, b);
+		bch_err_dev_ratelimited(ca, "%s", buf.buf);
+		printbuf_exit(&buf);
+	}
+
+	if (bio->bi_status) {
+		unsigned long flags;
 		spin_lock_irqsave(&c->btree_write_error_lock, flags);
 		bch2_dev_list_add_dev(&orig->failed, wbio->dev);
 		spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
@@ -2208,6 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
 	bool validate_before_checksum = false;
 	enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
 	void *data;
+	u64 start_time = local_clock();
 	int ret;

 	if (flags & BTREE_WRITE_ALREADY_STARTED)
@@ -2416,6 +2438,7 @@ do_write:
 	wbio->data			= data;
 	wbio->data_bytes		= bytes;
 	wbio->sector_offset		= b->written;
+	wbio->start_time		= start_time;
 	wbio->wbio.c			= c;
 	wbio->wbio.used_mempool		= used_mempool;
 	wbio->wbio.first_btree_write	= !b->written;
@@ -2443,7 +2466,7 @@ err:
 	b->written += sectors_to_write;
nowrite:
 	btree_bounce_free(c, bytes, used_mempool, data);
-	__btree_node_write_done(c, b);
+	__btree_node_write_done(c, b, 0);
 }

 /*
libbcachefs/btree_io.h
@@ -52,6 +52,7 @@ struct btree_write_bio {
 	void			*data;
 	unsigned		data_bytes;
 	unsigned		sector_offset;
+	u64			start_time;
 	struct bch_write_bio	wbio;
 };
libbcachefs/btree_iter.c
@@ -562,20 +562,6 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
 			bch2_btree_node_iter_peek_all(&l->iter, l->b));
 }

-static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
-						    struct btree_path *path,
-						    struct btree_path_level *l,
-						    struct bkey *u)
-{
-	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
-			bch2_btree_node_iter_peek(&l->iter, l->b));
-
-	path->pos = k.k ? k.k->p : l->b->key.k.p;
-	trans->paths_sorted = false;
-	bch2_btree_path_verify_level(trans, path, l - path->l);
-	return k;
-}
-
 static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
 						    struct btree_path *path,
 						    struct btree_path_level *l,
@@ -1501,22 +1487,14 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)

 	for (struct jset_entry *e = trans->journal_entries;
 	     e != btree_trans_journal_entries_top(trans);
-	     e = vstruct_next(e))
+	     e = vstruct_next(e)) {
 		bch2_journal_entry_to_text(buf, trans->c, e);
+		prt_newline(buf);
+	}

 	printbuf_indent_sub(buf, 2);
 }

-noinline __cold
-void bch2_dump_trans_updates(struct btree_trans *trans)
-{
-	struct printbuf buf = PRINTBUF;
-
-	bch2_trans_updates_to_text(&buf, trans);
-	bch2_print_str(trans->c, buf.buf);
-	printbuf_exit(&buf);
-}
-
 static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
 {
 	struct btree_path *path = trans->paths + path_idx;
@@ -2357,6 +2335,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
 	bch2_btree_iter_verify_entry_exit(iter);
 	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));

+	ret = trans_maybe_inject_restart(trans, _RET_IP_);
+	if (unlikely(ret)) {
+		k = bkey_s_c_err(ret);
+		goto out_no_locked;
+	}
+
 	if (iter->update_path) {
 		bch2_path_put_nokeep(trans, iter->update_path,
 				     iter->flags & BTREE_ITER_intent);
@@ -2622,6 +2606,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
 	bch2_btree_iter_verify_entry_exit(iter);
 	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));

+	int ret = trans_maybe_inject_restart(trans, _RET_IP_);
+	if (unlikely(ret)) {
+		k = bkey_s_c_err(ret);
+		goto out_no_locked;
+	}
+
 	while (1) {
 		k = __bch2_btree_iter_peek_prev(iter, search_key);
 		if (unlikely(!k.k))
@@ -2749,6 +2739,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 	bch2_btree_iter_verify_entry_exit(iter);
 	EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));

+	ret = trans_maybe_inject_restart(trans, _RET_IP_);
+	if (unlikely(ret)) {
+		k = bkey_s_c_err(ret);
+		goto out_no_locked;
+	}
+
 	/* extents can't span inode numbers: */
 	if ((iter->flags & BTREE_ITER_is_extents) &&
 	    unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -3106,6 +3102,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)

 	WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);

+	ret = trans_maybe_inject_restart(trans, _RET_IP_);
+	if (ret)
+		return ERR_PTR(ret);
+
 	struct btree_transaction_stats *s = btree_trans_stats(trans);
 	s->max_mem = max(s->max_mem, new_bytes);

@@ -3163,7 +3163,8 @@ out_new_mem:

 	if (old_bytes) {
 		trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
-		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+		return ERR_PTR(btree_trans_restart_ip(trans,
+					BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
 	}
out_change_top:
 	p = trans->mem + trans->mem_top;
@@ -3271,6 +3272,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)

 	trans->last_begin_ip = _RET_IP_;

+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+	if (trans->restarted) {
+		trans->restart_count_this_trans++;
+	} else {
+		trans->restart_count_this_trans = 0;
+	}
+#endif
+
 	trans_set_locked(trans, false);

 	if (trans->restarted) {
libbcachefs/btree_iter.h
@@ -9,7 +9,6 @@
 void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
 void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
 void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
-void bch2_dump_trans_updates(struct btree_trans *);
 void bch2_dump_trans_paths_updates(struct btree_trans *);

 static inline int __bkey_err(const struct bkey *k)
@@ -335,13 +334,20 @@ static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_tra
 }

 __always_inline
-static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
+static int btree_trans_restart_foreign_task(struct btree_trans *trans, int err, unsigned long ip)
 {
 	BUG_ON(err <= 0);
 	BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));

 	trans->restarted = err;
 	trans->last_restarted_ip = ip;
+	return -err;
+}
+
+__always_inline
+static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
+{
+	btree_trans_restart_foreign_task(trans, err, ip);
 #ifdef CONFIG_BCACHEFS_DEBUG
 	darray_exit(&trans->last_restarted_trace);
 	bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT);
@@ -355,6 +361,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
 	return btree_trans_restart_ip(trans, err, _THIS_IP_);
 }

+static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+	if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
+		trace_and_count(trans->c, trans_restart_injected, trans, ip);
+		return btree_trans_restart_ip(trans,
+					BCH_ERR_transaction_restart_fault_inject, ip);
+	}
+#endif
+	return 0;
+}
+
 bool bch2_btree_node_upgrade(struct btree_trans *,
 			     struct btree_path *, unsigned);

@@ -739,7 +757,7 @@ transaction_restart:						\
 	if (!_ret2)						\
 		bch2_trans_verify_not_restarted(_trans, _restart_count);\
 								\
-	_ret2 ?: trans_was_restarted(_trans, _restart_count);	\
+	_ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
 })

 #define for_each_btree_key_max_continue(_trans, _iter,		\
libbcachefs/btree_journal_iter.c
@@ -644,6 +644,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
 */
 static int journal_sort_key_cmp(const void *_l, const void *_r)
 {
+	cond_resched();
+
 	const struct journal_key *l = _l;
 	const struct journal_key *r = _r;
libbcachefs/btree_key_cache.c
@@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
 	}

 	if (ck) {
-		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
+		bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
 		ck->c.cached = true;
 		goto lock;
 	}
libbcachefs/btree_locking.c
@@ -7,9 +7,10 @@
 static struct lock_class_key bch2_btree_node_lock_key;

 void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
-			  enum six_lock_init_flags flags)
+			  enum six_lock_init_flags flags,
+			  gfp_t gfp)
 {
-	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
+	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp);
 	lockdep_set_notrack_class(&b->lock);
 }

@@ -90,10 +91,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
 	struct trans_waiting_for_lock *i;

 	for (i = g->g; i != g->g + g->nr; i++) {
-		struct task_struct *task = i->trans->locking_wait.task;
+		struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
 		if (i != g->g)
 			prt_str(out, "<- ");
-		prt_printf(out, "%u ", task ?task->pid : 0);
+		prt_printf(out, "%u ", task ? task->pid : 0);
 	}
 	prt_newline(out);
 }
@@ -171,7 +172,9 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
 	if (i == g->g) {
 		trace_would_deadlock(g, i->trans);
-		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
+		return btree_trans_restart_foreign_task(i->trans,
+					BCH_ERR_transaction_restart_would_deadlock,
+					_THIS_IP_);
 	} else {
 		i->trans->lock_must_abort = true;
 		wake_up_process(i->trans->locking_wait.task);
libbcachefs/btree_locking.h
@@ -13,7 +13,7 @@
 #include "btree_iter.h"
 #include "six.h"

-void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp);

 void bch2_trans_unlock_noassert(struct btree_trans *);
 void bch2_trans_unlock_write(struct btree_trans *);
libbcachefs/btree_node_scan.c
@@ -13,6 +13,7 @@

 #include <linux/kthread.h>
+#include <linux/min_heap.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sort.h>

 struct find_btree_nodes_worker {
@@ -166,11 +167,17 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
 	bio->bi_iter.bi_sector	= offset;
 	bch2_bio_map(bio, bn, PAGE_SIZE);

+	u64 submit_time = local_clock();
 	submit_bio_wait(bio);
-	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-			       "IO error in try_read_btree_node() at %llu: %s",
-			       offset, bch2_blk_status_to_str(bio->bi_status)))
+
+	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
+
+	if (bio->bi_status) {
+		bch_err_dev_ratelimited(ca,
+				"IO error in try_read_btree_node() at %llu: %s",
+				offset, bch2_blk_status_to_str(bio->bi_status));
 		return;
+	}

 	if (le64_to_cpu(bn->magic) != bset_magic(c))
 		return;
@@ -264,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
 	bio_put(bio);
 	free_page((unsigned long) buf);
-	percpu_ref_get(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref);
 	closure_put(w->cl);
 	kfree(w);
 	return 0;
@@ -283,32 +290,32 @@ static int read_btree_nodes(struct find_btree_nodes *f)
 			continue;

 		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
-		struct task_struct *t;
-
 		if (!w) {
 			percpu_ref_put(&ca->io_ref);
 			ret = -ENOMEM;
 			goto err;
 		}

-		percpu_ref_get(&ca->io_ref);
-		closure_get(&cl);
 		w->cl		= &cl;
 		w->f		= f;
 		w->ca		= ca;

-		t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
+		struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
 		ret = PTR_ERR_OR_ZERO(t);
 		if (ret) {
 			percpu_ref_put(&ca->io_ref);
-			closure_put(&cl);
-			f->ret = ret;
-			bch_err(c, "error starting kthread: %i", ret);
 			kfree(w);
+			bch_err_msg(c, ret, "starting kthread");
 			break;
 		}
+
+		closure_get(&cl);
+		percpu_ref_get(&ca->io_ref);
+		wake_up_process(t);
 	}
err:
-	closure_sync(&cl);
+	while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2))
+		;
 	return f->ret ?: ret;
 }

@@ -572,10 +579,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,

 		found_btree_node_to_key(&tmp.k, &n);

-		struct printbuf buf = PRINTBUF;
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
-		bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
-		printbuf_exit(&buf);
+		if (c->opts.verbose) {
+			struct printbuf buf = PRINTBUF;
+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+			bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+			printbuf_exit(&buf);
+		}

 		BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
 					  (struct bkey_validate_context) {
libbcachefs/btree_trans_commit.c
@@ -164,6 +164,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
 	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
 	EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
 	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
+	kmsan_check_memory(insert, bkey_bytes(&insert->k));

 	k = bch2_btree_node_iter_peek_all(node_iter, b);
 	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
@@ -336,6 +337,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 	BUG_ON(i->cached	!= path->cached);
 	BUG_ON(i->level		!= path->level);
 	BUG_ON(i->btree_id	!= path->btree_id);
+	BUG_ON(i->bkey_type	!= __btree_node_type(path->level, path->btree_id));
 	EBUG_ON(!i->level &&
 		btree_type_has_snapshots(i->btree_id) &&
 		!(i->flags & BTREE_UPDATE_internal_snapshot_node) &&
@@ -517,69 +519,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
 	}
 }

-static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
-			      unsigned *btree_id_updates_start)
-{
-	bool trans_trigger_run;
-
-	/*
-	 * Running triggers will append more updates to the list of updates as
-	 * we're walking it:
-	 */
-	do {
-		trans_trigger_run = false;
-
-		for (unsigned i = *btree_id_updates_start;
-		     i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
-		     i++) {
-			if (trans->updates[i].btree_id < btree_id) {
-				*btree_id_updates_start = i;
-				continue;
-			}
-
-			int ret = run_one_trans_trigger(trans, trans->updates + i);
-			if (ret < 0)
-				return ret;
-			if (ret)
-				trans_trigger_run = true;
-		}
-	} while (trans_trigger_run);
-
-	trans_for_each_update(trans, i)
-		BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
-		       i->btree_id == btree_id &&
-		       btree_node_type_has_trans_triggers(i->bkey_type) &&
-		       (!i->insert_trigger_run || !i->overwrite_trigger_run));
-
-	return 0;
-}
-
 static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 {
-	unsigned btree_id = 0, btree_id_updates_start = 0;
-	int ret = 0;
+	unsigned sort_id_start = 0;

-	/*
-	 *
-	 * For a given btree, this algorithm runs insert triggers before
-	 * overwrite triggers: this is so that when extents are being moved
-	 * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
-	 * they are re-added.
-	 */
-	for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
-		if (btree_id == BTREE_ID_alloc)
-			continue;
+	while (sort_id_start < trans->nr_updates) {
+		unsigned i, sort_id = trans->updates[sort_id_start].sort_order;
+		bool trans_trigger_run;

-		ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start);
-		if (ret)
-			return ret;
+		/*
+		 * For a given btree, this algorithm runs insert triggers before
+		 * overwrite triggers: this is so that when extents are being
+		 * moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop
+		 * references before they are re-added.
+		 *
+		 * Running triggers will append more updates to the list of
+		 * updates as we're walking it:
+		 */
+		do {
+			trans_trigger_run = false;
+
+			for (i = sort_id_start;
+			     i < trans->nr_updates && trans->updates[i].sort_order <= sort_id;
+			     i++) {
+				if (trans->updates[i].sort_order < sort_id) {
+					sort_id_start = i;
+					continue;
+				}
+
+				int ret = run_one_trans_trigger(trans, trans->updates + i);
+				if (ret < 0)
+					return ret;
+				if (ret)
+					trans_trigger_run = true;
+			}
+		} while (trans_trigger_run);
+
+		sort_id_start = i;
 	}

-	btree_id_updates_start = 0;
-	ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start);
-	if (ret)
-		return ret;
-
 #ifdef CONFIG_BCACHEFS_DEBUG
 	trans_for_each_update(trans, i)
 		BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
@@ -903,6 +881,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 	struct bch_fs *c = trans->c;
 	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;

+	if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) {
+		/*
+		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
+		 * flag
+		 */
+		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+		    watermark < BCH_WATERMARK_reclaim) {
+			ret = -BCH_ERR_journal_reclaim_would_deadlock;
+			goto out;
+		}
+
+		ret = drop_locks_do(trans,
+			bch2_trans_journal_res_get(trans,
+					(flags & BCH_WATERMARK_MASK)|
+					JOURNAL_RES_GET_CHECK));
+		goto out;
+	}
+
 	switch (ret) {
 	case -BCH_ERR_btree_insert_btree_node_full:
 		ret = bch2_btree_split_leaf(trans, i->path, flags);
@@ -914,22 +910,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 		ret = drop_locks_do(trans,
 			bch2_accounting_update_sb(trans));
 		break;
-	case -BCH_ERR_journal_res_get_blocked:
-		/*
-		 * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
-		 * flag
-		 */
-		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-		    watermark < BCH_WATERMARK_reclaim) {
-			ret = -BCH_ERR_journal_reclaim_would_deadlock;
-			break;
-		}
-
-		ret = drop_locks_do(trans,
-			bch2_trans_journal_res_get(trans,
-					(flags & BCH_WATERMARK_MASK)|
-					JOURNAL_RES_GET_CHECK));
-		break;
 	case -BCH_ERR_btree_insert_need_journal_reclaim:
 		bch2_trans_unlock(trans);

@@ -950,7 +930,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 		BUG_ON(ret >= 0);
 		break;
 	}

+out:
 	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
|
||||
|
||||
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
|
||||
@ -999,6 +979,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
|
||||
bch2_trans_verify_not_unlocked_or_in_restart(trans);
|
||||
|
||||
ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (unlikely(ret))
|
||||
goto out_reset;
|
||||
|
||||
if (!trans->nr_updates &&
|
||||
!trans->journal_entries_u64s)
|
||||
goto out_reset;
|
||||
|
@ -423,6 +423,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
|
||||
|
||||
struct btree_insert_entry {
|
||||
unsigned flags;
|
||||
u8 sort_order;
|
||||
u8 bkey_type;
|
||||
enum btree_id btree_id:8;
|
||||
u8 level:4;
|
||||
@ -509,6 +510,9 @@ struct btree_trans {
|
||||
bool notrace_relock_fail:1;
|
||||
enum bch_errcode restarted:16;
|
||||
u32 restart_count;
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
u32 restart_count_this_trans;
|
||||
#endif
|
||||
|
||||
u64 last_begin_time;
|
||||
unsigned long last_begin_ip;
|
||||
@ -850,6 +854,18 @@ static inline bool btree_type_uses_write_buffer(enum btree_id btree)
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline u8 btree_trigger_order(enum btree_id btree)
|
||||
{
|
||||
switch (btree) {
|
||||
case BTREE_ID_alloc:
|
||||
return U8_MAX;
|
||||
case BTREE_ID_stripes:
|
||||
return U8_MAX - 1;
|
||||
default:
|
||||
return btree;
|
||||
}
|
||||
}
|
||||
|
||||
struct btree_root {
|
||||
struct btree *b;
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
|
||||
const struct btree_insert_entry *r)
|
||||
{
|
||||
return cmp_int(l->btree_id, r->btree_id) ?:
|
||||
return cmp_int(l->sort_order, r->sort_order) ?:
|
||||
cmp_int(l->cached, r->cached) ?:
|
||||
-cmp_int(l->level, r->level) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p);
|
||||
@ -397,6 +397,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
|
||||
|
||||
n = (struct btree_insert_entry) {
|
||||
.flags = flags,
|
||||
.sort_order = btree_trigger_order(path->btree_id),
|
||||
.bkey_type = __btree_node_type(path->level, path->btree_id),
|
||||
.btree_id = path->btree_id,
|
||||
.level = path->level,
|
||||
@ -511,6 +512,8 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
|
||||
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
|
||||
{
|
||||
kmsan_check_memory(k, bkey_bytes(&k->k));
|
||||
|
||||
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
|
||||
int ret;
|
||||
|
||||
@ -843,6 +846,19 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
|
||||
unsigned level, struct bkey_i *k)
|
||||
{
|
||||
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
|
||||
int ret = PTR_ERR_OR_ZERO(e);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s);
|
||||
bkey_copy(e->start, k);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__printf(3, 0)
|
||||
static int
|
||||
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
|
||||
|
@ -126,10 +126,20 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
|
||||
|
||||
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
|
||||
|
||||
int bch2_btree_write_buffer_insert_err(struct btree_trans *,
|
||||
enum btree_id, struct bkey_i *);
|
||||
|
||||
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
|
||||
enum btree_id btree,
|
||||
struct bkey_i *k)
|
||||
{
|
||||
kmsan_check_memory(k, bkey_bytes(&k->k));
|
||||
|
||||
if (unlikely(!btree_type_uses_write_buffer(btree))) {
|
||||
int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
|
||||
dump_stack();
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* Most updates skip the btree write buffer until journal replay is
|
||||
* finished because synchronization with journal replay relies on having
|
||||
@ -160,6 +170,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
|
||||
int __bch2_trans_commit(struct btree_trans *, unsigned);
|
||||
|
||||
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
|
||||
int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *);
|
||||
|
||||
__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
|
||||
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
|
||||
|
||||
|
@ -35,6 +35,8 @@ static const char * const bch2_btree_update_modes[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);
|
||||
|
||||
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
|
||||
btree_path_idx_t, struct btree *, struct keylist *);
|
||||
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
|
||||
@ -54,6 +56,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
|
||||
struct bkey_buf prev;
|
||||
int ret = 0;
|
||||
|
||||
printbuf_indent_add_nextline(&buf, 2);
|
||||
|
||||
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
|
||||
b->data->min_key));
|
||||
@ -64,19 +68,20 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
|
||||
|
||||
if (b == btree_node_root(c, b)) {
|
||||
if (!bpos_eq(b->data->min_key, POS_MIN)) {
|
||||
printbuf_reset(&buf);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
|
||||
bch2_bpos_to_text(&buf, b->data->min_key);
|
||||
log_fsck_err(trans, btree_root_bad_min_key,
|
||||
"btree root with incorrect min_key: %s", buf.buf);
|
||||
goto topology_repair;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
|
||||
printbuf_reset(&buf);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
bch2_bpos_to_text(&buf, b->data->max_key);
|
||||
log_fsck_err(trans, btree_root_bad_max_key,
|
||||
"btree root with incorrect max_key: %s", buf.buf);
|
||||
goto topology_repair;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -94,20 +99,19 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
|
||||
: bpos_successor(prev.k->k.p);
|
||||
|
||||
if (!bpos_eq(expected_min, bp.v->min_key)) {
|
||||
bch2_topology_error(c);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
prt_str(&buf, "end of prev node doesn't match start of next node\n in ");
|
||||
prt_str(&buf, "end of prev node doesn't match start of next node\nin ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_str(&buf, " node ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
prt_str(&buf, "\n prev ");
|
||||
prt_str(&buf, "\nprev ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
|
||||
prt_str(&buf, "\n next ");
|
||||
prt_str(&buf, "\nnext ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
|
||||
log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf);
|
||||
goto topology_repair;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_bkey_buf_reassemble(&prev, c, k);
|
||||
@ -115,29 +119,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
|
||||
}
|
||||
|
||||
if (bkey_deleted(&prev.k->k)) {
|
||||
bch2_topology_error(c);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
prt_str(&buf, "empty interior node\n in ");
|
||||
prt_str(&buf, "empty interior node\nin ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_str(&buf, " node ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
|
||||
log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf);
|
||||
goto topology_repair;
|
||||
} else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
|
||||
bch2_topology_error(c);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
prt_str(&buf, "last child node doesn't end at end of parent node\n in ");
|
||||
prt_str(&buf, "last child node doesn't end at end of parent node\nin ");
|
||||
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
|
||||
prt_str(&buf, " node ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
prt_str(&buf, "\n last key ");
|
||||
prt_str(&buf, "\nlast key ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
|
||||
|
||||
log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf);
|
||||
goto topology_repair;
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
@ -145,9 +145,6 @@ fsck_err:
|
||||
bch2_bkey_buf_exit(&prev, c);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
topology_repair:
|
||||
ret = bch2_topology_error(c);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Calculate ideal packed bkey format for new btree nodes: */
|
||||
@ -649,6 +646,14 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If the node has been reused, we might be reading uninitialized memory - that's fine: */
|
||||
static noinline __no_kmsan_checks bool btree_node_seq_matches(struct btree *b, __le64 seq)
|
||||
{
|
||||
struct btree_node *b_data = READ_ONCE(b->data);
|
||||
|
||||
return (b_data ? b_data->keys.seq : 0) == seq;
|
||||
}
|
||||
|
||||
static void btree_update_nodes_written(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
@ -677,15 +682,9 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
__le64 seq;
|
||||
|
||||
b = as->old_nodes[i];
|
||||
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
if (btree_node_seq_matches(b, as->old_nodes_seq[i]))
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
@ -1269,7 +1268,8 @@ err:
|
||||
bch2_btree_update_free(as, trans);
|
||||
if (!bch2_err_matches(ret, ENOSPC) &&
|
||||
!bch2_err_matches(ret, EROFS) &&
|
||||
ret != -BCH_ERR_journal_reclaim_would_deadlock)
|
||||
ret != -BCH_ERR_journal_reclaim_would_deadlock &&
|
||||
ret != -BCH_ERR_journal_shutdown)
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -1780,11 +1780,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
BUG_ON(!btree_node_intent_locked(path, b->c.level));
|
||||
BUG_ON(!b->c.level);
|
||||
BUG_ON(!as || as->b);
|
||||
bch2_verify_keylist_sorted(keys);
|
||||
|
||||
if (!btree_node_intent_locked(path, b->c.level)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf, "%s(): node not locked at level %u\n",
|
||||
__func__, b->c.level);
|
||||
bch2_btree_update_to_text(&buf, as);
|
||||
bch2_btree_path_to_text(&buf, trans, path_idx);
|
||||
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
bch2_fs_emergency_read_only(c);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &b->c);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -2005,18 +2018,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
|
||||
struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bpos_to_text(&buf1, prev->data->max_key);
|
||||
bch2_bpos_to_text(&buf2, next->data->min_key);
|
||||
bch_err(c,
|
||||
"%s(): btree topology error:\n"
|
||||
" prev ends at %s\n"
|
||||
" next starts at %s",
|
||||
__func__, buf1.buf, buf2.buf);
|
||||
printbuf_exit(&buf1);
|
||||
printbuf_exit(&buf2);
|
||||
ret = bch2_topology_error(c);
|
||||
printbuf_indent_add_nextline(&buf, 2);
|
||||
prt_printf(&buf, "%s(): ", __func__);
|
||||
ret = __bch2_topology_error(c, &buf);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "prev ends at ");
|
||||
bch2_bpos_to_text(&buf, prev->data->max_key);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "next starts at ");
|
||||
bch2_bpos_to_text(&buf, next->data->min_key);
|
||||
|
||||
bch_err(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -2124,6 +2141,31 @@ err_free_update:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p,
|
||||
BTREE_MAX_DEPTH, b->c.level,
|
||||
BTREE_ITER_intent);
|
||||
int ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* has node been freed? */
|
||||
if (btree_iter_path(trans, iter)->l[b->c.level].b != b) {
|
||||
/* node has been freed: */
|
||||
BUG_ON(!btree_node_dying(b));
|
||||
ret = -BCH_ERR_btree_node_dying;
|
||||
goto err;
|
||||
}
|
||||
|
||||
BUG_ON(!btree_node_hashed(b));
|
||||
return 0;
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct btree *b,
|
||||
@ -2189,7 +2231,29 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_btree_node_rewrite_key(struct btree_trans *trans,
|
||||
static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bkey_i *k, unsigned flags)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
bch2_trans_node_iter_init(trans, &iter,
|
||||
btree, k->k.p,
|
||||
BTREE_MAX_DEPTH, level, 0);
|
||||
struct btree *b = bch2_btree_iter_peek_node(&iter);
|
||||
int ret = PTR_ERR_OR_ZERO(b);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k);
|
||||
ret = found
|
||||
? bch2_btree_node_rewrite(trans, &iter, b, flags)
|
||||
: -ENOENT;
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bpos pos, unsigned flags)
|
||||
{
|
||||
@ -2209,6 +2273,19 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
|
||||
struct btree *b, unsigned flags)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
int ret = get_iter_to_node(trans, &iter, b);
|
||||
if (ret)
|
||||
return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
|
||||
|
||||
ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct async_btree_rewrite {
|
||||
struct bch_fs *c;
|
||||
struct work_struct work;
|
||||
@ -2218,58 +2295,17 @@ struct async_btree_rewrite {
|
||||
struct bkey_buf key;
|
||||
};
|
||||
|
||||
static int async_btree_node_rewrite_trans(struct btree_trans *trans,
|
||||
struct async_btree_rewrite *a)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
bch2_trans_node_iter_init(trans, &iter,
|
||||
a->btree_id, a->key.k->k.p,
|
||||
BTREE_MAX_DEPTH, a->level, 0);
|
||||
struct btree *b = bch2_btree_iter_peek_node(&iter);
|
||||
int ret = PTR_ERR_OR_ZERO(b);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k);
|
||||
ret = found
|
||||
? bch2_btree_node_rewrite(trans, &iter, b, 0)
|
||||
: -ENOENT;
|
||||
|
||||
#if 0
|
||||
/* Tracepoint... */
|
||||
if (!ret || ret == -ENOENT) {
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
if (!ret) {
|
||||
prt_printf(&buf, "rewrite node:\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k));
|
||||
} else {
|
||||
prt_printf(&buf, "node to rewrite not found:\n want: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k));
|
||||
prt_printf(&buf, "\n got: ");
|
||||
if (b)
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
|
||||
else
|
||||
prt_str(&buf, "(null)");
|
||||
}
|
||||
bch_info(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
#endif
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void async_btree_node_rewrite_work(struct work_struct *work)
|
||||
{
|
||||
struct async_btree_rewrite *a =
|
||||
container_of(work, struct async_btree_rewrite, work);
|
||||
struct bch_fs *c = a->c;
|
||||
|
||||
int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a));
|
||||
if (ret != -ENOENT)
|
||||
int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans,
|
||||
a->btree_id, a->level, a->key.k, 0));
|
||||
if (ret != -ENOENT &&
|
||||
!bch2_err_matches(ret, EROFS) &&
|
||||
ret != -BCH_ERR_journal_shutdown)
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
|
||||
spin_lock(&c->btree_node_rewrites_lock);
|
||||
@ -2512,30 +2548,15 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
|
||||
unsigned commit_flags, bool skip_triggers)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
int ret;
|
||||
|
||||
bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
|
||||
BTREE_MAX_DEPTH, b->c.level,
|
||||
BTREE_ITER_intent);
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
int ret = get_iter_to_node(trans, &iter, b);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* has node been freed? */
|
||||
if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) {
|
||||
/* node has been freed: */
|
||||
BUG_ON(!btree_node_dying(b));
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUG_ON(!btree_node_hashed(b));
|
||||
return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
|
||||
!bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
|
||||
|
||||
ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
|
||||
commit_flags, skip_triggers);
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
@ -169,9 +169,12 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
|
||||
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
|
||||
struct btree *, unsigned);
|
||||
int bch2_btree_node_rewrite_key(struct btree_trans *,
|
||||
int bch2_btree_node_rewrite_pos(struct btree_trans *,
|
||||
enum btree_id, unsigned,
|
||||
struct bpos, unsigned);
|
||||
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *,
|
||||
struct btree *, unsigned);
|
||||
|
||||
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
|
||||
|
||||
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
|
||||
@ -282,12 +285,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
|
||||
{
|
||||
struct bset_tree *t = bset_tree_last(b);
|
||||
struct btree_node_entry *bne = max(write_block(b),
|
||||
(void *) btree_bkey_last(b, bset_tree_last(b)));
|
||||
(void *) btree_bkey_last(b, t));
|
||||
ssize_t remaining_space =
|
||||
__bch2_btree_u64s_remaining(b, bne->keys.start);
|
||||
|
||||
if (unlikely(bset_written(b, bset(b, t)))) {
|
||||
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
|
||||
if (b->written + block_sectors(c) <= btree_sectors(c))
|
||||
return bne;
|
||||
} else {
|
||||
if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
|
||||
|
@ -264,6 +264,22 @@ out:
|
||||
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
|
||||
}
|
||||
|
||||
int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
|
||||
enum btree_id btree, struct bkey_i *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
|
||||
bch2_btree_id_to_text(&buf, btree);
|
||||
prt_str(&buf, "\n");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
|
||||
|
||||
bch2_fs_inconsistent(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EROFS;
|
||||
}
|
||||
|
||||
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
darray_for_each(wb->sorted, i) {
|
||||
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
|
||||
|
||||
BUG_ON(!btree_type_uses_write_buffer(k->btree));
|
||||
if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
|
||||
ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
|
||||
prefetch(&wb->flushing.keys.data[n->idx]);
|
||||
|
@ -381,6 +381,36 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf,
|
||||
struct bkey_s_c k, bool insert, enum bch_sb_error_id id)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
bool repeat = false, print = true, suppress = false;
|
||||
|
||||
prt_printf(buf, "\nwhile marking ");
|
||||
bch2_bkey_val_to_text(buf, c, k);
|
||||
prt_newline(buf);
|
||||
|
||||
__bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress);
|
||||
|
||||
int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
|
||||
if (insert) {
|
||||
print = true;
|
||||
suppress = false;
|
||||
|
||||
bch2_trans_updates_to_text(buf, trans);
|
||||
__bch2_inconsistent_error(c, buf);
|
||||
ret = -BCH_ERR_bucket_ref_update;
|
||||
}
|
||||
|
||||
if (suppress)
|
||||
prt_printf(buf, "Ratelimiting new instances of previous error\n");
|
||||
if (print)
|
||||
bch2_print_string_as_lines(KERN_ERR, buf->buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
|
||||
struct bkey_s_c k,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
@ -396,32 +426,29 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
|
||||
|
||||
BUG_ON(!sectors);
|
||||
|
||||
if (gen_after(ptr->gen, b_gen)) {
|
||||
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
log_fsck_err(trans, ptr_gen_newer_than_bucket_gen,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
|
||||
"while marking %s",
|
||||
if (unlikely(gen_after(ptr->gen, b_gen))) {
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
|
||||
ptr->gen,
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
if (inserting)
|
||||
goto err;
|
||||
ptr->gen);
|
||||
|
||||
ret = bucket_ref_update_err(trans, &buf, k, inserting,
|
||||
BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
|
||||
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
log_fsck_err(trans, ptr_too_stale,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
|
||||
"while marking %s",
|
||||
if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) {
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
|
||||
ptr->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
if (inserting)
|
||||
goto err;
|
||||
ptr->gen);
|
||||
|
||||
ret = bucket_ref_update_err(trans, &buf, k, inserting,
|
||||
BCH_FSCK_ERR_ptr_too_stale);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -430,62 +457,50 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (b_gen != ptr->gen) {
|
||||
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
log_fsck_err(trans, stale_dirty_ptr,
|
||||
"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
|
||||
"while marking %s",
|
||||
if (unlikely(b_gen != ptr->gen)) {
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf,
|
||||
"bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bucket_gen_get(ca, bucket_nr),
|
||||
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
|
||||
ptr->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
if (inserting)
|
||||
goto err;
|
||||
ptr->gen);
|
||||
|
||||
ret = bucket_ref_update_err(trans, &buf, k, inserting,
|
||||
BCH_FSCK_ERR_stale_dirty_ptr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) {
|
||||
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
log_fsck_err(trans, ptr_bucket_data_type_mismatch,
|
||||
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
|
||||
"while marking %s",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bch2_data_type_str(bucket_data_type),
|
||||
bch2_data_type_str(ptr_data_type),
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
if (inserting)
|
||||
goto err;
|
||||
if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) {
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bch2_data_type_str(bucket_data_type),
|
||||
bch2_data_type_str(ptr_data_type));
|
||||
|
||||
ret = bucket_ref_update_err(trans, &buf, k, inserting,
|
||||
BCH_FSCK_ERR_ptr_bucket_data_type_mismatch);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((u64) *bucket_sectors + sectors > U32_MAX) {
|
||||
bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
|
||||
log_fsck_err(trans, bucket_sector_count_overflow,
|
||||
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
|
||||
"while marking %s",
|
||||
if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) {
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf,
|
||||
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX",
|
||||
ptr->dev, bucket_nr, b_gen,
|
||||
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
|
||||
*bucket_sectors, sectors,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
if (inserting)
|
||||
goto err;
|
||||
*bucket_sectors, sectors);
|
||||
|
||||
ret = bucket_ref_update_err(trans, &buf, k, inserting,
|
||||
BCH_FSCK_ERR_bucket_sector_count_overflow);
|
||||
sectors = -*bucket_sectors;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*bucket_sectors += sectors;
|
||||
out:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_dump_trans_updates(trans);
|
||||
bch2_inconsistent_error(c);
|
||||
ret = -BCH_ERR_bucket_ref_update;
|
||||
goto out;
|
||||
}
|
||||
|
||||
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
|
||||
@ -590,11 +605,9 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!p.ptr.cached) {
|
||||
ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
@ -653,9 +666,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
|
||||
stripe_blockcount_get(&s->v, p.ec.block) +
|
||||
sectors);
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
};
|
||||
struct disk_accounting_pos acc;
|
||||
memset(&acc, 0, sizeof(acc));
|
||||
acc.type = BCH_DISK_ACCOUNTING_replicas;
|
||||
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
|
||||
acc.replicas.data_type = data_type;
|
||||
ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false);
|
||||
@ -674,26 +687,28 @@ err:
|
||||
return -BCH_ERR_ENOMEM_mark_stripe_ptr;
|
||||
}
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_lock(m);
|
||||
|
||||
if (!m || !m->alive) {
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_unlock(m);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_log_msg_start(c, &buf);
|
||||
prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
|
||||
(u64) p.ec.idx);
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
|
||||
(u64) p.ec.idx, buf.buf);
|
||||
__bch2_inconsistent_error(c, &buf);
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
bch2_inconsistent_error(c);
|
||||
return -BCH_ERR_trigger_stripe_pointer;
|
||||
}
|
||||
|
||||
m->block_sectors[p.ec.block] += sectors;
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
};
|
||||
struct disk_accounting_pos acc;
|
||||
memset(&acc, 0, sizeof(acc));
|
||||
acc.type = BCH_DISK_ACCOUNTING_replicas;
|
||||
memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_unlock(m);
|
||||
|
||||
acc.replicas.data_type = data_type;
|
||||
int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, true);
|
||||
@ -719,16 +734,14 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
: BCH_DATA_user;
|
||||
int ret = 0;
|
||||
|
||||
struct disk_accounting_pos acc_replicas_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
.replicas.data_type = data_type,
|
||||
.replicas.nr_devs = 0,
|
||||
.replicas.nr_required = 1,
|
||||
};
|
||||
struct disk_accounting_pos acc_replicas_key;
|
||||
memset(&acc_replicas_key, 0, sizeof(acc_replicas_key));
|
||||
acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas;
|
||||
acc_replicas_key.replicas.data_type = data_type;
|
||||
acc_replicas_key.replicas.nr_devs = 0;
|
||||
acc_replicas_key.replicas.nr_required = 1;
|
||||
|
||||
struct disk_accounting_pos acct_compression_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_compression,
|
||||
};
|
||||
unsigned cur_compression_type = 0;
|
||||
u64 compression_acct[3] = { 1, 0, 0 };
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
@ -762,13 +775,13 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
acc_replicas_key.replicas.nr_required = 0;
|
||||
}
|
||||
|
||||
if (acct_compression_key.compression.type &&
|
||||
acct_compression_key.compression.type != p.crc.compression_type) {
|
||||
if (cur_compression_type &&
|
||||
cur_compression_type != p.crc.compression_type) {
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
|
||||
|
||||
ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
|
||||
ARRAY_SIZE(compression_acct), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
|
||||
compression, cur_compression_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -777,7 +790,7 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
compression_acct[2] = 0;
|
||||
}
|
||||
|
||||
acct_compression_key.compression.type = p.crc.compression_type;
|
||||
cur_compression_type = p.crc.compression_type;
|
||||
if (p.crc.compression_type) {
|
||||
compression_acct[1] += p.crc.uncompressed_size;
|
||||
compression_acct[2] += p.crc.compressed_size;
|
||||
@ -791,45 +804,34 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
|
||||
struct disk_accounting_pos acc_snapshot_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_snapshot,
|
||||
.snapshot.id = k.k->p.snapshot,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
|
||||
ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, snapshot, k.k->p.snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (acct_compression_key.compression.type) {
|
||||
if (cur_compression_type) {
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
|
||||
|
||||
ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
|
||||
ARRAY_SIZE(compression_acct), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
|
||||
compression, cur_compression_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (level) {
|
||||
struct disk_accounting_pos acc_btree_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_btree,
|
||||
.btree.id = btree_id,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
|
||||
ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, btree, btree_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
bool insert = !(flags & BTREE_TRIGGER_overwrite);
|
||||
struct disk_accounting_pos acc_inum_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_inum,
|
||||
.inum.inum = k.k->p.inode,
|
||||
};
|
||||
|
||||
s64 v[3] = {
|
||||
insert ? 1 : -1,
|
||||
insert ? k.k->size : -((s64) k.k->size),
|
||||
*replicas_sectors,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -878,15 +880,15 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
int need_rebalance_delta = 0;
|
||||
s64 need_rebalance_sectors_delta = 0;
|
||||
s64 need_rebalance_sectors_delta[1] = { 0 };
|
||||
|
||||
s64 s = bch2_bkey_sectors_need_rebalance(c, old);
|
||||
need_rebalance_delta -= s != 0;
|
||||
need_rebalance_sectors_delta -= s;
|
||||
need_rebalance_sectors_delta[0] -= s;
|
||||
|
||||
s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
|
||||
need_rebalance_delta += s != 0;
|
||||
need_rebalance_sectors_delta += s;
|
||||
need_rebalance_sectors_delta[0] += s;
|
||||
|
||||
if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
|
||||
int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
|
||||
@ -895,12 +897,9 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (need_rebalance_sectors_delta) {
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_rebalance_work,
|
||||
};
|
||||
int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
|
||||
flags & BTREE_TRIGGER_gc);
|
||||
if (need_rebalance_sectors_delta[0]) {
|
||||
int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
|
||||
need_rebalance_sectors_delta, rebalance_work);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -916,17 +915,13 @@ static int __trigger_reservation(struct btree_trans *trans,
|
||||
enum btree_iter_update_trigger_flags flags)
|
||||
{
|
||||
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
|
||||
s64 sectors = k.k->size;
|
||||
s64 sectors[1] = { k.k->size };
|
||||
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
sectors = -sectors;
|
||||
sectors[0] = -sectors[0];
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_persistent_reserved,
|
||||
.persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
|
||||
};
|
||||
|
||||
return bch2_disk_accounting_mod(trans, &acc, §ors, 1, flags & BTREE_TRIGGER_gc);
|
||||
return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, sectors,
|
||||
persistent_reserved, bkey_s_c_to_reservation(k).v->nr_replicas);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -39,33 +39,6 @@ static inline u64 sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t
|
||||
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
|
||||
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
|
||||
|
||||
/*
|
||||
* Ugly hack alert:
|
||||
*
|
||||
* We need to cram a spinlock in a single byte, because that's what we have left
|
||||
* in struct bucket, and we care about the size of these - during fsck, we need
|
||||
* in memory state for every single bucket on every device.
|
||||
*
|
||||
* We used to do
|
||||
* while (xchg(&b->lock, 1) cpu_relax();
|
||||
* but, it turns out not all architectures support xchg on a single byte.
|
||||
*
|
||||
* So now we use bit_spin_lock(), with fun games since we can't burn a whole
|
||||
* ulong for this - we just need to make sure the lock bit always ends up in the
|
||||
* first byte.
|
||||
*/
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#define BUCKET_LOCK_BITNR 0
|
||||
#else
|
||||
#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
|
||||
#endif
|
||||
|
||||
union ulong_byte_assert {
|
||||
ulong ulong;
|
||||
u8 byte;
|
||||
};
|
||||
|
||||
static inline void bucket_unlock(struct bucket *b)
|
||||
{
|
||||
BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
|
||||
@ -167,9 +140,7 @@ static inline int gen_cmp(u8 a, u8 b)
|
||||
|
||||
static inline int gen_after(u8 a, u8 b)
|
||||
{
|
||||
int r = gen_cmp(a, b);
|
||||
|
||||
return r > 0 ? r : 0;
|
||||
return max(0, gen_cmp(a, b));
|
||||
}
|
||||
|
||||
static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
|
||||
|
@ -7,6 +7,33 @@
|
||||
|
||||
#define BUCKET_JOURNAL_SEQ_BITS 16
|
||||
|
||||
/*
|
||||
* Ugly hack alert:
|
||||
*
|
||||
* We need to cram a spinlock in a single byte, because that's what we have left
|
||||
* in struct bucket, and we care about the size of these - during fsck, we need
|
||||
* in memory state for every single bucket on every device.
|
||||
*
|
||||
* We used to do
|
||||
* while (xchg(&b->lock, 1) cpu_relax();
|
||||
* but, it turns out not all architectures support xchg on a single byte.
|
||||
*
|
||||
* So now we use bit_spin_lock(), with fun games since we can't burn a whole
|
||||
* ulong for this - we just need to make sure the lock bit always ends up in the
|
||||
* first byte.
|
||||
*/
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#define BUCKET_LOCK_BITNR 0
|
||||
#else
|
||||
#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
|
||||
#endif
|
||||
|
||||
union ulong_byte_assert {
|
||||
ulong ulong;
|
||||
u8 byte;
|
||||
};
|
||||
|
||||
struct bucket {
|
||||
u8 lock;
|
||||
u8 gen_valid:1;
|
||||
|
@ -315,8 +315,10 @@ static int bch2_data_thread(void *arg)
|
||||
ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
|
||||
if (ctx->thr.ret == -BCH_ERR_device_offline)
|
||||
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
|
||||
else
|
||||
else {
|
||||
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
|
||||
ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -424,10 +426,8 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
arg.replica_entries_bytes = replicas.nr;
|
||||
|
||||
for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
struct disk_accounting_pos k = {
|
||||
.type = BCH_DISK_ACCOUNTING_persistent_reserved,
|
||||
.persistent_reserved.nr_replicas = i,
|
||||
};
|
||||
struct disk_accounting_pos k;
|
||||
disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i);
|
||||
|
||||
bch2_accounting_mem_read(c,
|
||||
disk_accounting_pos_to_bpos(&k),
|
||||
|
@ -466,7 +466,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
|
||||
prt_str(&buf, ")");
|
||||
WARN_RATELIMIT(1, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
return -BCH_ERR_recompute_checksum;
|
||||
}
|
||||
|
||||
for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
|
||||
@ -693,6 +693,14 @@ static int bch2_alloc_ciphers(struct bch_fs *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
/*
|
||||
* This seems to be duplicating code in cmd_remove_passphrase() in
|
||||
* bcachefs-tools, but we might want to switch userspace to use this - and
|
||||
* perhaps add an ioctl for calling this at runtime, so we can take the
|
||||
* passphrase off of a mounted filesystem (which has come up).
|
||||
*/
|
||||
int bch2_disable_encryption(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_crypt *crypt;
|
||||
@ -725,6 +733,10 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* For enabling encryption on an existing filesystem: not hooked up yet, but it
|
||||
* should be
|
||||
*/
|
||||
int bch2_enable_encryption(struct bch_fs *c, bool keyed)
|
||||
{
|
||||
struct bch_encrypted_key key;
|
||||
@ -781,6 +793,7 @@ err:
|
||||
memzero_explicit(&key, sizeof(key));
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
void bch2_fs_encryption_exit(struct bch_fs *c)
|
||||
{
|
||||
@ -788,8 +801,6 @@ void bch2_fs_encryption_exit(struct bch_fs *c)
|
||||
crypto_free_shash(c->poly1305);
|
||||
if (c->chacha20)
|
||||
crypto_free_sync_skcipher(c->chacha20);
|
||||
if (c->sha256)
|
||||
crypto_free_shash(c->sha256);
|
||||
}
|
||||
|
||||
int bch2_fs_encryption_init(struct bch_fs *c)
|
||||
@ -798,14 +809,6 @@ int bch2_fs_encryption_init(struct bch_fs *c)
|
||||
struct bch_key key;
|
||||
int ret = 0;
|
||||
|
||||
c->sha256 = crypto_alloc_shash("sha256", 0, 0);
|
||||
ret = PTR_ERR_OR_ZERO(c->sha256);
|
||||
if (ret) {
|
||||
c->sha256 = NULL;
|
||||
bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
|
||||
if (!crypt)
|
||||
goto out;
|
||||
|
@ -103,8 +103,10 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_crypt;
|
||||
int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
|
||||
struct bch_key *);
|
||||
|
||||
#if 0
|
||||
int bch2_disable_encryption(struct bch_fs *);
|
||||
int bch2_enable_encryption(struct bch_fs *, bool);
|
||||
#endif
|
||||
|
||||
void bch2_fs_encryption_exit(struct bch_fs *);
|
||||
int bch2_fs_encryption_init(struct bch_fs *);
|
||||
|
@ -177,7 +177,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
size_t src_len = src->bi_iter.bi_size;
|
||||
size_t dst_len = crc.uncompressed_size << 9;
|
||||
void *workspace;
|
||||
int ret;
|
||||
int ret = 0, ret2;
|
||||
|
||||
enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type);
|
||||
mempool_t *workspace_pool = &c->compress_workspace[opt];
|
||||
@ -189,7 +189,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
else
|
||||
ret = -BCH_ERR_compression_workspace_not_initialized;
|
||||
if (ret)
|
||||
goto out;
|
||||
goto err;
|
||||
}
|
||||
|
||||
src_data = bio_map_or_bounce(c, src, READ);
|
||||
@ -197,10 +197,10 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
switch (crc.compression_type) {
|
||||
case BCH_COMPRESSION_TYPE_lz4_old:
|
||||
case BCH_COMPRESSION_TYPE_lz4:
|
||||
ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
|
||||
src_len, dst_len, dst_len);
|
||||
if (ret != dst_len)
|
||||
goto err;
|
||||
ret2 = LZ4_decompress_safe_partial(src_data.b, dst_data,
|
||||
src_len, dst_len, dst_len);
|
||||
if (ret2 != dst_len)
|
||||
ret = -BCH_ERR_decompress_lz4;
|
||||
break;
|
||||
case BCH_COMPRESSION_TYPE_gzip: {
|
||||
z_stream strm = {
|
||||
@ -214,45 +214,43 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
|
||||
zlib_set_workspace(&strm, workspace);
|
||||
zlib_inflateInit2(&strm, -MAX_WBITS);
|
||||
ret = zlib_inflate(&strm, Z_FINISH);
|
||||
ret2 = zlib_inflate(&strm, Z_FINISH);
|
||||
|
||||
mempool_free(workspace, workspace_pool);
|
||||
|
||||
if (ret != Z_STREAM_END)
|
||||
goto err;
|
||||
if (ret2 != Z_STREAM_END)
|
||||
ret = -BCH_ERR_decompress_gzip;
|
||||
break;
|
||||
}
|
||||
case BCH_COMPRESSION_TYPE_zstd: {
|
||||
ZSTD_DCtx *ctx;
|
||||
size_t real_src_len = le32_to_cpup(src_data.b);
|
||||
|
||||
if (real_src_len > src_len - 4)
|
||||
if (real_src_len > src_len - 4) {
|
||||
ret = -BCH_ERR_decompress_zstd_src_len_bad;
|
||||
goto err;
|
||||
}
|
||||
|
||||
workspace = mempool_alloc(workspace_pool, GFP_NOFS);
|
||||
ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
|
||||
|
||||
ret = zstd_decompress_dctx(ctx,
|
||||
ret2 = zstd_decompress_dctx(ctx,
|
||||
dst_data, dst_len,
|
||||
src_data.b + 4, real_src_len);
|
||||
|
||||
mempool_free(workspace, workspace_pool);
|
||||
|
||||
if (ret != dst_len)
|
||||
goto err;
|
||||
if (ret2 != dst_len)
|
||||
ret = -BCH_ERR_decompress_zstd;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
fsck_err:
|
||||
out:
|
||||
bio_unmap_or_unbounce(c, src_data);
|
||||
return ret;
|
||||
err:
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
@ -268,27 +266,22 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
BUG_ON(!bio->bi_vcnt);
|
||||
BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
|
||||
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: extent too big");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
bch2_write_op_error(op, op->pos.offset,
|
||||
"extent too big to decompress (%u > %u)",
|
||||
crc->uncompressed_size << 9, c->opts.encoded_extent_max);
|
||||
return -BCH_ERR_decompress_exceeded_max_encoded_extent;
|
||||
}
|
||||
|
||||
data = __bounce_alloc(c, dst_len, WRITE);
|
||||
|
||||
if (__bio_uncompress(c, bio, data.b, *crc)) {
|
||||
if (!c->opts.no_data_io) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: decompression error");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
ret = -EIO;
|
||||
ret = __bio_uncompress(c, bio, data.b, *crc);
|
||||
|
||||
if (c->opts.no_data_io)
|
||||
ret = 0;
|
||||
|
||||
if (ret) {
|
||||
bch2_write_op_error(op, op->pos.offset, "%s", bch2_err_str(ret));
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -321,7 +314,7 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
|
||||
if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc.compressed_size << 9 > c->opts.encoded_extent_max)
|
||||
return -EIO;
|
||||
return -BCH_ERR_decompress_exceeded_max_encoded_extent;
|
||||
|
||||
dst_data = dst_len == dst_iter.bi_size
|
||||
? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
|
||||
|
@ -22,6 +22,13 @@
|
||||
|
||||
#include <linux/ioprio.h>
|
||||
|
||||
static const char * const bch2_data_update_type_strs[] = {
|
||||
#define x(t, n, ...) [n] = #t,
|
||||
BCH_DATA_UPDATE_TYPES()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
@ -93,7 +100,7 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
|
||||
return true;
|
||||
}
|
||||
|
||||
static noinline void trace_move_extent_finish2(struct data_update *u,
|
||||
static noinline void trace_io_move_finish2(struct data_update *u,
|
||||
struct bkey_i *new,
|
||||
struct bkey_i *insert)
|
||||
{
|
||||
@ -113,11 +120,11 @@ static noinline void trace_move_extent_finish2(struct data_update *u,
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
prt_newline(&buf);
|
||||
|
||||
trace_move_extent_finish(c, buf.buf);
|
||||
trace_io_move_finish(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
static void trace_move_extent_fail2(struct data_update *m,
|
||||
static void trace_io_move_fail2(struct data_update *m,
|
||||
struct bkey_s_c new,
|
||||
struct bkey_s_c wrote,
|
||||
struct bkey_i *insert,
|
||||
@ -128,7 +135,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
unsigned rewrites_found = 0;
|
||||
|
||||
if (!trace_move_extent_fail_enabled())
|
||||
if (!trace_io_move_fail_enabled())
|
||||
return;
|
||||
|
||||
prt_str(&buf, msg);
|
||||
@ -168,7 +175,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
}
|
||||
|
||||
trace_move_extent_fail(c, buf.buf);
|
||||
trace_io_move_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@ -181,6 +188,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
container_of(op, struct data_update, op);
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bkey_buf _new, _insert;
|
||||
struct printbuf journal_msg = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&_new);
|
||||
@ -216,7 +224,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
new = bkey_i_to_extent(bch2_keylist_front(keys));
|
||||
|
||||
if (!bch2_extents_match(k, old)) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
||||
NULL, "no match:");
|
||||
goto nowork;
|
||||
}
|
||||
@ -256,7 +264,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
if (m->data_opts.rewrite_ptrs &&
|
||||
!rewrites_found &&
|
||||
bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
||||
goto nowork;
|
||||
}
|
||||
|
||||
@ -273,7 +281,7 @@ restart_drop_conflicting_replicas:
|
||||
}
|
||||
|
||||
if (!bkey_val_u64s(&new->k)) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
||||
goto nowork;
|
||||
}
|
||||
|
||||
@ -342,6 +350,7 @@ restart_drop_extra_replicas:
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "about to insert invalid key in data update path");
|
||||
prt_printf(&buf, "\nop.nonce: %u", m->op.nonce);
|
||||
prt_str(&buf, "\nold: ");
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
prt_str(&buf, "\nk: ");
|
||||
@ -353,7 +362,7 @@ restart_drop_extra_replicas:
|
||||
printbuf_exit(&buf);
|
||||
|
||||
bch2_fatal_error(c);
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_invalid_bkey;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -371,7 +380,12 @@ restart_drop_extra_replicas:
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
printbuf_reset(&journal_msg);
|
||||
prt_str(&journal_msg, bch2_data_update_type_strs[m->type]);
|
||||
|
||||
ret = bch2_trans_log_msg(trans, &journal_msg) ?:
|
||||
bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?:
|
||||
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, bkey_start_pos(&insert->k)) ?:
|
||||
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, insert->k.p) ?:
|
||||
@@ -386,9 +400,9 @@ restart_drop_extra_replicas:
 if (!ret) {
 bch2_btree_iter_set_pos(&iter, next_pos);

-this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
-if (trace_move_extent_finish_enabled())
-trace_move_extent_finish2(m, &new->k_i, insert);
+this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
+if (trace_io_move_finish_enabled())
+trace_io_move_finish2(m, &new->k_i, insert);
 }
 err:
 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -410,12 +424,13 @@ nowork:
 &m->stats->sectors_raced);
 }

-count_event(c, move_extent_fail);
+count_event(c, io_move_fail);

 bch2_btree_iter_advance(&iter);
 goto next;
 }
 out:
+printbuf_exit(&journal_msg);
 bch2_trans_iter_exit(trans, &iter);
 bch2_bkey_buf_exit(&_insert, c);
 bch2_bkey_buf_exit(&_new, c);
@@ -438,7 +453,7 @@ void bch2_data_update_read_done(struct data_update *m)
 m->op.crc = m->rbio.pick.crc;
 m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;

-this_cpu_add(m->op.c->counters[BCH_COUNTER_move_extent_write], m->k.k->k.size);
+this_cpu_add(m->op.c->counters[BCH_COUNTER_io_move_write], m->k.k->k.size);

 closure_call(&m->op.cl, bch2_write, NULL, NULL);
 }
@@ -572,11 +587,13 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,

 prt_str_indented(out, "extra replicas:\t");
 prt_u64(out, data_opts->extra_replicas);
 prt_newline(out);
 }

 void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
 {
+prt_str(out, bch2_data_update_type_strs[m->type]);
+prt_newline(out);
+
 bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
 prt_newline(out);

@@ -638,11 +655,46 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 }

-static bool can_allocate_without_blocking(struct bch_fs *c,
-struct data_update *m)
+int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
+struct bch_io_opts *io_opts)
 {
-if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
-return false;
+struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
+const union bch_extent_entry *entry;
+struct extent_ptr_decoded p;
+
+/* write path might have to decompress data: */
+unsigned buf_bytes = 0;
+bkey_for_each_ptr_decode(&m->k.k->k, ptrs, p, entry)
+buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+
+unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
+
+m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
+if (!m->bvecs)
+return -ENOMEM;
+
+bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
+bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
+
+if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) {
+kfree(m->bvecs);
+m->bvecs = NULL;
+return -ENOMEM;
+}
+
+rbio_init(&m->rbio.bio, c, *io_opts, NULL);
+m->rbio.data_update = true;
+m->rbio.bio.bi_iter.bi_size = buf_bytes;
+m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(&m->k.k->k);
+m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+return 0;
+}
+
+static int can_write_extent(struct bch_fs *c, struct data_update *m)
 {
+if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
+unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
+return -BCH_ERR_data_update_done_would_block;

 unsigned target = m->op.flags & BCH_WRITE_only_specified_devs
 ? m->op.target
@@ -669,7 +721,11 @@ static bool can_allocate_without_blocking(struct bch_fs *c,
 }
 rcu_read_unlock();

-return nr_replicas >= m->op.nr_replicas;
+if (!nr_replicas)
+return -BCH_ERR_data_update_done_no_rw_devs;
+if (nr_replicas < m->op.nr_replicas)
+return -BCH_ERR_insufficient_devices;
+return 0;
 }

 int bch2_data_update_init(struct btree_trans *trans,
@@ -677,7 +733,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 struct moving_context *ctxt,
 struct data_update *m,
 struct write_point_specifier wp,
-struct bch_io_opts io_opts,
+struct bch_io_opts *io_opts,
 struct data_update_opts data_opts,
 enum btree_id btree_id,
 struct bkey_s_c k)
@@ -699,12 +755,15 @@ int bch2_data_update_init(struct btree_trans *trans,

 bch2_bkey_buf_init(&m->k);
 bch2_bkey_buf_reassemble(&m->k, c, k);
+m->type = data_opts.btree_insert_flags & BCH_WATERMARK_copygc
+? BCH_DATA_UPDATE_copygc
+: BCH_DATA_UPDATE_rebalance;
 m->btree_id = btree_id;
 m->data_opts = data_opts;
 m->ctxt = ctxt;
 m->stats = ctxt ? ctxt->stats : NULL;

-bch2_write_op_init(&m->op, c, io_opts);
+bch2_write_op_init(&m->op, c, *io_opts);
 m->op.pos = bkey_start_pos(k.k);
 m->op.version = k.k->bversion;
 m->op.target = data_opts.target;
@@ -715,7 +774,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 BCH_WRITE_data_encoded|
 BCH_WRITE_move|
 m->data_opts.write_flags;
-m->op.compression_opt = io_opts.background_compression;
+m->op.compression_opt = io_opts->background_compression;
 m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;

 unsigned durability_have = 0, durability_removing = 0;
@@ -753,7 +812,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 ptr_bit <<= 1;
 }

-unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));

 /*
 * If current extent durability is less than io_opts.data_replicas,
@@ -786,17 +845,28 @@ int bch2_data_update_init(struct btree_trans *trans,
 m->data_opts.rewrite_ptrs = 0;
 /* if iter == NULL, it's just a promote */
 if (iter)
-ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
+ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
 if (!ret)
 ret = -BCH_ERR_data_update_done_no_writes_needed;
 goto out_bkey_buf_exit;
 }

-if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
-!can_allocate_without_blocking(c, m)) {
-ret = -BCH_ERR_data_update_done_would_block;
+/*
+* Check if the allocation will succeed, to avoid getting an error later
+* in bch2_write() -> bch2_alloc_sectors_start() and doing a useless
+* read:
+*
+* This guards against
+* - BCH_WRITE_alloc_nowait allocations failing (promotes)
+* - Destination target full
+* - Device(s) in destination target offline
+* - Insufficient durability available in destination target
+* (i.e. trying to move a durability=2 replica to a target with a
+* single durability=2 device)
+*/
+ret = can_write_extent(c, m);
+if (ret)
 goto out_bkey_buf_exit;
 }

 if (reserve_sectors) {
 ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
@@ -824,33 +894,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 goto out_nocow_unlock;
 }

-/* write path might have to decompress data: */
-unsigned buf_bytes = 0;
-bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
-
-unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
-
-m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
-if (!m->bvecs)
-goto enomem;
-
-bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
-bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
-
-if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL))
-goto enomem;
-
-rbio_init(&m->rbio.bio, c, io_opts, NULL);
-m->rbio.bio.bi_iter.bi_size = buf_bytes;
-m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
-m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+ret = bch2_data_update_bios_init(m, c, io_opts);
+if (ret)
+goto out_nocow_unlock;

 return 0;
-enomem:
-ret = -ENOMEM;
-kfree(m->bvecs);
-m->bvecs = NULL;
 out_nocow_unlock:
 if (c->opts.nocow_enabled)
 bkey_nocow_unlock(c, k);
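The bio setup moves out of bch2_data_update_init() into the new bch2_data_update_bios_init() helper above. Its sizing rule: the bounce buffer must fit the largest uncompressed replica, since the write path may need to decompress. A standalone sketch of that arithmetic (values invented):

#include <stdio.h>

#define PAGE_SIZE	4096u
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* uncompressed size per pointer, in 512-byte sectors (invented) */
	unsigned crc_uncompressed_size[] = { 8, 16, 12 };
	unsigned buf_bytes = 0;

	for (unsigned i = 0; i < 3; i++) {
		unsigned bytes = crc_uncompressed_size[i] << 9; /* sectors -> bytes */
		if (bytes > buf_bytes)
			buf_bytes = bytes;
	}

	/* 16 sectors = 8192 bytes -> 2 pages worth of bio vecs */
	printf("buf_bytes %u, nr_vecs %u\n", buf_bytes, DIV_ROUND_UP(buf_bytes, PAGE_SIZE));
	return 0;
}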
@@ -24,7 +24,19 @@ struct data_update_opts {
 void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
 struct bch_io_opts *, struct data_update_opts *);

+#define BCH_DATA_UPDATE_TYPES() \
+x(copygc, 0) \
+x(rebalance, 1) \
+x(promote, 2)
+
+enum bch_data_update_types {
+#define x(n, id) BCH_DATA_UPDATE_##n = id,
+BCH_DATA_UPDATE_TYPES()
+#undef x
+};
+
 struct data_update {
+enum bch_data_update_types type;
 /* extent being updated: */
 bool read_done;
 enum btree_id btree_id;
@@ -51,12 +63,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *,
 struct bch_io_opts *,
 struct data_update_opts *);

+int bch2_data_update_bios_init(struct data_update *, struct bch_fs *,
+struct bch_io_opts *);
+
 void bch2_data_update_exit(struct data_update *);
 int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
 struct moving_context *,
 struct data_update *,
 struct write_point_specifier,
-struct bch_io_opts, struct data_update_opts,
+struct bch_io_opts *, struct data_update_opts,
 enum btree_id, struct bkey_s_c);
 void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
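BCH_DATA_UPDATE_TYPES() is the x-macro idiom used throughout bcachefs: one list, expanded several ways. The data_update.c hunks reference bch2_data_update_type_strs[], presumably generated from the same list; a self-contained sketch of the pattern (the string table here is an assumption, not copied from the tree):

#include <stdio.h>

#define BCH_DATA_UPDATE_TYPES()	\
	x(copygc,	0)	\
	x(rebalance,	1)	\
	x(promote,	2)

enum bch_data_update_types {
#define x(n, id) BCH_DATA_UPDATE_##n = id,
	BCH_DATA_UPDATE_TYPES()
#undef x
};

/* Assumed companion table: same list, expanded to strings */
static const char * const bch2_data_update_type_strs[] = {
#define x(n, id) [BCH_DATA_UPDATE_##n] = #n,
	BCH_DATA_UPDATE_TYPES()
#undef x
};

int main(void)
{
	printf("%s\n", bch2_data_update_type_strs[BCH_DATA_UPDATE_promote]); /* "promote" */
	return 0;
}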
@@ -13,6 +13,40 @@

 #include <linux/dcache.h>

+static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+const struct qstr *str, struct qstr *out_cf)
+{
+*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
+
+#ifdef CONFIG_UNICODE
+unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
+int ret = PTR_ERR_OR_ZERO(buf);
+if (ret)
+return ret;
+
+ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1);
+if (ret <= 0)
+return ret;
+
+*out_cf = (struct qstr) QSTR_INIT(buf, ret);
+return 0;
+#else
+return -EOPNOTSUPP;
+#endif
+}
+
+static inline int bch2_maybe_casefold(struct btree_trans *trans,
+const struct bch_hash_info *info,
+const struct qstr *str, struct qstr *out_cf)
+{
+if (likely(!info->cf_encoding)) {
+*out_cf = *str;
+return 0;
+} else {
+return bch2_casefold(trans, info, str, out_cf);
+}
+}
+
 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
 if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
@@ -28,13 +62,38 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 #endif

 return bkey_bytes -
-offsetof(struct bch_dirent, d_name) -
+(d.v->d_casefold
+? offsetof(struct bch_dirent, d_cf_name_block.d_names)
+: offsetof(struct bch_dirent, d_name)) -
 trailing_nuls;
 }

 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
 {
-return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+if (d.v->d_casefold) {
+unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
+return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len);
+} else {
+return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+}
+}
+
+static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
+{
+if (d.v->d_casefold) {
+unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
+unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len);
+return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len);
+} else {
+return (struct qstr) QSTR_INIT(NULL, 0);
+}
+}
+
+static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d)
+{
+return d.v->d_casefold
+? bch2_dirent_get_casefold_name(d)
+: bch2_dirent_get_name(d);
 }

 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
@@ -57,7 +116,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 {
 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-struct qstr name = bch2_dirent_get_name(d);
+struct qstr name = bch2_dirent_get_lookup_name(d);

 return bch2_dirent_hash(info, &name);
 }
@@ -65,7 +124,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
 {
 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
-const struct qstr l_name = bch2_dirent_get_name(l);
+const struct qstr l_name = bch2_dirent_get_lookup_name(l);
 const struct qstr *r_name = _r;

 return !qstr_eq(l_name, *r_name);
@@ -75,8 +134,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 {
 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
 struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
-const struct qstr l_name = bch2_dirent_get_name(l);
-const struct qstr r_name = bch2_dirent_get_name(r);
+const struct qstr l_name = bch2_dirent_get_lookup_name(l);
+const struct qstr r_name = bch2_dirent_get_lookup_name(r);

 return !qstr_eq(l_name, r_name);
 }
@@ -104,17 +163,19 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
 struct bkey_validate_context from)
 {
 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+unsigned name_block_len = bch2_dirent_name_bytes(d);
 struct qstr d_name = bch2_dirent_get_name(d);
+struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
 int ret = 0;

 bkey_fsck_err_on(!d_name.len,
 c, dirent_empty_name,
 "empty name");

-bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len),
+bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len,
 c, dirent_val_too_big,
-"value too big (%zu > %u)",
-bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
+"dirent names exceed bkey size (%d + %d > %d)",
+d_name.len, d_cf_name.len, name_block_len);

 /*
 * Check new keys don't exceed the max length
@@ -142,6 +203,18 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
 le64_to_cpu(d.v->d_inum) == d.k->p.inode,
 c, dirent_to_itself,
 "dirent points to own directory");
+
+if (d.v->d_casefold) {
+bkey_fsck_err_on(from.from == BKEY_VALIDATE_commit &&
+d_cf_name.len > BCH_NAME_MAX,
+c, dirent_cf_name_too_big,
+"dirent w/ cf name too big (%u > %u)",
+d_cf_name.len, BCH_NAME_MAX);
+
+bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len),
+c, dirent_stray_data_after_cf_name,
+"dirent has stray data after cf name's NUL");
+}
 fsck_err:
 return ret;
 }
@@ -163,15 +236,14 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
 }

-static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
-subvol_inum dir, u8 type,
-const struct qstr *name, u64 dst)
+static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
+subvol_inum dir,
+u8 type,
+int name_len, int cf_name_len,
+u64 dst)
 {
 struct bkey_i_dirent *dirent;
-unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
-
-if (name->len > BCH_NAME_MAX)
-return ERR_PTR(-ENAMETOOLONG);
+unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);

 BUG_ON(u64s > U8_MAX);

@@ -190,14 +262,65 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 }

 dirent->v.d_type = type;
+dirent->v.d_unused = 0;
+dirent->v.d_casefold = cf_name_len ? 1 : 0;

-memcpy(dirent->v.d_name, name->name, name->len);
-memset(dirent->v.d_name + name->len, 0,
-bkey_val_bytes(&dirent->k) -
-offsetof(struct bch_dirent, d_name) -
-name->len);
-
-EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+return dirent;
+}
+
+static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
+const struct qstr *name)
+{
+EBUG_ON(dirent->v.d_casefold);
+
+memcpy(&dirent->v.d_name[0], name->name, name->len);
+memset(&dirent->v.d_name[name->len], 0,
+bkey_val_bytes(&dirent->k) -
+offsetof(struct bch_dirent, d_name) -
+name->len);
+}
+
+static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
+const struct qstr *name,
+const struct qstr *cf_name)
+{
+EBUG_ON(!dirent->v.d_casefold);
+EBUG_ON(!cf_name->len);
+
+dirent->v.d_cf_name_block.d_name_len = name->len;
+dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len;
+memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
+memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
+memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
+bkey_val_bytes(&dirent->k) -
+offsetof(struct bch_dirent, d_cf_name_block.d_names) -
+name->len + cf_name->len);
+
+EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
+}
+
+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+subvol_inum dir,
+u8 type,
+const struct qstr *name,
+const struct qstr *cf_name,
+u64 dst)
+{
+struct bkey_i_dirent *dirent;
+
+if (name->len > BCH_NAME_MAX)
+return ERR_PTR(-ENAMETOOLONG);
+
+dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
+if (IS_ERR(dirent))
+return dirent;
+
+if (cf_name)
+dirent_init_casefolded_name(dirent, name, cf_name);
+else
+dirent_init_regular_name(dirent, name);
+
+EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);

 return dirent;
 }
@@ -213,7 +336,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
 struct bkey_i_dirent *dirent;
 int ret;

-dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
+dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
 ret = PTR_ERR_OR_ZERO(dirent);
 if (ret)
 return ret;
@@ -233,16 +356,28 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
 const struct bch_hash_info *hash_info,
 u8 type, const struct qstr *name, u64 dst_inum,
 u64 *dir_offset,
+u64 *i_size,
 enum btree_iter_update_trigger_flags flags)
 {
 struct bkey_i_dirent *dirent;
 int ret;

-dirent = dirent_create_key(trans, dir, type, name, dst_inum);
+if (hash_info->cf_encoding) {
+struct qstr cf_name;
+ret = bch2_casefold(trans, hash_info, name, &cf_name);
+if (ret)
+return ret;
+dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
+} else {
+dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
+}
+
 ret = PTR_ERR_OR_ZERO(dirent);
 if (ret)
 return ret;

+*i_size += bkey_bytes(&dirent->k);
+
 ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
 dir, &dirent->k_i, flags);
 *dir_offset = dirent->k.p.offset;
@@ -275,12 +410,13 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
 }

 int bch2_dirent_rename(struct btree_trans *trans,
-subvol_inum src_dir, struct bch_hash_info *src_hash,
-subvol_inum dst_dir, struct bch_hash_info *dst_hash,
+subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size,
+subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size,
 const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
 const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
 enum bch_rename_mode mode)
 {
+struct qstr src_name_lookup, dst_name_lookup;
 struct btree_iter src_iter = { NULL };
 struct btree_iter dst_iter = { NULL };
 struct bkey_s_c old_src, old_dst = bkey_s_c_null;
@@ -295,8 +431,11 @@ int bch2_dirent_rename(struct btree_trans *trans,
 memset(dst_inum, 0, sizeof(*dst_inum));

 /* Lookup src: */
+ret = bch2_maybe_casefold(trans, src_hash, src_name, &src_name_lookup);
+if (ret)
+goto out;
 old_src = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
-src_hash, src_dir, src_name,
+src_hash, src_dir, &src_name_lookup,
 BTREE_ITER_intent);
 ret = bkey_err(old_src);
 if (ret)
@@ -308,6 +447,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
 goto out;

 /* Lookup dst: */
+ret = bch2_maybe_casefold(trans, dst_hash, dst_name, &dst_name_lookup);
+if (ret)
+goto out;
 if (mode == BCH_RENAME) {
 /*
 * Note that we're _not_ checking if the target already exists -
@@ -315,12 +457,12 @@ int bch2_dirent_rename(struct btree_trans *trans,
 * correctness:
 */
 ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
-dst_hash, dst_dir, dst_name);
+dst_hash, dst_dir, &dst_name_lookup);
 if (ret)
 goto out;
 } else {
 old_dst = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
-dst_hash, dst_dir, dst_name,
+dst_hash, dst_dir, &dst_name_lookup,
 BTREE_ITER_intent);
 ret = bkey_err(old_dst);
 if (ret)
@@ -336,7 +478,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
 *src_offset = dst_iter.pos.offset;

 /* Create new dst key: */
-new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
+new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
+dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
 ret = PTR_ERR_OR_ZERO(new_dst);
 if (ret)
 goto out;
@@ -346,7 +489,8 @@ int bch2_dirent_rename(struct btree_trans *trans,

 /* Create new src key: */
 if (mode == BCH_RENAME_EXCHANGE) {
-new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
+new_src = dirent_create_key(trans, src_dir, 0, src_name,
+src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
 ret = PTR_ERR_OR_ZERO(new_src);
 if (ret)
 goto out;
@@ -406,6 +550,14 @@ int bch2_dirent_rename(struct btree_trans *trans,
 new_src->v.d_type == DT_SUBVOL)
 new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);

+if (old_dst.k)
+*dst_dir_i_size -= bkey_bytes(old_dst.k);
+*src_dir_i_size -= bkey_bytes(old_src.k);
+
+if (mode == BCH_RENAME_EXCHANGE)
+*src_dir_i_size += bkey_bytes(&new_src->k);
+*dst_dir_i_size += bkey_bytes(&new_dst->k);
+
 ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
 if (ret)
 goto out;
@@ -465,9 +617,14 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans,
 const struct qstr *name, subvol_inum *inum,
 unsigned flags)
 {
+struct qstr lookup_name;
+int ret = bch2_maybe_casefold(trans, hash_info, name, &lookup_name);
+if (ret)
+return ret;
+
 struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
-hash_info, dir, name, flags);
-int ret = bkey_err(k);
+hash_info, dir, &lookup_name, flags);
+ret = bkey_err(k);
 if (ret)
 goto err;

@@ -572,3 +729,54 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)

 return ret < 0 ? ret : 0;
 }
+
+/* fsck */
+
+static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
+struct bch_inode_unpacked *inode)
+{
+struct btree_iter iter;
+struct bkey_s_c k;
+int ret;
+
+for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
+BTREE_ITER_all_snapshots, k, ret) {
+if (k.k->p.offset != inode_nr)
+break;
+if (!bkey_is_inode(k.k))
+continue;
+ret = bch2_inode_unpack(k, inode);
+goto found;
+}
+ret = -BCH_ERR_ENOENT_inode;
+found:
+bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
+bch2_trans_iter_exit(trans, &iter);
+return ret;
+}
+
+int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos)
+{
+struct bch_fs *c = trans->c;
+struct btree_iter iter;
+struct bch_inode_unpacked dir_inode;
+struct bch_hash_info dir_hash_info;
+int ret;
+
+ret = lookup_first_inode(trans, pos.inode, &dir_inode);
+if (ret)
+goto err;
+
+dir_hash_info = bch2_hash_info_init(c, &dir_inode);
+
+bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
+
+ret = bch2_btree_iter_traverse(&iter) ?:
+bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+&dir_hash_info, &iter,
+BTREE_UPDATE_internal_snapshot_node);
+bch2_trans_iter_exit(trans, &iter);
+err:
+bch_err_fn(c, ret);
+return ret;
+}
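Casefolded dirents store both spellings back to back in d_cf_name_block.d_names: the name as created, then the casefolded form that hashing and comparison use. A hedged userspace sketch of slicing that layout (struct and values illustrative only):

#include <stdio.h>
#include <string.h>

/* Illustrative flattening of the d_cf_name_block layout:
 * [name bytes][casefolded name bytes], lengths stored separately. */
struct cf_name_block {
	unsigned short	d_name_len;
	unsigned short	d_cf_name_len;
	char		d_names[32];
};

int main(void)
{
	struct cf_name_block b = { .d_name_len = 6, .d_cf_name_len = 6 };
	memcpy(&b.d_names[0], "ReadMe", 6);	/* name as created */
	memcpy(&b.d_names[6], "readme", 6);	/* casefolded, used for lookup */

	printf("display: %.*s\n", b.d_name_len, &b.d_names[0]);
	printf("lookup:  %.*s\n", b.d_cf_name_len, &b.d_names[b.d_name_len]);
	return 0;
}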
@@ -25,15 +25,13 @@ struct bch_inode_info;

 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);

-static inline unsigned dirent_val_u64s(unsigned len)
+static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
 {
-return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len,
-sizeof(u64));
-}
+unsigned bytes = cf_len
+? offsetof(struct bch_dirent, d_cf_name_block.d_names) + len + cf_len
+: offsetof(struct bch_dirent, d_name) + len;

-static inline unsigned int dirent_occupied_size(const struct qstr *name)
-{
-return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64);
+return DIV_ROUND_UP(bytes, sizeof(u64));
 }

 int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
@@ -52,7 +50,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
 enum btree_iter_update_trigger_flags);
 int bch2_dirent_create(struct btree_trans *, subvol_inum,
 const struct bch_hash_info *, u8,
-const struct qstr *, u64, u64 *,
+const struct qstr *, u64, u64 *, u64 *,
 enum btree_iter_update_trigger_flags);

 static inline unsigned vfs_d_type(unsigned type)
@@ -67,8 +65,8 @@ enum bch_rename_mode {
 };

 int bch2_dirent_rename(struct btree_trans *,
-subvol_inum, struct bch_hash_info *,
-subvol_inum, struct bch_hash_info *,
+subvol_inum, struct bch_hash_info *, u64 *,
+subvol_inum, struct bch_hash_info *, u64 *,
 const struct qstr *, subvol_inum *, u64 *,
 const struct qstr *, subvol_inum *, u64 *,
 enum bch_rename_mode);
@@ -84,4 +82,6 @@ int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32, u32);
 int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
 int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);

+int bch2_fsck_remove_dirent(struct btree_trans *, struct bpos);
+
 #endif /* _BCACHEFS_DIRENT_H */
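Worked example of the new two-argument dirent_val_u64s(): the casefolded layout pays for a length header but packs both names into one key. The offsets below are assumptions derived from the format struct further down (8-byte target plus a 1-byte type byte, then the length block), not values stated in the diff:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Assumed value-layout offsets: d_name at byte 9 of the value,
 * d_cf_name_block.d_names at byte 14 (9 + pad + two __le16 lengths). */
#define OFF_D_NAME	9u
#define OFF_D_NAMES	14u

static unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
{
	unsigned bytes = cf_len ? OFF_D_NAMES + len + cf_len
				: OFF_D_NAME + len;
	return DIV_ROUND_UP(bytes, sizeof(unsigned long long));
}

int main(void)
{
	/* "ReadMe": 9 + 6 = 15 bytes -> 2 u64s without casefolding */
	printf("plain: %u u64s\n", dirent_val_u64s(6, 0));
	/* casefolded: 14 + 6 + 6 = 26 bytes -> 4 u64s */
	printf("casefolded: %u u64s\n", dirent_val_u64s(6, 6));
	return 0;
}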
@@ -29,9 +29,25 @@ struct bch_dirent {
 * Copy of mode bits 12-15 from the target inode - so userspace can get
 * the filetype without having to do a stat()
 */
-__u8 d_type;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+__u8 d_type:5,
+d_unused:2,
+d_casefold:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+__u8 d_casefold:1,
+d_unused:2,
+d_type:5;
+#endif

-__u8 d_name[];
+union {
+struct {
+__u8 d_pad;
+__le16 d_name_len;
+__le16 d_cf_name_len;
+__u8 d_names[];
+} d_cf_name_block __packed;
+__DECLARE_FLEX_ARRAY(__u8, d_name);
+} __packed;
 } __packed __aligned(8);

 #define DT_SUBVOL 16
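The d_type byte becomes a bitfield without changing the on-disk format: bitfields allocate from opposite ends on little- and big-endian, so both declarations put d_type in the low five bits (enough for DT_SUBVOL = 16) and d_casefold in the top bit. A sketch with explicit masks (the bit positions are my reading of the declarations, not stated in the diff):

#include <stdio.h>

/* The on-disk byte: d_type in bits 0-4, two unused bits, d_casefold in
 * bit 7. Masks written out explicitly; the kernel gets the same layout
 * from the endian-specific bitfield declarations. */
#define D_TYPE_MASK	0x1fu
#define D_CASEFOLD_BIT	0x80u

int main(void)
{
	unsigned char byte = 0x03 | D_CASEFOLD_BIT;	/* d_type = 3, casefolded */

	printf("d_type %u, d_casefold %u\n",
	       byte & D_TYPE_MASK, !!(byte & D_CASEFOLD_BIT));
	return 0;
}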
@@ -114,10 +114,9 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
 unsigned dev, s64 sectors,
 bool gc)
 {
-struct disk_accounting_pos acc = {
-.type = BCH_DISK_ACCOUNTING_replicas,
-};
-
+struct disk_accounting_pos acc;
+memset(&acc, 0, sizeof(acc));
+acc.type = BCH_DISK_ACCOUNTING_replicas;
 bch2_replicas_entry_cached(&acc.replicas, dev);

 return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
@@ -135,6 +134,12 @@ static inline bool is_zero(char *start, char *end)

 #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))

+static const unsigned bch2_accounting_type_nr_counters[] = {
+#define x(f, id, nr) [BCH_DISK_ACCOUNTING_##f] = nr,
+BCH_DISK_ACCOUNTING_TYPES()
+#undef x
+};
+
 int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
 struct bkey_validate_context from)
 {
@@ -193,6 +198,11 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
 bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
 c, accounting_key_junk_at_end,
 "junk at end of accounting key");
+
+bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type],
+c, accounting_key_nr_counters_wrong,
+"accounting key with %u counters, should be %u",
+bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]);
 fsck_err:
 return ret;
 }
@@ -635,7 +645,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,

 if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
 trans, accounting_replicas_not_marked,
-"accounting not marked in superblock replicas\n %s",
+"accounting not marked in superblock replicas\n%s",
 (printbuf_reset(&buf),
 bch2_accounting_key_to_text(&buf, &acc),
 buf.buf))) {
@@ -665,7 +675,7 @@ fsck_err:
 return ret;
 invalid_device:
 if (fsck_err(trans, accounting_to_invalid_device,
-"accounting entry points to invalid device %i\n %s",
+"accounting entry points to invalid device %i\n%s",
 invalid_dev,
 (printbuf_reset(&buf),
 bch2_accounting_key_to_text(&buf, &acc),
@@ -726,7 +736,9 @@ int bch2_accounting_read(struct bch_fs *c)
 break;

 if (!bch2_accounting_is_mem(acc_k)) {
-struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+struct disk_accounting_pos next;
+memset(&next, 0, sizeof(next));
+next.type = acc_k.type + 1;
 bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
 continue;
 }
@@ -882,15 +894,13 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
 int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
 {
 struct bch_fs *c = ca->fs;
-struct disk_accounting_pos acc = {
-.type = BCH_DISK_ACCOUNTING_dev_data_type,
-.dev_data_type.dev = ca->dev_idx,
-.dev_data_type.data_type = BCH_DATA_free,
-};
 u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 };

 int ret = bch2_trans_do(c, ({
-bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?:
+bch2_disk_accounting_mod2(trans, gc,
+v, dev_data_type,
+.dev = ca->dev_idx,
+.data_type = BCH_DATA_free) ?:
 (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0);
 }));
 bch_err_fn(c, ret);
@@ -917,7 +927,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
 break;

 if (!bch2_accounting_is_mem(acc_k)) {
-struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+struct disk_accounting_pos next;
+memset(&next, 0, sizeof(next));
+next.type = acc_k.type + 1;
 bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
 continue;
 }
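Several hunks above replace designated initializers for struct disk_accounting_pos with memset() plus assignments. The likely reason (an inference, not stated in the diff): an initializer need not zero padding bytes, and these positions are encoded and compared as raw bytes, so padding must be deterministically zero. The pattern in isolation:

#include <string.h>

/* Illustrative stand-in for a key struct with internal padding */
struct pos {
	unsigned char	type;	/* up to 3 padding bytes may follow */
	unsigned int	dev;
};

void init_pos(struct pos *p)
{
	/* memset() zeroes the padding bytes too, so the whole struct is
	 * safe to hash, memcmp(), or write to disk afterwards */
	memset(p, 0, sizeof(*p));
	p->type = 3;
	p->dev = 1;
}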
@@ -33,10 +33,12 @@ static inline bool bch2_accounting_key_is_zero(struct bkey_s_c_accounting a)
 static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
 struct bkey_s_c_accounting src)
 {
-EBUG_ON(dst->k.u64s != src.k->u64s);

-for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
+for (unsigned i = 0;
+i < min(bch2_accounting_counters(&dst->k),
+bch2_accounting_counters(src.k));
+i++)
 dst->v.d[i] += src.v->d[i];

 if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
 dst->k.bversion = src.k->bversion;
 }
@@ -85,6 +87,24 @@ static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos

 int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *,
 s64 *, unsigned, bool);

+#define disk_accounting_key_init(_k, _type, ...) \
+do { \
+memset(&(_k), 0, sizeof(_k)); \
+(_k).type = BCH_DISK_ACCOUNTING_##_type; \
+(_k)._type = (struct bch_acct_##_type) { __VA_ARGS__ }; \
+} while (0)
+
+#define bch2_disk_accounting_mod2_nr(_trans, _gc, _v, _nr, ...) \
+({ \
+struct disk_accounting_pos pos; \
+disk_accounting_key_init(pos, __VA_ARGS__); \
+bch2_disk_accounting_mod(trans, &pos, _v, _nr, _gc); \
+})
+
+#define bch2_disk_accounting_mod2(_trans, _gc, _v, ...) \
+bch2_disk_accounting_mod2_nr(_trans, _gc, _v, ARRAY_SIZE(_v), __VA_ARGS__)
+
 int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);

 int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
@@ -210,11 +230,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
 static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
 u64 *v, unsigned nr)
 {
+percpu_down_read(&c->mark_lock);
 struct bch_accounting_mem *acc = &c->accounting;
 unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
 accounting_pos_cmp, &p);

 bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
+percpu_up_read(&c->mark_lock);
 }

 static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
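disk_accounting_key_init() and bch2_disk_accounting_mod2() bundle the memset/type/initializer dance behind token pasting: the subtype name selects both the BCH_DISK_ACCOUNTING_* enum value and the union member, and __VA_ARGS__ becomes the subtype's initializer. A runnable miniature of the same macro shape (all types here are stand-ins, not the real ones):

#include <stdio.h>
#include <string.h>

struct acct_dev_data_type { unsigned char dev, data_type; };

struct acct_pos {
	unsigned char type;
	union {
		struct acct_dev_data_type dev_data_type;
	};
};

enum { ACCT_dev_data_type = 3 };

#define acct_key_init(_k, _type, ...)					\
do {									\
	memset(&(_k), 0, sizeof(_k));					\
	(_k).type = ACCT_##_type;					\
	(_k)._type = (struct acct_##_type) { __VA_ARGS__ };		\
} while (0)

int main(void)
{
	struct acct_pos pos;

	/* token pasting turns "dev_data_type" into both the enum tag
	 * and the union member; __VA_ARGS__ fills the subtype */
	acct_key_init(pos, dev_data_type, .dev = 1, .data_type = 7);

	printf("type %u dev %u data_type %u\n",
	       pos.type, pos.dev_data_type.dev, pos.dev_data_type.data_type);
	return 0;
}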
@@ -95,40 +95,81 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
 }
 }

+/*
+* field 1: name
+* field 2: id
+* field 3: number of counters (max 3)
+*/
+
 #define BCH_DISK_ACCOUNTING_TYPES() \
-x(nr_inodes, 0) \
-x(persistent_reserved, 1) \
-x(replicas, 2) \
-x(dev_data_type, 3) \
-x(compression, 4) \
-x(snapshot, 5) \
-x(btree, 6) \
-x(rebalance_work, 7) \
-x(inum, 8)
+x(nr_inodes, 0, 1) \
+x(persistent_reserved, 1, 1) \
+x(replicas, 2, 1) \
+x(dev_data_type, 3, 3) \
+x(compression, 4, 3) \
+x(snapshot, 5, 1) \
+x(btree, 6, 1) \
+x(rebalance_work, 7, 1) \
+x(inum, 8, 3)

 enum disk_accounting_type {
-#define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr,
+#define x(f, nr, ...) BCH_DISK_ACCOUNTING_##f = nr,
 BCH_DISK_ACCOUNTING_TYPES()
 #undef x
 BCH_DISK_ACCOUNTING_TYPE_NR,
 };

-struct bch_nr_inodes {
+/*
+* No subtypes - number of inodes in the entire filesystem
+*
+* XXX: perhaps we could add a per-subvolume counter?
+*/
+struct bch_acct_nr_inodes {
 };

-struct bch_persistent_reserved {
+/*
+* Tracks KEY_TYPE_reservation sectors, broken out by number of replicas for the
+* reservation:
+*/
+struct bch_acct_persistent_reserved {
 __u8 nr_replicas;
 };

-struct bch_dev_data_type {
+/*
+* device, data type counter fields:
+* [
+* nr_buckets
+* live sectors (in buckets of that data type)
+* sectors of internal fragmentation
+* ]
+*
+* XXX: live sectors should've been done differently, you can have multiple data
+* types in the same bucket (user, stripe, cached) and this collapses them to
+* the bucket data type, and makes the internal fragmentation counter redundant
+*/
+struct bch_acct_dev_data_type {
 __u8 dev;
 __u8 data_type;
 };

+/*
+* Compression type fields:
+* [
+* number of extents
+* uncompressed size
+* compressed size
+* ]
+*
+* Compression ratio, average extent size (fragmentation).
+*/
 struct bch_acct_compression {
 __u8 type;
 };

+/*
+* On disk usage by snapshot id; counts same values as replicas counter, but
+* aggregated differently
+*/
 struct bch_acct_snapshot {
 __u32 id;
 } __packed;
@@ -137,10 +178,27 @@ struct bch_acct_btree {
 __u32 id;
 } __packed;

+/*
+* inum counter fields:
+* [
+* number of extents
+* sum of extent sizes - bkey size
+* this field is similar to inode.bi_sectors, except here extents in
+* different snapshots but the same inode number are all collapsed to the
+* same counter
+* sum of on disk size - same values tracked by replicas counters
+* ]
+*
+* This tracks on disk fragmentation.
+*/
 struct bch_acct_inum {
 __u64 inum;
 } __packed;

+/*
+* Simple counter of the amount of data (on disk sectors) rebalance needs to
+* move, extents counted here are also in the rebalance_work btree.
+*/
 struct bch_acct_rebalance_work {
 };

@@ -149,10 +207,10 @@ struct disk_accounting_pos {
 struct {
 __u8 type;
 union {
-struct bch_nr_inodes nr_inodes;
-struct bch_persistent_reserved persistent_reserved;
+struct bch_acct_nr_inodes nr_inodes;
+struct bch_acct_persistent_reserved persistent_reserved;
 struct bch_replicas_entry_v1 replicas;
-struct bch_dev_data_type dev_data_type;
+struct bch_acct_dev_data_type dev_data_type;
 struct bch_acct_compression compression;
 struct bch_acct_snapshot snapshot;
 struct bch_acct_btree btree;
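The third x-macro column gives each accounting type its counter count; compression, for example, carries [nr_extents, uncompressed, compressed], from which a compression ratio and average extent size fall out. A small illustration with invented numbers:

#include <stdio.h>

int main(void)
{
	/* BCH_DISK_ACCOUNTING_compression: 3 counters per key */
	unsigned long long d[3] = {
		1000,		/* number of extents */
		819200,		/* uncompressed sectors */
		204800,		/* compressed sectors */
	};

	printf("ratio %.2fx, avg extent %llu sectors\n",
	       (double) d[1] / d[2], d[1] / d[0]);
	return 0;
}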
504 libbcachefs/ec.c
@@ -20,6 +20,7 @@
 #include "io_read.h"
 #include "io_write.h"
 #include "keylist.h"
+#include "lru.h"
 #include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
@@ -104,6 +105,7 @@ struct ec_bio {
 struct bch_dev *ca;
 struct ec_stripe_buf *buf;
 size_t idx;
+u64 submit_time;
 struct bio bio;
 };

@@ -298,15 +300,27 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 struct bpos bucket = PTR_BUCKET_POS(ca, ptr);

 if (flags & BTREE_TRIGGER_transactional) {
+struct extent_ptr_decoded p = {
+.ptr = *ptr,
+.crc = bch2_extent_crc_unpack(s.k, NULL),
+};
+struct bkey_i_backpointer bp;
+bch2_extent_ptr_to_bp(c, BTREE_ID_stripes, 0, s.s_c, p,
+(const union bch_extent_entry *) ptr, &bp);
+
 struct bkey_i_alloc_v4 *a =
 bch2_trans_start_alloc_update(trans, bucket, 0);
-ret = PTR_ERR_OR_ZERO(a) ?:
-__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
+ret = PTR_ERR_OR_ZERO(a) ?:
+__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?:
+bch2_bucket_backpointer_mod(trans, s.s_c, &bp,
+!(flags & BTREE_TRIGGER_overwrite));
 if (ret)
 goto err;
 }

 if (flags & BTREE_TRIGGER_gc) {
 struct bucket *g = gc_bucket(ca, bucket.offset);
-if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
+if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
 ptr->dev,
 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
 ret = -BCH_ERR_mark_stripe;
@@ -366,19 +380,6 @@ static int mark_stripe_buckets(struct btree_trans *trans,
 return 0;
 }

-static inline void stripe_to_mem(struct stripe *m, const struct bch_stripe *s)
-{
-m->sectors = le16_to_cpu(s->sectors);
-m->algorithm = s->algorithm;
-m->nr_blocks = s->nr_blocks;
-m->nr_redundant = s->nr_redundant;
-m->disk_label = s->disk_label;
-m->blocks_nonempty = 0;
-
-for (unsigned i = 0; i < s->nr_blocks; i++)
-m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-}
-
 int bch2_trigger_stripe(struct btree_trans *trans,
 enum btree_id btree, unsigned level,
 struct bkey_s_c old, struct bkey_s _new,
@@ -399,6 +400,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 (new_s->nr_blocks != old_s->nr_blocks ||
 new_s->nr_redundant != old_s->nr_redundant));

+if (flags & BTREE_TRIGGER_transactional) {
+int ret = bch2_lru_change(trans,
+BCH_LRU_STRIPE_FRAGMENTATION,
+idx,
+stripe_lru_pos(old_s),
+stripe_lru_pos(new_s));
+if (ret)
+return ret;
+}
+
 if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
 /*
@@ -443,9 +453,9 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 if (new_s) {
 s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant;

-struct disk_accounting_pos acc = {
-.type = BCH_DISK_ACCOUNTING_replicas,
-};
+struct disk_accounting_pos acc;
+memset(&acc, 0, sizeof(acc));
+acc.type = BCH_DISK_ACCOUNTING_replicas;
 bch2_bkey_to_replicas(&acc.replicas, new);
 int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
 if (ret)
@@ -458,9 +468,9 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 if (old_s) {
 s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant;

-struct disk_accounting_pos acc = {
-.type = BCH_DISK_ACCOUNTING_replicas,
-};
+struct disk_accounting_pos acc;
+memset(&acc, 0, sizeof(acc));
+acc.type = BCH_DISK_ACCOUNTING_replicas;
 bch2_bkey_to_replicas(&acc.replicas, old);
 int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
 if (ret)
@@ -472,38 +482,6 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 return ret;
 }

-if (flags & BTREE_TRIGGER_atomic) {
-struct stripe *m = genradix_ptr(&c->stripes, idx);
-
-if (!m) {
-struct printbuf buf1 = PRINTBUF;
-struct printbuf buf2 = PRINTBUF;
-
-bch2_bkey_val_to_text(&buf1, c, old);
-bch2_bkey_val_to_text(&buf2, c, new);
-bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
-"old %s\n"
-"new %s", idx, buf1.buf, buf2.buf);
-printbuf_exit(&buf2);
-printbuf_exit(&buf1);
-bch2_inconsistent_error(c);
-return -1;
-}
-
-if (!new_s) {
-bch2_stripes_heap_del(c, m, idx);
-
-memset(m, 0, sizeof(*m));
-} else {
-stripe_to_mem(m, new_s);
-
-if (!old_s)
-bch2_stripes_heap_insert(c, m, idx);
-else
-bch2_stripes_heap_update(c, m, idx);
-}
-}
-
 return 0;
 }

@@ -726,14 +704,15 @@ static void ec_block_endio(struct bio *bio)
 struct bch_dev *ca = ec_bio->ca;
 struct closure *cl = bio->bi_private;

-if (bch2_dev_io_err_on(bio->bi_status, ca,
-bio_data_dir(bio)
-? BCH_MEMBER_ERROR_write
-: BCH_MEMBER_ERROR_read,
-"erasure coding %s error: %s",
+bch2_account_io_completion(ca, bio_data_dir(bio),
+ec_bio->submit_time, !bio->bi_status);
+
+if (bio->bi_status) {
+bch_err_dev_ratelimited(ca, "erasure coding %s error: %s",
 str_write_read(bio_data_dir(bio)),
-bch2_blk_status_to_str(bio->bi_status)))
+bch2_blk_status_to_str(bio->bi_status));
 clear_bit(ec_bio->idx, ec_bio->buf->valid);
+}

 int stale = dev_ptr_stale(ca, ptr);
 if (stale) {
@@ -796,6 +775,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 ec_bio->ca = ca;
 ec_bio->buf = buf;
 ec_bio->idx = idx;
+ec_bio->submit_time = local_clock();

 ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
 ec_bio->bio.bi_end_io = ec_block_endio;
@@ -917,26 +897,6 @@ err:

 static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
 {
-ec_stripes_heap n, *h = &c->ec_stripes_heap;
-
-if (idx >= h->size) {
-if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
-return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-
-mutex_lock(&c->ec_stripes_heap_lock);
-if (n.size > h->size) {
-memcpy(n.data, h->data, h->nr * sizeof(h->data[0]));
-n.nr = h->nr;
-swap(*h, n);
-}
-mutex_unlock(&c->ec_stripes_heap_lock);
-
-free_heap(&n);
-}
-
 if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
 return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

 if (c->gc_pos.phase != GC_PHASE_not_running &&
 !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
 return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
@@ -1009,180 +969,50 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
 s->idx = 0;
 }

-/* Heap of all existing stripes, ordered by blocks_nonempty */
-
-static u64 stripe_idx_to_delete(struct bch_fs *c)
-{
-ec_stripes_heap *h = &c->ec_stripes_heap;
-
-lockdep_assert_held(&c->ec_stripes_heap_lock);
-
-if (h->nr &&
-h->data[0].blocks_nonempty == 0 &&
-!bch2_stripe_is_open(c, h->data[0].idx))
-return h->data[0].idx;
-
-return 0;
-}
-
-static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
-size_t i)
-{
-struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
-
-genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
-}
-
-static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
-{
-struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
-struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
-
-return ((_l->blocks_nonempty > _r->blocks_nonempty) <
-(_l->blocks_nonempty < _r->blocks_nonempty));
-}
-
-static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
-{
-struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
-struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
-ec_stripes_heap *_h = (ec_stripes_heap *)h;
-size_t i = _l - _h->data;
-size_t j = _r - _h->data;
-
-swap(*_l, *_r);
-
-ec_stripes_heap_set_backpointer(_h, i);
-ec_stripes_heap_set_backpointer(_h, j);
-}
-
-static const struct min_heap_callbacks callbacks = {
-.less = ec_stripes_heap_cmp,
-.swp = ec_stripes_heap_swap,
-};
-
-static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
-{
-ec_stripes_heap *h = &c->ec_stripes_heap;
-struct stripe *m = genradix_ptr(&c->stripes, idx);
-
-BUG_ON(m->heap_idx >= h->nr);
-BUG_ON(h->data[m->heap_idx].idx != idx);
-}
-
-void bch2_stripes_heap_del(struct bch_fs *c,
-struct stripe *m, size_t idx)
-{
-mutex_lock(&c->ec_stripes_heap_lock);
-heap_verify_backpointer(c, idx);
-
-min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap);
-mutex_unlock(&c->ec_stripes_heap_lock);
-}
-
-void bch2_stripes_heap_insert(struct bch_fs *c,
-struct stripe *m, size_t idx)
-{
-mutex_lock(&c->ec_stripes_heap_lock);
-BUG_ON(min_heap_full(&c->ec_stripes_heap));
-
-genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr;
-min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) {
-.idx = idx,
-.blocks_nonempty = m->blocks_nonempty,
-}),
-&callbacks,
-&c->ec_stripes_heap);
-
-heap_verify_backpointer(c, idx);
-mutex_unlock(&c->ec_stripes_heap_lock);
-}
-
-void bch2_stripes_heap_update(struct bch_fs *c,
-struct stripe *m, size_t idx)
-{
-ec_stripes_heap *h = &c->ec_stripes_heap;
-bool do_deletes;
-size_t i;
-
-mutex_lock(&c->ec_stripes_heap_lock);
-heap_verify_backpointer(c, idx);
-
-h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
-
-i = m->heap_idx;
-min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap);
-min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap);
-
-heap_verify_backpointer(c, idx);
-
-do_deletes = stripe_idx_to_delete(c) != 0;
-mutex_unlock(&c->ec_stripes_heap_lock);
-
-if (do_deletes)
-bch2_do_stripe_deletes(c);
-}
-
 /* stripe deletion */

 static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
 {
 struct bch_fs *c = trans->c;
 struct btree_iter iter;
-struct bkey_s_c k;
-struct bkey_s_c_stripe s;
-int ret;
-
-k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, POS(0, idx),
-BTREE_ITER_intent);
-ret = bkey_err(k);
+struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
+BTREE_ID_stripes, POS(0, idx),
+BTREE_ITER_intent);
+int ret = bkey_err(k);
 if (ret)
 goto err;

-if (k.k->type != KEY_TYPE_stripe) {
-bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx);
-ret = -EINVAL;
-goto err;
-}
-
-s = bkey_s_c_to_stripe(k);
-for (unsigned i = 0; i < s.v->nr_blocks; i++)
-if (stripe_blockcount_get(s.v, i)) {
-struct printbuf buf = PRINTBUF;
-
-bch2_bkey_val_to_text(&buf, c, k);
-bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf);
-printbuf_exit(&buf);
-ret = -EINVAL;
-goto err;
-}
-
-ret = bch2_btree_delete_at(trans, &iter, 0);
+/*
+* We expect write buffer races here
+* Important: check stripe_is_open with stripe key locked:
+*/
+if (k.k->type == KEY_TYPE_stripe &&
+!bch2_stripe_is_open(trans->c, idx) &&
+stripe_lru_pos(bkey_s_c_to_stripe(k).v) == 1)
+ret = bch2_btree_delete_at(trans, &iter, 0);
 err:
 bch2_trans_iter_exit(trans, &iter);
 return ret;
 }

+/*
+* XXX
+* can we kill this and delete stripes from the trigger?
+*/
 static void ec_stripe_delete_work(struct work_struct *work)
 {
 struct bch_fs *c =
 container_of(work, struct bch_fs, ec_stripe_delete_work);

-while (1) {
-mutex_lock(&c->ec_stripes_heap_lock);
-u64 idx = stripe_idx_to_delete(c);
-mutex_unlock(&c->ec_stripes_heap_lock);
-
-if (!idx)
-break;
-
-int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-ec_stripe_delete(trans, idx));
-bch_err_fn(c, ret);
-if (ret)
-break;
-}
-
+bch2_trans_run(c,
+bch2_btree_write_buffer_tryflush(trans) ?:
+for_each_btree_key_max_commit(trans, lru_iter, BTREE_ID_lru,
+lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 1, 0),
+lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 1, LRU_TIME_MAX),
+0, lru_k,
+NULL, NULL,
+BCH_TRANS_COMMIT_no_enospc, ({
+ec_stripe_delete(trans, lru_k.k->p.offset);
+})));
 bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
 }
|
||||
|
||||
bch2_fs_inconsistent(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
return -BCH_ERR_erasure_coding_found_btree_node;
|
||||
}
|
||||
|
||||
k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
|
||||
@ -1360,7 +1190,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
|
||||
struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
|
||||
if (!ca)
|
||||
return -EIO;
|
||||
return -BCH_ERR_ENOENT_dev_not_found;
|
||||
|
||||
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
|
||||
|
||||
@ -1380,8 +1210,12 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
if (bp_k.k->type != KEY_TYPE_backpointer)
|
||||
continue;
|
||||
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
|
||||
if (bp.v->btree_id == BTREE_ID_stripes)
|
||||
continue;
|
||||
|
||||
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s,
|
||||
bkey_s_c_to_backpointer(bp_k), &last_flushed);
|
||||
bp, &last_flushed);
|
||||
}));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
@ -1393,21 +1227,19 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
|
||||
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
|
||||
int ret = 0;
|
||||
unsigned nr_data = v->nr_blocks - v->nr_redundant;
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
int ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < nr_data; i++) {
|
||||
for (unsigned i = 0; i < nr_data; i++) {
|
||||
ret = ec_stripe_update_bucket(trans, s, i);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1473,6 +1305,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
if (s->err) {
|
||||
if (!bch2_err_matches(s->err, EROFS))
|
||||
bch_err(c, "error creating stripe: error writing data buckets");
|
||||
ret = s->err;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1481,6 +1314,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
if (ec_do_recov(c, &s->existing_stripe)) {
|
||||
bch_err(c, "error creating stripe: error reading existing stripe");
|
||||
ret = -BCH_ERR_ec_block_read;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1506,6 +1340,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
if (ec_nr_failed(&s->new_stripe)) {
|
||||
bch_err(c, "error creating stripe: error writing redundancy buckets");
|
||||
ret = -BCH_ERR_ec_block_write;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1527,6 +1362,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
if (ret)
|
||||
goto err;
|
||||
err:
|
||||
trace_stripe_create(c, s->idx, ret);
|
||||
|
||||
bch2_disk_reservation_put(c, &s->res);
|
||||
|
||||
for (i = 0; i < v->nr_blocks; i++)
|
||||
@ -1612,11 +1449,11 @@ static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int
|
||||
ec_stripe_new_set_pending(c, h);
|
||||
}
|
||||
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob, int err)
|
||||
{
|
||||
struct ec_stripe_new *s = ob->ec;
|
||||
|
||||
s->err = -EIO;
|
||||
s->err = err;
|
||||
}
|
||||
|
||||
void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
|
||||
@ -1968,39 +1805,40 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static s64 get_existing_stripe(struct bch_fs *c,
|
||||
struct ec_stripe_head *head)
|
||||
static int __get_existing_stripe(struct btree_trans *trans,
|
||||
struct ec_stripe_head *head,
|
||||
struct ec_stripe_buf *stripe,
|
||||
u64 idx)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
struct stripe *m;
|
||||
size_t heap_idx;
|
||||
u64 stripe_idx;
|
||||
s64 ret = -1;
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
if (may_create_new_stripe(c))
|
||||
return -1;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
|
||||
BTREE_ID_stripes, POS(0, idx), 0);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
|
||||
/* No blocks worth reusing, stripe will just be deleted: */
|
||||
if (!h->data[heap_idx].blocks_nonempty)
|
||||
continue;
|
||||
/* We expect write buffer races here */
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
goto out;
|
||||
|
||||
stripe_idx = h->data[heap_idx].idx;
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
if (stripe_lru_pos(s.v) <= 1)
|
||||
goto out;
|
||||
|
||||
m = genradix_ptr(&c->stripes, stripe_idx);
|
||||
|
||||
if (m->disk_label == head->disk_label &&
|
||||
m->algorithm == head->algo &&
|
||||
m->nr_redundant == head->redundancy &&
|
||||
m->sectors == head->blocksize &&
|
||||
m->blocks_nonempty < m->nr_blocks - m->nr_redundant &&
|
||||
bch2_try_open_stripe(c, head->s, stripe_idx)) {
|
||||
ret = stripe_idx;
|
||||
break;
|
||||
}
|
||||
if (s.v->disk_label == head->disk_label &&
|
||||
s.v->algorithm == head->algo &&
|
||||
s.v->nr_redundant == head->redundancy &&
|
||||
le16_to_cpu(s.v->sectors) == head->blocksize &&
|
||||
bch2_try_open_stripe(c, head->s, idx)) {
|
||||
bkey_reassemble(&stripe->key, k);
|
||||
ret = 1;
|
||||
}
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
out:
|
||||
bch2_set_btree_iter_dontneed(&iter);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2052,24 +1890,33 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
|
||||
struct ec_stripe_new *s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
s64 idx;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* If we can't allocate a new stripe, and there's no stripes with empty
|
||||
* blocks for us to reuse, that means we have to wait on copygc:
|
||||
*/
|
||||
idx = get_existing_stripe(c, h);
|
||||
if (idx < 0)
|
||||
return -BCH_ERR_stripe_alloc_blocked;
|
||||
if (may_create_new_stripe(c))
|
||||
return -1;
|
||||
|
||||
ret = get_stripe_key_trans(trans, idx, &s->existing_stripe);
|
||||
bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
|
||||
"reading stripe key: %s", bch2_err_str(ret));
|
||||
if (ret) {
|
||||
bch2_stripe_close(c, s);
|
||||
return ret;
|
||||
struct btree_iter lru_iter;
|
||||
struct bkey_s_c lru_k;
|
||||
int ret = 0;
|
||||
|
||||
for_each_btree_key_max_norestart(trans, lru_iter, BTREE_ID_lru,
|
||||
lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 2, 0),
|
||||
lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 2, LRU_TIME_MAX),
|
||||
0, lru_k, ret) {
|
||||
ret = __get_existing_stripe(trans, h, &s->existing_stripe, lru_k.k->p.offset);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &lru_iter);
|
||||
if (!ret)
|
||||
ret = -BCH_ERR_stripe_alloc_blocked;
|
||||
if (ret == 1)
|
||||
ret = 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return init_new_stripe_from_existing(c, s);
|
||||
}
|
||||
@ -2263,14 +2110,14 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
};
|
||||
struct disk_accounting_pos acc;
|
||||
|
||||
s64 sectors = 0;
|
||||
for (unsigned i = 0; i < s->v.nr_blocks; i++)
|
||||
sectors -= stripe_blockcount_get(&s->v, i);
|
||||
|
||||
memset(&acc, 0, sizeof(acc));
|
||||
acc.type = BCH_DISK_ACCOUNTING_replicas;
|
||||
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
|
||||
acc.replicas.data_type = BCH_DATA_user;
|
||||
ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false);
|
||||
@ -2284,6 +2131,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
|
||||
|
||||
sectors = -sectors;
|
||||
|
||||
memset(&acc, 0, sizeof(acc));
|
||||
acc.type = BCH_DISK_ACCOUNTING_replicas;
|
||||
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
|
||||
acc.replicas.data_type = BCH_DATA_user;
|
||||
ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false);
|
||||
@ -2367,46 +2216,7 @@ void bch2_fs_ec_flush(struct bch_fs *c)
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_prefetch, k, ({
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
continue;
|
||||
|
||||
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset);
|
||||
|
||||
stripe_to_mem(m, bkey_s_c_to_stripe(k).v);
|
||||
|
||||
bch2_stripes_heap_insert(c, m, k.k->p.offset);
|
||||
0;
|
||||
})));
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
struct stripe *m;
|
||||
size_t i;
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
|
||||
m = genradix_ptr(&c->stripes, h->data[i].idx);
|
||||
|
||||
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
|
||||
h->data[i].blocks_nonempty,
|
||||
m->nr_blocks - m->nr_redundant,
|
||||
m->nr_redundant);
|
||||
if (bch2_stripe_is_open(c, h->data[i].idx))
|
||||
prt_str(out, " open");
|
||||
prt_newline(out);
|
||||
}
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
@ -2477,15 +2287,12 @@ void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
|
||||
BUG_ON(!list_empty(&c->ec_stripe_new_list));
|
||||
|
||||
free_heap(&c->ec_stripes_heap);
|
||||
genradix_free(&c->stripes);
|
||||
bioset_exit(&c->ec_bioset);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_init_early(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
mutex_init(&c->ec_stripes_heap_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
@ -2503,3 +2310,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
|
||||
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
|
||||
static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
return 0;
|
||||
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
|
||||
u64 lru_idx = stripe_lru_pos(s.v);
|
||||
if (lru_idx) {
|
||||
int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
|
||||
k.k->p.offset, lru_idx, k, last_flushed);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
|
||||
{
|
||||
struct bkey_buf last_flushed;
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
|
||||
POS_MIN, BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
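Note the return convention in the reuse path above: __get_existing_stripe() returns 0 to keep scanning the LRU, 1 once a stripe has been opened, and a negative error to abort, which __bch2_ec_stripe_head_reuse() then normalizes. A minimal standalone sketch of that tri-state scan, with a hypothetical try_candidate() standing in for the per-key probe:

static int try_candidate(unsigned idx)
{
	return idx == 3;		/* 1 = found, 0 = keep scanning, <0 = error */
}

static int scan_candidates(void)
{
	int ret = 0;

	for (unsigned i = 0; i < 10; i++) {
		ret = try_candidate(i);
		if (ret)		/* stop on found (1) or error (<0) */
			break;
	}
	if (!ret)
		ret = -1;		/* nothing usable: maps to stripe_alloc_blocked */
	if (ret == 1)
		ret = 0;		/* found: normalize to success */
	return ret;
}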
@@ -92,6 +92,29 @@ static inline void stripe_csum_set(struct bch_stripe *s,
	memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}

#define STRIPE_LRU_POS_EMPTY	1

static inline u64 stripe_lru_pos(const struct bch_stripe *s)
{
	if (!s)
		return 0;

	unsigned nr_data = s->nr_blocks - s->nr_redundant, blocks_empty = 0;

	for (unsigned i = 0; i < nr_data; i++)
		blocks_empty += !stripe_blockcount_get(s, i);

	/* Will be picked up by the stripe_delete worker */
	if (blocks_empty == nr_data)
		return STRIPE_LRU_POS_EMPTY;

	if (!blocks_empty)
		return 0;

	/* invert: more blocks empty = reuse first */
	return LRU_TIME_MAX - blocks_empty;
}

static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
					     const struct bch_extent_ptr *data_ptr,
					     unsigned sectors)
@@ -132,6 +155,20 @@ static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m,
				       m->sectors);
}

static inline void gc_stripe_unlock(struct gc_stripe *s)
{
	BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);

	clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock);
	wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR);
}

static inline void gc_stripe_lock(struct gc_stripe *s)
{
	wait_on_bit_lock((void *) &s->lock, BUCKET_LOCK_BITNR,
			 TASK_UNINTERRUPTIBLE);
}

struct bch_read_bio;

struct ec_stripe_buf {
@@ -212,7 +249,7 @@ int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *, struct bkey

void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);

void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *, int);

int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);

@@ -221,10 +258,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
					       unsigned, unsigned, unsigned,
					       enum bch_watermark, struct closure *);

void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);

void bch2_do_stripe_deletes(struct bch_fs *);
void bch2_ec_do_stripe_creates(struct bch_fs *);
void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *);
@@ -261,11 +294,12 @@ void bch2_fs_ec_flush(struct bch_fs *);

int bch2_stripes_read(struct bch_fs *);

void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *);

void bch2_fs_ec_exit(struct bch_fs *);
void bch2_fs_ec_init_early(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);

int bch2_check_stripe_to_lru_refs(struct bch_fs *);

#endif /* _BCACHEFS_EC_H */
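stripe_lru_pos() above folds a stripe's fragmentation into a single LRU key: 0 means "not a reuse candidate", 1 (STRIPE_LRU_POS_EMPTY) means "fully empty, delete", and anything else sorts so emptier stripes come first. A standalone sketch of the same encoding, with the block counts simplified to a plain array and LRU_TIME_MAX's width assumed for illustration:

#include <stdint.h>
#include <stdio.h>

#define LRU_TIME_MAX		((1ULL << 48) - 1)	/* assumed width, for illustration */
#define STRIPE_LRU_POS_EMPTY	1

static uint64_t lru_pos_demo(const unsigned *blockcount, unsigned nr_data)
{
	unsigned blocks_empty = 0;

	for (unsigned i = 0; i < nr_data; i++)
		blocks_empty += !blockcount[i];

	if (blocks_empty == nr_data)
		return STRIPE_LRU_POS_EMPTY;	/* fully empty: delete, don't reuse */
	if (!blocks_empty)
		return 0;			/* fully live: not indexed at all */
	return LRU_TIME_MAX - blocks_empty;	/* emptier stripes sort first */
}

int main(void)
{
	unsigned full[] = { 8, 8, 8 }, one[] = { 8, 0, 8 }, two[] = { 0, 0, 8 };

	printf("%llu %llu %llu\n",		/* prints 0, MAX-1, MAX-2 */
	       (unsigned long long) lru_pos_demo(full, 3),
	       (unsigned long long) lru_pos_demo(one, 3),
	       (unsigned long long) lru_pos_demo(two, 3));
	return 0;
}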
@@ -20,23 +20,15 @@ struct stripe {
};

struct gc_stripe {
	u8		lock;
	unsigned	alive:1; /* does a corresponding key exist in stripes btree? */
	u16		sectors;

	u8		nr_blocks;
	u8		nr_redundant;

	unsigned	alive:1; /* does a corresponding key exist in stripes btree? */
	u16		block_sectors[BCH_BKEY_PTRS_MAX];
	struct bch_extent_ptr	ptrs[BCH_BKEY_PTRS_MAX];

	struct bch_replicas_padded r;
};

struct ec_stripe_heap_entry {
	size_t			idx;
	unsigned		blocks_nonempty;
};

typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap;

#endif /* _BCACHEFS_EC_TYPES_H */
@@ -5,6 +5,8 @@
#define BCH_ERRCODES()						\
	x(ERANGE,			ERANGE_option_too_small)		\
	x(ERANGE,			ERANGE_option_too_big)			\
	x(EINVAL,			injected)				\
	x(BCH_ERR_injected,		injected_fs_start)			\
	x(EINVAL,			mount_option)				\
	x(BCH_ERR_mount_option,		option_name)				\
	x(BCH_ERR_mount_option,		option_value)				\
@@ -116,9 +118,11 @@
	x(ENOENT,			ENOENT_snapshot_tree)			\
	x(ENOENT,			ENOENT_dirent_doesnt_match_inode)	\
	x(ENOENT,			ENOENT_dev_not_found)			\
	x(ENOENT,			ENOENT_dev_bucket_not_found)		\
	x(ENOENT,			ENOENT_dev_idx_not_found)		\
	x(ENOENT,			ENOENT_inode_no_backpointer)		\
	x(ENOENT,			ENOENT_no_snapshot_tree_subvol)		\
	x(ENOENT,			btree_node_dying)			\
	x(ENOTEMPTY,			ENOTEMPTY_dir_not_empty)		\
	x(ENOTEMPTY,			ENOTEMPTY_subvol_not_empty)		\
	x(EEXIST,			EEXIST_str_hash_set)			\
@@ -185,6 +189,7 @@
	x(BCH_ERR_data_update_done,	data_update_done_no_writes_needed)	\
	x(BCH_ERR_data_update_done,	data_update_done_no_snapshot)		\
	x(BCH_ERR_data_update_done,	data_update_done_no_dev_refs)		\
	x(BCH_ERR_data_update_done,	data_update_done_no_rw_devs)		\
	x(EINVAL,			device_state_not_allowed)		\
	x(EINVAL,			member_info_missing)			\
	x(EINVAL,			mismatched_block_size)			\
@@ -205,6 +210,8 @@
	x(EINVAL,			no_resize_with_buckets_nouse)		\
	x(EINVAL,			inode_unpack_error)			\
	x(EINVAL,			varint_decode_error)			\
	x(EINVAL,			erasure_coding_found_btree_node)	\
	x(EOPNOTSUPP,			may_not_use_incompat_feature)		\
	x(EROFS,			erofs_trans_commit)			\
	x(EROFS,			erofs_no_writes)			\
	x(EROFS,			erofs_journal_err)			\
@@ -215,10 +222,18 @@
	x(EROFS,			insufficient_devices)			\
	x(0,				operation_blocked)			\
	x(BCH_ERR_operation_blocked,	btree_cache_cannibalize_lock_blocked)	\
	x(BCH_ERR_operation_blocked,	journal_res_get_blocked)		\
	x(BCH_ERR_operation_blocked,	journal_preres_get_blocked)		\
	x(BCH_ERR_operation_blocked,	bucket_alloc_blocked)			\
	x(BCH_ERR_operation_blocked,	stripe_alloc_blocked)			\
	x(BCH_ERR_operation_blocked,	journal_res_blocked)			\
	x(BCH_ERR_journal_res_blocked,	journal_blocked)			\
	x(BCH_ERR_journal_res_blocked,	journal_max_in_flight)			\
	x(BCH_ERR_journal_res_blocked,	journal_max_open)			\
	x(BCH_ERR_journal_res_blocked,	journal_full)				\
	x(BCH_ERR_journal_res_blocked,	journal_pin_full)			\
	x(BCH_ERR_journal_res_blocked,	journal_buf_enomem)			\
	x(BCH_ERR_journal_res_blocked,	journal_stuck)				\
	x(BCH_ERR_journal_res_blocked,	journal_retry_open)			\
	x(BCH_ERR_journal_res_blocked,	journal_preres_get_blocked)		\
	x(BCH_ERR_journal_res_blocked,	bucket_alloc_blocked)			\
	x(BCH_ERR_journal_res_blocked,	stripe_alloc_blocked)			\
	x(BCH_ERR_invalid,		invalid_sb)				\
	x(BCH_ERR_invalid_sb,		invalid_sb_magic)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_version)			\
@@ -228,6 +243,7 @@
	x(BCH_ERR_invalid_sb,		invalid_sb_csum)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_block_size)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_uuid)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_offset)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_too_many_members)		\
	x(BCH_ERR_invalid_sb,		invalid_sb_dev_idx)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_time_precision)		\
@@ -255,6 +271,7 @@
	x(BCH_ERR_operation_blocked,	nocow_lock_blocked)			\
	x(EIO,				journal_shutdown)			\
	x(EIO,				journal_flush_err)			\
	x(EIO,				journal_write_err)			\
	x(EIO,				btree_node_read_err)			\
	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_cached)		\
	x(EIO,				sb_not_downgraded)			\
@@ -263,18 +280,53 @@
	x(EIO,				btree_node_read_validate_error)		\
	x(EIO,				btree_need_topology_repair)		\
	x(EIO,				bucket_ref_update)			\
	x(EIO,				trigger_alloc)				\
	x(EIO,				trigger_pointer)			\
	x(EIO,				trigger_stripe_pointer)			\
	x(EIO,				metadata_bucket_inconsistency)		\
	x(EIO,				mark_stripe)				\
	x(EIO,				stripe_reconstruct)			\
	x(EIO,				key_type_error)				\
	x(EIO,				no_device_to_read_from)			\
	x(EIO,				extent_poisened)			\
	x(EIO,				missing_indirect_extent)		\
	x(EIO,				invalidate_stripe_to_dev)		\
	x(EIO,				no_encryption_key)			\
	x(EIO,				insufficient_journal_devices)		\
	x(EIO,				device_offline)				\
	x(EIO,				EIO_fault_injected)			\
	x(EIO,				ec_block_read)				\
	x(EIO,				ec_block_write)				\
	x(EIO,				recompute_checksum)			\
	x(EIO,				decompress)				\
	x(BCH_ERR_decompress,		decompress_exceeded_max_encoded_extent)	\
	x(BCH_ERR_decompress,		decompress_lz4)				\
	x(BCH_ERR_decompress,		decompress_gzip)			\
	x(BCH_ERR_decompress,		decompress_zstd_src_len_bad)		\
	x(BCH_ERR_decompress,		decompress_zstd)			\
	x(EIO,				data_write)				\
	x(BCH_ERR_data_write,		data_write_io)				\
	x(BCH_ERR_data_write,		data_write_csum)			\
	x(BCH_ERR_data_write,		data_write_invalid_ptr)			\
	x(BCH_ERR_data_write,		data_write_misaligned)			\
	x(BCH_ERR_decompress,		data_read)				\
	x(BCH_ERR_data_read,		no_device_to_read_from)			\
	x(BCH_ERR_data_read,		no_devices_valid)			\
	x(BCH_ERR_data_read,		data_read_io_err)			\
	x(BCH_ERR_data_read,		data_read_csum_err)			\
	x(BCH_ERR_data_read,		data_read_retry)			\
	x(BCH_ERR_data_read_retry,	data_read_retry_avoid)			\
	x(BCH_ERR_data_read_retry_avoid,data_read_retry_device_offline)		\
	x(BCH_ERR_data_read_retry_avoid,data_read_retry_io_err)			\
	x(BCH_ERR_data_read_retry_avoid,data_read_retry_ec_reconstruct_err)	\
	x(BCH_ERR_data_read_retry_avoid,data_read_retry_csum_err)		\
	x(BCH_ERR_data_read_retry,	data_read_retry_csum_err_maybe_userspace)\
	x(BCH_ERR_data_read,		data_read_decompress_err)		\
	x(BCH_ERR_data_read,		data_read_decrypt_err)			\
	x(BCH_ERR_data_read,		data_read_ptr_stale_race)		\
	x(BCH_ERR_data_read_retry,	data_read_ptr_stale_retry)		\
	x(BCH_ERR_data_read,		data_read_no_encryption_key)		\
	x(BCH_ERR_data_read,		data_read_buffer_too_small)		\
	x(BCH_ERR_data_read,		data_read_key_overwritten)		\
	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_fixable)		\
	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_want_retry)		\
	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_must_retry)		\
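These codes form a hierarchy: each private errcode names a parent class (its first x() argument), bottoming out at a standard errno, so a caller can test against any ancestor. A simplified sketch of that matching walk; the real parent table is generated from BCH_ERRCODES() and the walk lives in bch2_err_matches():

enum { ERR_BASE = 2048, ERR_OP_BLOCKED, ERR_BUCKET_ALLOC_BLOCKED, ERR_MAX };

static const unsigned err_parent[] = {
	[ERR_OP_BLOCKED - ERR_BASE]		= 0,		/* parent is errno 0 */
	[ERR_BUCKET_ALLOC_BLOCKED - ERR_BASE]	= ERR_OP_BLOCKED,
};

static int err_matches(int err, int class)
{
	err = err < 0 ? -err : err;
	class = class < 0 ? -class : class;

	while (err >= ERR_BASE) {	/* climb toward the errno at the root */
		if (err == class)
			return 1;
		err = err_parent[err - ERR_BASE];
	}
	return err == class;
}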
@@ -3,15 +3,24 @@
#include "btree_cache.h"
#include "btree_iter.h"
#include "error.h"
#include "fs-common.h"
#include "journal.h"
#include "namei.h"
#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"

#define FSCK_ERR_RATELIMIT_NR	10

bool bch2_inconsistent_error(struct bch_fs *c)
void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
{
	printbuf_indent_add_nextline(out, 2);

#ifdef BCACHEFS_LOG_PREFIX
	prt_printf(out, bch2_log_msg(c, ""));
#endif
}

bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
{
	set_bit(BCH_FS_error, &c->flags);

@@ -21,10 +30,11 @@ bool bch2_inconsistent_error(struct bch_fs *c)
	case BCH_ON_ERROR_fix_safe:
	case BCH_ON_ERROR_ro:
		if (bch2_fs_emergency_read_only(c))
			bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
				journal_cur_seq(&c->journal));
			prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n",
				   journal_cur_seq(&c->journal));
		return true;
	case BCH_ON_ERROR_panic:
		bch2_print_string_as_lines(KERN_ERR, out->buf);
		panic(bch2_fmt(c, "panic after error"));
		return true;
	default:
@@ -32,11 +42,63 @@ bool bch2_inconsistent_error(struct bch_fs *c)
	}
}

int bch2_topology_error(struct bch_fs *c)
bool bch2_inconsistent_error(struct bch_fs *c)
{
	struct printbuf buf = PRINTBUF;
	printbuf_indent_add_nextline(&buf, 2);

	bool ret = __bch2_inconsistent_error(c, &buf);
	if (ret)
		bch_err(c, "%s", buf.buf);
	printbuf_exit(&buf);
	return ret;
}

__printf(3, 0)
static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans,
				       const char *fmt, va_list args)
{
	struct printbuf buf = PRINTBUF;

	bch2_log_msg_start(c, &buf);

	prt_vprintf(&buf, fmt, args);
	prt_newline(&buf);

	if (trans)
		bch2_trans_updates_to_text(&buf, trans);
	bool ret = __bch2_inconsistent_error(c, &buf);
	bch2_print_string_as_lines(KERN_ERR, buf.buf);

	printbuf_exit(&buf);
	return ret;
}

bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	bool ret = bch2_fs_trans_inconsistent(c, NULL, fmt, args);
	va_end(args);
	return ret;
}

bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args);
	va_end(args);
	return ret;
}

int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
{
	prt_printf(out, "btree topology error: ");

	set_bit(BCH_FS_topology_error, &c->flags);
	if (!test_bit(BCH_FS_recovery_running, &c->flags)) {
		bch2_inconsistent_error(c);
		__bch2_inconsistent_error(c, out);
		return -BCH_ERR_btree_need_topology_repair;
	} else {
		return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
@@ -44,6 +106,24 @@ int bch2_topology_error(struct bch_fs *c)
	}
}

int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
{
	struct printbuf buf = PRINTBUF;

	bch2_log_msg_start(c, &buf);

	va_list args;
	va_start(args, fmt);
	prt_vprintf(&buf, fmt, args);
	va_end(args);

	int ret = __bch2_topology_error(c, &buf);
	bch2_print_string_as_lines(KERN_ERR, buf.buf);

	printbuf_exit(&buf);
	return ret;
}

void bch2_fatal_error(struct bch_fs *c)
{
	if (bch2_fs_emergency_read_only(c))
@@ -54,25 +134,41 @@ void bch2_io_error_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
	struct bch_fs *c = ca->fs;
	bool dev;

	/* XXX: if it's reads or checksums that are failing, set it to failed */

	down_write(&c->state_lock);
	dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro,
				     BCH_FORCE_IF_DEGRADED);
	if (dev
	    ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
				   BCH_FORCE_IF_DEGRADED)
	    : bch2_fs_emergency_read_only(c))
	unsigned long write_errors_start = READ_ONCE(ca->write_errors_start);

	if (write_errors_start &&
	    time_after(jiffies,
		       write_errors_start + c->opts.write_error_timeout * HZ)) {
		if (ca->mi.state >= BCH_MEMBER_STATE_ro)
			goto out;

		bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
						 BCH_FORCE_IF_DEGRADED);

		bch_err(ca,
			"too many IO errors, setting %s RO",
			"writes erroring for %u seconds, setting %s ro",
			c->opts.write_error_timeout,
			dev ? "device" : "filesystem");
		if (!dev)
			bch2_fs_emergency_read_only(c);

	}
out:
	up_write(&c->state_lock);
}

void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
{
	atomic64_inc(&ca->errors[type]);
	//queue_work(system_long_wq, &ca->io_error_work);

	if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start)
		ca->write_errors_start = jiffies;

	queue_work(system_long_wq, &ca->io_error_work);
}

enum ask_yn {
@@ -168,7 +264,8 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)

#endif

static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
					   enum bch_sb_error_id id)
{
	struct fsck_err_state *s;

@@ -176,7 +273,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
		return NULL;

	list_for_each_entry(s, &c->fsck_error_msgs, list)
		if (s->fmt == fmt) {
		if (s->id == id) {
			/*
			 * move it to the head of the list: repeated fsck errors
			 * are common
@@ -194,7 +291,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
	}

	INIT_LIST_HEAD(&s->list);
	s->fmt = fmt;
	s->id = id;
	list_add(&s->list, &c->fsck_error_msgs);
	return s;
}
@@ -244,15 +341,59 @@ static int do_fsck_ask_yn(struct bch_fs *c,
	return ask;
}

static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c,
				enum bch_sb_error_id id, const char *msg,
				bool *repeat, bool *print, bool *suppress)
{
	bch2_sb_error_count(c, id);

	struct fsck_err_state *s = fsck_err_get(c, id);
	if (s) {
		/*
		 * We may be called multiple times for the same error on
		 * transaction restart - this memoizes instead of asking the user
		 * multiple times for the same error:
		 */
		if (s->last_msg && !strcmp(msg, s->last_msg)) {
			*repeat = true;
			*print = false;
			return s;
		}

		kfree(s->last_msg);
		s->last_msg = kstrdup(msg, GFP_KERNEL);

		if (c->opts.ratelimit_errors &&
		    s->nr >= FSCK_ERR_RATELIMIT_NR) {
			if (s->nr == FSCK_ERR_RATELIMIT_NR)
				*suppress = true;
			else
				*print = false;
		}

		s->nr++;
	}
	return s;
}

void __bch2_count_fsck_err(struct bch_fs *c,
			   enum bch_sb_error_id id, const char *msg,
			   bool *repeat, bool *print, bool *suppress)
{
	bch2_sb_error_count(c, id);

	mutex_lock(&c->fsck_error_msgs_lock);
	count_fsck_err_locked(c, id, msg, repeat, print, suppress);
	mutex_unlock(&c->fsck_error_msgs_lock);
}

int __bch2_fsck_err(struct bch_fs *c,
		    struct btree_trans *trans,
		    enum bch_fsck_flags flags,
		    enum bch_sb_error_id err,
		    const char *fmt, ...)
{
	struct fsck_err_state *s = NULL;
	va_list args;
	bool print = true, suppressing = false, inconsistent = false, exiting = false;
	struct printbuf buf = PRINTBUF, *out = &buf;
	int ret = -BCH_ERR_fsck_ignore;
	const char *action_orig = "fix?", *action = action_orig;
@@ -287,7 +428,12 @@ int __bch2_fsck_err(struct bch_fs *c,
		? -BCH_ERR_fsck_fix
		: -BCH_ERR_fsck_ignore;

	bch2_sb_error_count(c, err);
	printbuf_indent_add_nextline(out, 2);

#ifdef BCACHEFS_LOG_PREFIX
	if (strncmp(fmt, "bcachefs", 8))
		prt_printf(out, bch2_log_msg(c, ""));
#endif

	va_start(args, fmt);
	prt_vprintf(out, fmt, args);
@@ -307,42 +453,15 @@ int __bch2_fsck_err(struct bch_fs *c,
	}

	mutex_lock(&c->fsck_error_msgs_lock);
	s = fsck_err_get(c, fmt);
	if (s) {
		/*
		 * We may be called multiple times for the same error on
		 * transaction restart - this memoizes instead of asking the user
		 * multiple times for the same error:
		 */
		if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
			ret = s->ret;
			goto err_unlock;
		}

		kfree(s->last_msg);
		s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
		if (!s->last_msg) {
			ret = -ENOMEM;
			goto err_unlock;
		}

		if (c->opts.ratelimit_errors &&
		    !(flags & FSCK_NO_RATELIMIT) &&
		    s->nr >= FSCK_ERR_RATELIMIT_NR) {
			if (s->nr == FSCK_ERR_RATELIMIT_NR)
				suppressing = true;
			else
				print = false;
		}

		s->nr++;
	bool repeat = false, print = true, suppress = false;
	bool inconsistent = false, exiting = false;
	struct fsck_err_state *s =
		count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress);
	if (repeat) {
		ret = s->ret;
		goto err_unlock;
	}

#ifdef BCACHEFS_LOG_PREFIX
	if (!strncmp(fmt, "bcachefs:", 9))
		prt_printf(out, bch2_log_msg(c, ""));
#endif

	if ((flags & FSCK_AUTOFIX) &&
	    (c->opts.errors == BCH_ON_ERROR_continue ||
	     c->opts.errors == BCH_ON_ERROR_fix_safe)) {
@@ -361,6 +480,7 @@ int __bch2_fsck_err(struct bch_fs *c,
		   !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
		prt_str(out, ", shutting down");
		inconsistent = true;
		print = true;
		ret = -BCH_ERR_fsck_errors_not_fixed;
	} else if (flags & FSCK_CAN_FIX) {
		prt_str(out, ", ");
@@ -419,24 +539,30 @@ int __bch2_fsck_err(struct bch_fs *c,
		print = true;
	}
print:
	prt_newline(out);

	if (inconsistent)
		__bch2_inconsistent_error(c, out);
	else if (exiting)
		prt_printf(out, "Unable to continue, halting\n");
	else if (suppress)
		prt_printf(out, "Ratelimiting new instances of previous error\n");

	if (print) {
		/* possibly strip an empty line, from printbuf_indent_add */
		while (out->pos && out->buf[out->pos - 1] == ' ')
			--out->pos;
		printbuf_nul_terminate(out);

		if (bch2_fs_stdio_redirect(c))
			bch2_print(c, "%s\n", out->buf);
			bch2_print(c, "%s", out->buf);
		else
			bch2_print_string_as_lines(KERN_ERR, out->buf);
	}

	if (exiting)
		bch_err(c, "Unable to continue, halting");
	else if (suppressing)
		bch_err(c, "Ratelimiting new instances of previous error");

	if (s)
		s->ret = ret;

	if (inconsistent)
		bch2_inconsistent_error(c);

	/*
	 * We don't yet track whether the filesystem currently has errors, for
	 * log_fsck_err()s: that would require us to track for every error type
@@ -498,16 +624,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
		prt_printf(&buf, " level=%u: ", from.level);

	bch2_bkey_val_to_text(&buf, c, k);
	prt_str(&buf, "\n ");
	prt_newline(&buf);

	va_list args;
	va_start(args, fmt);
	prt_vprintf(&buf, fmt, args);
	va_end(args);

	prt_str(&buf, ": delete?");

	int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf);
	int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf);
	printbuf_exit(&buf);
	return ret;
}
@@ -520,7 +644,7 @@ void bch2_flush_fsck_errs(struct bch_fs *c)

	list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
		if (s->ratelimited && s->last_msg)
			bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
		bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);

		list_del(&s->list);
		kfree(s->last_msg);
@@ -530,35 +654,59 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
	mutex_unlock(&c->fsck_error_msgs_lock);
}

int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum)
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
				   subvol_inum inum, u64 offset)
{
	u32 restart_count = trans->restart_count;
	int ret = 0;

	/* XXX: we don't yet attempt to print paths when we don't know the subvol */
	if (inum.subvol)
		ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out));
	if (inum.subvol) {
		ret = bch2_inum_to_path(trans, inum, out);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ret;
	}
	if (!inum.subvol || ret)
		prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum);
	prt_printf(out, " offset %llu: ", offset);

	return trans_was_restarted(trans, restart_count);
}

int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
				   subvol_inum inum, u64 offset)
{
	int ret = bch2_inum_err_msg_trans(trans, out, inum);
	prt_printf(out, " offset %llu: ", offset);
	return ret;
}

void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum)
{
	bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum));
}

void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
			      subvol_inum inum, u64 offset)
{
	bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
	bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
}

int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
					struct bpos pos)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	if (!bch2_snapshot_is_leaf(c, pos.snapshot))
		prt_str(out, "(multiple snapshots) ");

	subvol_inum inum = {
		.subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot),
		.inum	= pos.inode,
	};

	if (inum.subvol) {
		ret = bch2_inum_to_path(trans, inum, out);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ret;
	}

	if (!inum.subvol || ret)
		prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot);

	prt_printf(out, " offset %llu: ", pos.offset << 8);
	return 0;
}

void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
				   struct bpos pos)
{
	bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
}
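count_fsck_err_locked() above memoizes on the exact rendered message so a transaction restart replaying the same error doesn't re-prompt the user, and it ratelimits after FSCK_ERR_RATELIMIT_NR repeats. A stripped-down sketch of that policy, without the locking and list handling:

#include <stdbool.h>
#include <string.h>

#define RATELIMIT_NR 10

struct err_state { char last_msg[256]; unsigned long nr; };

static void count_err(struct err_state *s, const char *msg, bool ratelimit,
		      bool *repeat, bool *print, bool *suppress)
{
	if (s->nr && !strcmp(msg, s->last_msg)) {
		*repeat = true;			/* same error replayed: reuse prior answer */
		*print = false;
		return;
	}

	strncpy(s->last_msg, msg, sizeof(s->last_msg) - 1);

	if (ratelimit && s->nr >= RATELIMIT_NR) {
		if (s->nr == RATELIMIT_NR)
			*suppress = true;	/* announce the ratelimit once */
		else
			*print = false;		/* then go quiet */
	}
	s->nr++;
}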
@@ -18,6 +18,8 @@ struct work_struct;

/* Error messages: */

void bch2_log_msg_start(struct bch_fs *, struct printbuf *);

/*
 * Inconsistency errors: The on disk data is inconsistent. If these occur during
 * initial recovery, they don't indicate a bug in the running code - we walk all
@@ -29,21 +31,10 @@ struct work_struct;
 * BCH_ON_ERROR_CONTINUE mode
 */

bool __bch2_inconsistent_error(struct bch_fs *, struct printbuf *);
bool bch2_inconsistent_error(struct bch_fs *);

int bch2_topology_error(struct bch_fs *);

#define bch2_fs_topology_error(c, ...)				\
({								\
	bch_err(c, "btree topology error: " __VA_ARGS__);	\
	bch2_topology_error(c);					\
})

#define bch2_fs_inconsistent(c, ...)				\
({								\
	bch_err(c, __VA_ARGS__);				\
	bch2_inconsistent_error(c);				\
})
__printf(2, 3)
bool bch2_fs_inconsistent(struct bch_fs *, const char *, ...);

#define bch2_fs_inconsistent_on(cond, ...)			\
({								\
@@ -53,26 +44,21 @@ int bch2_topology_error(struct bch_fs *);
	_ret;							\
})

/*
 * When a transaction update discovers or is causing a fs inconsistency, it's
 * helpful to also dump the pending updates:
 */
#define bch2_trans_inconsistent(trans, ...)			\
({								\
	bch_err(trans->c, __VA_ARGS__);				\
	bch2_dump_trans_updates(trans);				\
	bch2_inconsistent_error(trans->c);			\
})
__printf(2, 3)
bool bch2_trans_inconsistent(struct btree_trans *, const char *, ...);

#define bch2_trans_inconsistent_on(cond, trans, ...)		\
#define bch2_trans_inconsistent_on(cond, ...)			\
({								\
	bool _ret = unlikely(!!(cond));				\
								\
	if (_ret)						\
		bch2_trans_inconsistent(trans, __VA_ARGS__);	\
		bch2_trans_inconsistent(__VA_ARGS__);		\
	_ret;							\
})

int __bch2_topology_error(struct bch_fs *, struct printbuf *);
__printf(2, 3)
int bch2_fs_topology_error(struct bch_fs *, const char *, ...);

/*
 * Fsck errors: inconsistency errors we detect at mount time, and should ideally
 * be able to repair:
@@ -80,7 +66,7 @@ int bch2_topology_error(struct bch_fs *);

struct fsck_err_state {
	struct list_head	list;
	const char		*fmt;
	enum bch_sb_error_id	id;
	u64			nr;
	bool			ratelimited;
	int			ret;
@@ -90,6 +76,12 @@ struct fsck_err_state {

#define fsck_err_count(_c, _err)	bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)

void __bch2_count_fsck_err(struct bch_fs *,
			   enum bch_sb_error_id, const char *,
			   bool *, bool *, bool *);
#define bch2_count_fsck_err(_c, _err, ...)			\
	__bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)

__printf(5, 6) __cold
int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
		    enum bch_fsck_flags,
@@ -216,32 +208,43 @@ void bch2_io_error_work(struct work_struct *);
/* Does the error handling without logging a message */
void bch2_io_error(struct bch_dev *, enum bch_member_error_type);

#define bch2_dev_io_err_on(cond, ca, _type, ...)		\
({								\
	bool _ret = (cond);					\
								\
	if (_ret) {						\
		bch_err_dev_ratelimited(ca, __VA_ARGS__);	\
		bch2_io_error(ca, _type);			\
	}							\
	_ret;							\
})
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_latency_acct(struct bch_dev *, u64, int);
#else
static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
#endif

#define bch2_dev_inum_io_err_on(cond, ca, _type, ...)		\
({								\
	bool _ret = (cond);					\
								\
	if (_ret) {						\
		bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
		bch2_io_error(ca, _type);			\
	}							\
	_ret;							\
})
static inline void bch2_account_io_success_fail(struct bch_dev *ca,
						enum bch_member_error_type type,
						bool success)
{
	if (likely(success)) {
		if (type == BCH_MEMBER_ERROR_write &&
		    ca->write_errors_start)
			ca->write_errors_start = 0;
	} else {
		bch2_io_error(ca, type);
	}
}

static inline void bch2_account_io_completion(struct bch_dev *ca,
					      enum bch_member_error_type type,
					      u64 submit_time, bool success)
{
	if (unlikely(!ca))
		return;

	if (type != BCH_MEMBER_ERROR_checksum)
		bch2_latency_acct(ca, submit_time, type);

	bch2_account_io_success_fail(ca, type, success);
}

int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum);
int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64);

void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum);
void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);

int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
void bch2_inum_snap_offset_err_msg(struct bch_fs *, struct printbuf *, struct bpos);

#endif /* _BCACHEFS_ERROR_H */
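With these helpers, an IO completion funnels latency accounting and error accounting through one call: success clears the device's write-error window, failure bumps the counter and may arm the write-error timeout handled by bch2_io_error_work(). A hedged sketch of how a write endio handler might use it; the field names on the ec_bio side are assumptions, not the actual struct layout:

static void demo_write_endio(struct bio *bio)
{
	struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio);

	/* one call accounts latency and success/failure together: */
	bch2_account_io_completion(ec_bio->ca, BCH_MEMBER_ERROR_write,
				   ec_bio->submit_time, !bio->bi_status);
}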
@@ -28,6 +28,13 @@
#include "trace.h"
#include "util.h"

static const char * const bch2_extent_flags_strs[] = {
#define x(n, v)	[BCH_EXTENT_FLAG_##n] = #n,
	BCH_EXTENT_FLAGS()
#undef x
	NULL,
};

static unsigned bch2_crc_field_size_max[] = {
	[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
	[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
@@ -51,7 +58,8 @@ struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
}

void bch2_mark_io_failure(struct bch_io_failures *failed,
			  struct extent_ptr_decoded *p)
			  struct extent_ptr_decoded *p,
			  bool csum_error)
{
	struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, p->ptr.dev);

@@ -59,53 +67,57 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));

		f = &failed->devs[failed->nr++];
		f->dev		= p->ptr.dev;
		f->idx		= p->idx;
		f->nr_failed	= 1;
		f->nr_retries	= 0;
	} else if (p->idx != f->idx) {
		f->idx		= p->idx;
		f->nr_failed	= 1;
		f->nr_retries	= 0;
	} else {
		f->nr_failed++;
		memset(f, 0, sizeof(*f));
		f->dev = p->ptr.dev;
	}

	if (p->do_ec_reconstruct)
		f->failed_ec = true;
	else if (!csum_error)
		f->failed_io = true;
	else
		f->failed_csum_nr++;
}

static inline u64 dev_latency(struct bch_fs *c, unsigned dev)
static inline u64 dev_latency(struct bch_dev *ca)
{
	struct bch_dev *ca = bch2_dev_rcu(c, dev);
	return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
}

static inline int dev_failed(struct bch_dev *ca)
{
	return !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
}

/*
 * returns true if p1 is better than p2:
 */
static inline bool ptr_better(struct bch_fs *c,
			      const struct extent_ptr_decoded p1,
			      const struct extent_ptr_decoded p2)
			      u64 p1_latency,
			      struct bch_dev *ca1,
			      const struct extent_ptr_decoded p2,
			      u64 p2_latency)
{
	if (likely(!p1.idx && !p2.idx)) {
		u64 l1 = dev_latency(c, p1.ptr.dev);
		u64 l2 = dev_latency(c, p2.ptr.dev);
	struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);

		/*
		 * Square the latencies, to bias more in favor of the faster
		 * device - we never want to stop issuing reads to the slower
		 * device altogether, so that we can update our latency numbers:
		 */
		l1 *= l1;
		l2 *= l2;
	int failed_delta = dev_failed(ca1) - dev_failed(ca2);
	if (unlikely(failed_delta))
		return failed_delta < 0;

		/* Pick at random, biased in favor of the faster device: */
	if (unlikely(bch2_force_reconstruct_read))
		return p1.do_ec_reconstruct > p2.do_ec_reconstruct;

		return bch2_rand_range(l1 + l2) > l1;
	}
	if (unlikely(p1.do_ec_reconstruct || p2.do_ec_reconstruct))
		return p1.do_ec_reconstruct < p2.do_ec_reconstruct;

	if (bch2_force_reconstruct_read)
		return p1.idx > p2.idx;
	int crc_retry_delta = (int) p1.crc_retry_nr - (int) p2.crc_retry_nr;
	if (unlikely(crc_retry_delta))
		return crc_retry_delta < 0;

	return p1.idx < p2.idx;
	/* Pick at random, biased in favor of the faster device: */

	return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency;
}

/*
@@ -114,70 +126,112 @@ static inline bool ptr_better(struct bch_fs *c,
 * other devices, it will still pick a pointer from avoid.
 */
int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
			       struct bch_io_failures *failed,
			       struct extent_ptr_decoded *pick,
			       int dev)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bch_dev_io_failures *f;
	int ret = 0;
	bool have_csum_errors = false, have_io_errors = false, have_missing_devs = false;
	bool have_dirty_ptrs = false, have_pick = false;

	if (k.k->type == KEY_TYPE_error)
		return -BCH_ERR_key_type_error;

	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
		return -BCH_ERR_extent_poisened;

	rcu_read_lock();
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	u64 pick_latency;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		have_dirty_ptrs |= !p.ptr.cached;

		/*
		 * Unwritten extent: no need to actually read, treat it as a
		 * hole and return 0s:
		 */
		if (p.ptr.unwritten) {
			ret = 0;
			break;
			rcu_read_unlock();
			return 0;
		}

		/* Are we being asked to read from a specific device? */
		if (dev >= 0 && p.ptr.dev != dev)
			continue;

		/*
		 * If there are any dirty pointers it's an error if we can't
		 * read:
		 */
		if (!ret && !p.ptr.cached)
			ret = -BCH_ERR_no_device_to_read_from;

		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);

		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
			continue;

		f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
		if (f)
			p.idx = f->nr_failed < f->nr_retries
				? f->idx
				: f->idx + 1;
		struct bch_dev_io_failures *f =
			unlikely(failed) ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
		if (unlikely(f)) {
			p.crc_retry_nr	= f->failed_csum_nr;
			p.has_ec	&= ~f->failed_ec;

		if (!p.idx && (!ca || !bch2_dev_is_readable(ca)))
			p.idx++;
			if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) {
				have_io_errors	|= f->failed_io;
				have_io_errors	|= f->failed_ec;
			}
			have_csum_errors |= !!f->failed_csum_nr;

		if (!p.idx && p.has_ec && bch2_force_reconstruct_read)
			p.idx++;
			if (p.has_ec && (f->failed_io || f->failed_csum_nr))
				p.do_ec_reconstruct = true;
			else if (f->failed_io ||
				 f->failed_csum_nr > c->opts.checksum_err_retry_nr)
				continue;
		}

		if (p.idx > (unsigned) p.has_ec)
			continue;
		have_missing_devs |= ca && !bch2_dev_is_online(ca);

		if (ret > 0 && !ptr_better(c, p, *pick))
			continue;
		if (!ca || !bch2_dev_is_online(ca)) {
			if (!p.has_ec)
				continue;
			p.do_ec_reconstruct = true;
		}

		*pick = p;
		ret = 1;
		if (bch2_force_reconstruct_read && p.has_ec)
			p.do_ec_reconstruct = true;

		u64 p_latency = dev_latency(ca);
		/*
		 * Square the latencies, to bias more in favor of the faster
		 * device - we never want to stop issuing reads to the slower
		 * device altogether, so that we can update our latency numbers:
		 */
		p_latency *= p_latency;

		if (!have_pick ||
		    ptr_better(c,
			       p, p_latency, ca,
			       *pick, pick_latency)) {
			*pick = p;
			pick_latency = p_latency;
			have_pick = true;
		}
	}
	rcu_read_unlock();

	return ret;
	if (have_pick)
		return 1;
	if (!have_dirty_ptrs)
		return 0;
	if (have_missing_devs)
		return -BCH_ERR_no_device_to_read_from;
	if (have_csum_errors)
		return -BCH_ERR_data_read_csum_err;
	if (have_io_errors)
		return -BCH_ERR_data_read_io_err;

	/*
	 * If we get here, we have pointers (bkey_ptrs_validate() ensures that),
	 * but they don't point to valid devices:
	 */
	return -BCH_ERR_no_devices_valid;
}

/* KEY_TYPE_btree_ptr: */
@@ -541,29 +595,35 @@ static void bch2_extent_crc_pack(union bch_extent_crc *dst,
				 struct bch_extent_crc_unpacked src,
				 enum bch_extent_entry_type type)
{
#define set_common_fields(_dst, _src)					\
		_dst.type		= 1 << type;			\
		_dst.csum_type		= _src.csum_type,		\
		_dst.compression_type	= _src.compression_type,	\
		_dst._compressed_size	= _src.compressed_size - 1,	\
		_dst._uncompressed_size	= _src.uncompressed_size - 1,	\
		_dst.offset		= _src.offset
#define common_fields(_src)						\
		.type			= BIT(type),			\
		.csum_type		= _src.csum_type,		\
		.compression_type	= _src.compression_type,	\
		._compressed_size	= _src.compressed_size - 1,	\
		._uncompressed_size	= _src.uncompressed_size - 1,	\
		.offset			= _src.offset

	switch (type) {
	case BCH_EXTENT_ENTRY_crc32:
		set_common_fields(dst->crc32, src);
		dst->crc32.csum	= (u32 __force) *((__le32 *) &src.csum.lo);
		dst->crc32 = (struct bch_extent_crc32) {
			common_fields(src),
			.csum		= (u32 __force) *((__le32 *) &src.csum.lo),
		};
		break;
	case BCH_EXTENT_ENTRY_crc64:
		set_common_fields(dst->crc64, src);
		dst->crc64.nonce	= src.nonce;
		dst->crc64.csum_lo	= (u64 __force) src.csum.lo;
		dst->crc64.csum_hi	= (u64 __force) *((__le16 *) &src.csum.hi);
		dst->crc64 = (struct bch_extent_crc64) {
			common_fields(src),
			.nonce		= src.nonce,
			.csum_lo	= (u64 __force) src.csum.lo,
			.csum_hi	= (u64 __force) *((__le16 *) &src.csum.hi),
		};
		break;
	case BCH_EXTENT_ENTRY_crc128:
		set_common_fields(dst->crc128, src);
		dst->crc128.nonce	= src.nonce;
		dst->crc128.csum	= src.csum;
		dst->crc128 = (struct bch_extent_crc128) {
			common_fields(src),
			.nonce		= src.nonce,
			.csum		= src.csum,
		};
		break;
	default:
		BUG();
@@ -1002,7 +1062,7 @@ static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,

	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);

	return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr);
	return ca && bch2_dev_is_healthy(ca) && !dev_ptr_stale_rcu(ca, ptr);
}

void bch2_extent_ptr_set_cached(struct bch_fs *c,
@@ -1225,6 +1285,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
			bch2_extent_rebalance_to_text(out, c, &entry->rebalance);
			break;

		case BCH_EXTENT_ENTRY_flags:
			prt_bitflags(out, bch2_extent_flags_strs, entry->flags.flags);
			break;

		default:
			prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
			return;
@@ -1386,6 +1450,11 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
#endif
			break;
		}
		case BCH_EXTENT_ENTRY_flags:
			bkey_fsck_err_on(entry != ptrs.start,
					 c, extent_flags_not_at_start,
					 "extent flags entry not at start");
			break;
		}
	}

@@ -1452,6 +1521,28 @@ void bch2_ptr_swab(struct bkey_s k)
	}
}

int bch2_bkey_extent_flags_set(struct bch_fs *c, struct bkey_i *k, u64 flags)
{
	int ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_flags);
	if (ret)
		return ret;

	struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));

	if (ptrs.start != ptrs.end &&
	    extent_entry_type(ptrs.start) == BCH_EXTENT_ENTRY_flags) {
		ptrs.start->flags.flags = flags;
	} else {
		struct bch_extent_flags f = {
			.type	= BIT(BCH_EXTENT_ENTRY_flags),
			.flags	= flags,
		};
		__extent_entry_insert(k, ptrs.start, (union bch_extent_entry *) &f);
	}

	return 0;
}

/* Generic extent code: */

int bch2_cut_front_s(struct bpos where, struct bkey_s k)
@@ -1497,8 +1588,8 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k)
				entry->crc128.offset += sub;
				break;
			case BCH_EXTENT_ENTRY_stripe_ptr:
				break;
			case BCH_EXTENT_ENTRY_rebalance:
			case BCH_EXTENT_ENTRY_flags:
				break;
			}

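The read path keeps the squared-latency bias from before the refactor: between two otherwise equal pointers it picks at random, weighted by the square of each device's mean read latency, so the slower device still sees occasional reads and its latency estimate stays fresh. A standalone sketch of that selection rule; rand_below() stands in for bch2_get_random_u64_below():

#include <stdint.h>
#include <stdlib.h>

static uint64_t rand_below(uint64_t ceil)
{
	return (uint64_t) rand() % ceil;	/* uniform in [0, ceil) */
}

/* Returns nonzero if device 1 should get the read. With l1 = 1ms and
 * l2 = 2ms the squared weights are 1:4, so device 1 wins ~80% of the time: */
static int pick_dev1(uint64_t l1, uint64_t l2)
{
	l1 *= l1;	/* square to bias harder toward the faster device */
	l2 *= l2;
	return rand_below(l1 + l2) > l1;	/* true with probability l2²/(l1²+l2²) */
}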
@@ -320,8 +320,9 @@ static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
({									\
	__label__ out;							\
									\
	(_ptr).idx	= 0;						\
	(_ptr).has_ec	= false;					\
	(_ptr).has_ec			= false;			\
	(_ptr).do_ec_reconstruct	= false;			\
	(_ptr).crc_retry_nr		= 0;				\
									\
	__bkey_extent_entry_for_each_from(_entry, _end, _entry)		\
		switch (__extent_entry_type(_entry)) {			\
@@ -401,7 +402,7 @@ out:									\
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
						 unsigned);
void bch2_mark_io_failure(struct bch_io_failures *,
			  struct extent_ptr_decoded *);
			  struct extent_ptr_decoded *, bool);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
			       struct bch_io_failures *,
			       struct extent_ptr_decoded *, int);
@@ -704,7 +705,7 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1,
		ptr1.unwritten	== ptr2.unwritten &&
		ptr1.offset	== ptr2.offset &&
		ptr1.dev	== ptr2.dev &&
		ptr1.dev	== ptr2.dev);
		ptr1.gen	== ptr2.gen);
}

void bch2_ptr_swab(struct bkey_s);
@@ -753,4 +754,19 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
	k->size = new_size;
}

static inline u64 bch2_bkey_extent_ptrs_flags(struct bkey_ptrs_c ptrs)
{
	if (ptrs.start != ptrs.end &&
	    extent_entry_type(ptrs.start) == BCH_EXTENT_ENTRY_flags)
		return ptrs.start->flags.flags;
	return 0;
}

static inline u64 bch2_bkey_extent_flags(struct bkey_s_c k)
{
	return bch2_bkey_extent_ptrs_flags(bch2_bkey_ptrs_c(k));
}

int bch2_bkey_extent_flags_set(struct bch_fs *, struct bkey_i *, u64);

#endif /* _BCACHEFS_EXTENTS_H */
@@ -79,8 +79,9 @@
	x(crc64,		2)		\
	x(crc128,		3)		\
	x(stripe_ptr,		4)		\
	x(rebalance,		5)
#define BCH_EXTENT_ENTRY_MAX	6
	x(rebalance,		5)		\
	x(flags,		6)
#define BCH_EXTENT_ENTRY_MAX	7

enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
@@ -201,6 +202,25 @@ struct bch_extent_stripe_ptr {
#endif
};

#define BCH_EXTENT_FLAGS()		\
	x(poisoned,	0)

enum bch_extent_flags_e {
#define x(n, v)	BCH_EXTENT_FLAG_##n = v,
	BCH_EXTENT_FLAGS()
#undef x
};

struct bch_extent_flags {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64		type:7,
			flags:57;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64		flags:57,
			type:7;
#endif
};

/* bch_extent_rebalance: */
#include "rebalance_format.h"

@@ -20,8 +20,9 @@ struct bch_extent_crc_unpacked {
};

struct extent_ptr_decoded {
	unsigned			idx;
	bool				has_ec;
	bool				do_ec_reconstruct;
	u8				crc_retry_nr;
	struct bch_extent_crc_unpacked	crc;
	struct bch_extent_ptr		ptr;
	struct bch_extent_stripe_ptr	ec;
@@ -31,10 +32,10 @@ struct bch_io_failures {
	u8			nr;
	struct bch_dev_io_failures {
		u8		dev;
		u8		idx;
		u8		nr_failed;
		u8		nr_retries;
	} devs[BCH_REPLICAS_MAX];
		unsigned	failed_csum_nr:6,
				failed_io:1,
				failed_ec:1;
	} devs[BCH_REPLICAS_MAX + 1];
};

#endif /* _BCACHEFS_EXTENTS_TYPES_H */
@@ -148,87 +148,97 @@ static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *pr
	return cmp(a, b, priv);
}

static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
static inline int eytzinger1_do_cmp(void *base1, size_t n, size_t size,
				    cmp_r_func_t cmp_func, const void *priv,
				    size_t l, size_t r)
{
	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
		      base + inorder_to_eytzinger0(r, n) * size,
	return do_cmp(base1 + inorder_to_eytzinger1(l, n) * size,
		      base1 + inorder_to_eytzinger1(r, n) * size,
		      cmp_func, priv);
}

static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
static inline void eytzinger1_do_swap(void *base1, size_t n, size_t size,
				      swap_r_func_t swap_func, const void *priv,
				      size_t l, size_t r)
{
	do_swap(base + inorder_to_eytzinger0(l, n) * size,
		base + inorder_to_eytzinger0(r, n) * size,
	do_swap(base1 + inorder_to_eytzinger1(l, n) * size,
		base1 + inorder_to_eytzinger1(r, n) * size,
		size, swap_func, priv);
}

static void eytzinger1_sort_r(void *base1, size_t n, size_t size,
			      cmp_r_func_t cmp_func,
			      swap_r_func_t swap_func,
			      const void *priv)
{
	unsigned i, j, k;

	/* called from 'sort' without swap function, let's pick the default */
	if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
		swap_func = NULL;

	if (!swap_func) {
		if (is_aligned(base1, size, 8))
			swap_func = SWAP_WORDS_64;
		else if (is_aligned(base1, size, 4))
			swap_func = SWAP_WORDS_32;
		else
			swap_func = SWAP_BYTES;
	}

	/* heapify */
	for (i = n / 2; i >= 1; --i) {
		/* Find the sift-down path all the way to the leaves. */
		for (j = i; k = j * 2, k < n;)
			j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;

		/* Special case for the last leaf with no sibling. */
		if (j * 2 == n)
			j *= 2;

		/* Backtrack to the correct location. */
		while (j != i && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, i, j) >= 0)
			j /= 2;

		/* Shift the element into its correct place. */
		for (k = j; j != i;) {
			j /= 2;
			eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
		}
	}

	/* sort */
	for (i = n; i > 1; --i) {
		eytzinger1_do_swap(base1, n, size, swap_func, priv, 1, i);

		/* Find the sift-down path all the way to the leaves. */
		for (j = 1; k = j * 2, k + 1 < i;)
			j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;

		/* Special case for the last leaf with no sibling. */
		if (j * 2 + 1 == i)
			j *= 2;

		/* Backtrack to the correct location. */
		while (j >= 1 && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, 1, j) >= 0)
			j /= 2;

		/* Shift the element into its correct place. */
		for (k = j; j > 1;) {
			j /= 2;
			eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
		}
	}
}

void eytzinger0_sort_r(void *base, size_t n, size_t size,
|
||||
cmp_r_func_t cmp_func,
|
||||
swap_r_func_t swap_func,
|
||||
const void *priv)
|
||||
{
|
||||
int i, j, k;
|
||||
void *base1 = base - size;
|
||||
|
||||
/* called from 'sort' without swap function, let's pick the default */
|
||||
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
|
||||
swap_func = NULL;
|
||||
|
||||
if (!swap_func) {
|
||||
if (is_aligned(base, size, 8))
|
||||
swap_func = SWAP_WORDS_64;
|
||||
else if (is_aligned(base, size, 4))
|
||||
swap_func = SWAP_WORDS_32;
|
||||
else
|
||||
swap_func = SWAP_BYTES;
|
||||
}
|
||||
|
||||
/* heapify */
|
||||
for (i = n / 2 - 1; i >= 0; --i) {
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = i; k = j * 2 + 1, k + 1 < n;)
|
||||
j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 + 2 == n)
|
||||
j = j * 2 + 1;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j != i && eytzinger0_do_cmp(base, n, size, cmp_func, priv, i, j) >= 0)
|
||||
j = (j - 1) / 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j != i;) {
|
||||
j = (j - 1) / 2;
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
|
||||
/* sort */
|
||||
for (i = n - 1; i > 0; --i) {
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
|
||||
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = 0; k = j * 2 + 1, k + 1 < i;)
|
||||
j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 + 2 == i)
|
||||
j = j * 2 + 1;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j && eytzinger0_do_cmp(base, n, size, cmp_func, priv, 0, j) >= 0)
|
||||
j = (j - 1) / 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j;) {
|
||||
j = (j - 1) / 2;
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
return eytzinger1_sort_r(base1, n, size, cmp_func, swap_func, priv);
|
||||
}
|
||||
|
||||
void eytzinger0_sort(void *base, size_t n, size_t size,
|
||||
|
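The sort rewrite keeps one canonical 1-indexed implementation and turns the 0-indexed entry point into a thin wrapper: base1 = base - size biases the base pointer so that 1-based node i addresses the same element as 0-based node i - 1. A standalone sketch of that addressing identity, with the pointer arithmetic spelled out via char * (the kernel source leans on GCC's void-pointer arithmetic):

#include <stdio.h>

int main(void)
{
	/* an eytzinger0 layout of {0..6}: root at a[0] */
	int a[7] = { 3, 1, 5, 0, 2, 4, 6 };
	char *base = (char *)a;
	size_t size = sizeof(a[0]);

	/*
	 * base1 points one element before the array, so 1-based node i
	 * addresses the same storage as 0-based node i - 1:
	 */
	char *base1 = base - size;

	int *root0 = (int *)(base  + 0 * size);	/* 0-based root */
	int *root1 = (int *)(base1 + 1 * size);	/* 1-based root, same element */

	printf("%d %d\n", *root0, *root1);	/* prints "3 3" */
	return 0;
}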
libbcachefs/eytzinger.h
@@ -6,6 +6,7 @@
 #include <linux/log2.h>

 #ifdef EYTZINGER_DEBUG
+#include <linux/bug.h>
 #define EYTZINGER_BUG_ON(cond)	BUG_ON(cond)
 #else
 #define EYTZINGER_BUG_ON(cond)
@@ -56,24 +57,14 @@ static inline unsigned eytzinger1_last(unsigned size)
 	return rounddown_pow_of_two(size + 1) - 1;
 }

-/*
- * eytzinger1_next() and eytzinger1_prev() have the nice properties that
- *
- * eytzinger1_next(0) == eytzinger1_first())
- * eytzinger1_prev(0) == eytzinger1_last())
- *
- * eytzinger1_prev(eytzinger1_first()) == 0
- * eytzinger1_next(eytzinger1_last()) == 0
- */
-
 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-	EYTZINGER_BUG_ON(i > size);
+	EYTZINGER_BUG_ON(i == 0 || i > size);

 	if (eytzinger1_right_child(i) <= size) {
 		i = eytzinger1_right_child(i);

-		i <<= __fls(size + 1) - __fls(i);
+		i <<= __fls(size) - __fls(i);
 		i >>= i > size;
 	} else {
 		i >>= ffz(i) + 1;
@@ -84,12 +75,12 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)

 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-	EYTZINGER_BUG_ON(i > size);
+	EYTZINGER_BUG_ON(i == 0 || i > size);

 	if (eytzinger1_left_child(i) <= size) {
 		i = eytzinger1_left_child(i) + 1;

-		i <<= __fls(size + 1) - __fls(i);
+		i <<= __fls(size) - __fls(i);
 		i -= 1;
 		i >>= i > size;
 	} else {
@@ -243,73 +234,63 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
 	     (_i) != -1;					\
 	     (_i) = eytzinger0_next((_i), (_size)))

+#define eytzinger0_for_each_prev(_i, _size)			\
+	for (unsigned (_i) = eytzinger0_last((_size));		\
+	     (_i) != -1;					\
+	     (_i) = eytzinger0_prev((_i), (_size)))
+
 /* return greatest node <= @search, or -1 if not found */
 static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
 				     cmp_func_t cmp, const void *search)
 {
-	unsigned i, n = 0;
+	void *base1 = base - size;
+	unsigned n = 1;

-	if (!nr)
-		return -1;
-
-	do {
-		i = n;
-		n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
-	} while (n < nr);
-
-	if (n & 1) {
-		/*
-		 * @i was greater than @search, return previous node:
-		 *
-		 * if @i was leftmost/smallest element,
-		 * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
-		 */
-		return eytzinger0_prev(i, nr);
-	} else {
-		return i;
-	}
+	while (n <= nr)
+		n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
+	n >>= __ffs(n) + 1;
+	return n - 1;
 }

 /* return smallest node > @search, or -1 if not found */
 static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
 				     cmp_func_t cmp, const void *search)
 {
-	ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+	void *base1 = base - size;
+	unsigned n = 1;

-	/*
-	 * if eytitzinger0_find_le() returned -1 - no element was <= search - we
-	 * want to return the first element; next/prev identities mean this work
-	 * as expected
-	 *
-	 * similarly if find_le() returns last element, we should return -1;
-	 * identities mean this all works out:
-	 */
-	return eytzinger0_next(idx, nr);
+	while (n <= nr)
+		n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
+	n >>= __ffs(n + 1) + 1;
+	return n - 1;
 }

 /* return smallest node >= @search, or -1 if not found */
 static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
 				     cmp_func_t cmp, const void *search)
 {
-	ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+	void *base1 = base - size;
+	unsigned n = 1;

-	if (idx < nr && !cmp(base + idx * size, search))
-		return idx;
-
-	return eytzinger0_next(idx, nr);
+	while (n <= nr)
+		n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0);
+	n >>= __ffs(n + 1) + 1;
+	return n - 1;
 }

 #define eytzinger0_find(base, nr, size, _cmp, search)		\
 ({								\
-	void *_base		= (base);			\
+	size_t _size		= (size);			\
+	void *_base1		= (void *)(base) - _size;	\
 	const void *_search	= (search);			\
 	size_t _nr		= (nr);				\
-	size_t _size		= (size);			\
-	size_t _i		= 0;				\
+	size_t _i		= 1;				\
 	int _res;						\
								\
-	while (_i < _nr &&					\
-	       (_res = _cmp(_search, _base + _i * _size)))	\
-		_i = eytzinger0_child(_i, _res > 0);		\
-	_i;							\
+	while (_i <= _nr &&					\
+	       (_res = _cmp(_search, _base1 + _i * _size)))	\
+		_i = eytzinger1_child(_i, _res > 0);		\
+	_i - 1;							\
 })

 void eytzinger0_sort_r(void *, size_t, size_t,
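In the rewritten search helpers, the loop walks down from the root (n = 1) taking the right child whenever the node compares <= the search key, so the bits of n record the comparison path; shifting off the trailing zeroes plus one bit recovers the node where the search last went right, i.e. the greatest element <= search. A hedged standalone model of eytzinger0_find_le() (userspace types; POSIX ffs() stands in for the kernel's __ffs(n) + 1):

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* 1-based eytzinger child step, as in the new find_le */
static unsigned eytzinger1_child(unsigned i, unsigned child)
{
	return (i << 1) + child;
}

static int find_le(const int *base0, unsigned nr, int search)
{
	const int *base1 = base0 - 1;	/* 1-based addressing, as in the kernel code */
	unsigned n = 1;

	while (n <= nr)
		n = eytzinger1_child(n, base1[n] <= search);
	/*
	 * n's bits encode the path: strip the trailing zeroes (final
	 * left-turns) plus one bit to land on the node where we last
	 * went right; all-left paths collapse to 0, returning -1:
	 */
	n >>= ffs(n);	/* ffs() is 1-based, i.e. __ffs(n) + 1 */
	return (int)n - 1;
}

int main(void)
{
	/* eytzinger order of the sorted array {0,1,2,3,4,5,6} */
	int a[] = { 3, 1, 5, 0, 2, 4, 6 };

	printf("%d\n", find_le(a, 7, 4));	/* prints 5: a[5] == 4 */
	return 0;
}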
libbcachefs/fs-io-buffered.c
@@ -110,11 +110,21 @@ static int readpage_bio_extend(struct btree_trans *trans,
 		if (!get_more)
 			break;

+		unsigned sectors_remaining = sectors_this_extent - bio_sectors(bio);
+
+		if (sectors_remaining < PAGE_SECTORS << mapping_min_folio_order(iter->mapping))
+			break;
+
+		unsigned order = ilog2(rounddown_pow_of_two(sectors_remaining) / PAGE_SECTORS);
+
+		/* ensure proper alignment */
+		order = min(order, __ffs(folio_offset|BIT(31)));
+
 		folio = xa_load(&iter->mapping->i_pages, folio_offset);
 		if (folio && !xa_is_value(folio))
 			break;

-		folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0);
+		folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), order);
 		if (!folio)
 			break;

@@ -215,11 +225,11 @@ static void bchfs_read(struct btree_trans *trans,

 		bch2_read_extent(trans, rbio, iter.pos,
 				 data_btree, k, offset_into_extent, flags);
-		swap(rbio->bio.bi_iter.bi_size, bytes);

 		if (flags & BCH_READ_last_fragment)
 			break;

+		swap(rbio->bio.bi_iter.bi_size, bytes);
 		bio_advance(&rbio->bio, bytes);
 err:
 		if (ret &&
@@ -230,7 +240,8 @@ err:

 	if (ret) {
 		struct printbuf buf = PRINTBUF;
-		bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9);
+		lockrestart_do(trans,
+			bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9));
 		prt_printf(&buf, "read error %i from btree lookup", ret);
 		bch_err_ratelimited(c, "%s", buf.buf);
 		printbuf_exit(&buf);
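readpage_bio_extend() now picks a large folio order from what remains of the extent, then clamps it so the folio stays naturally aligned in the file. A worked example of that arithmetic as a standalone program (PAGE_SECTORS = 8 assumes 4K pages and 512-byte sectors; the helpers are reimplemented for the demo):

#include <stdio.h>

#define PAGE_SECTORS	8u
#define BIT(n)		(1u << (n))

static unsigned ilog2_u(unsigned v)        { unsigned r = 0; while (v >>= 1) r++; return r; }
static unsigned rounddown_pow2(unsigned v) { return 1u << ilog2_u(v); }
static unsigned ffs0(unsigned v)           { unsigned r = 0; while (!(v & 1)) { v >>= 1; r++; } return r; }

int main(void)
{
	unsigned sectors_remaining = 96, folio_offset = 12;

	/* 96 sectors -> rounddown 64 -> 64/8 = 8 pages -> order 3 (32K folio) */
	unsigned order = ilog2_u(rounddown_pow2(sectors_remaining) / PAGE_SECTORS);

	/*
	 * an order-o folio must start at a page index aligned to 1 << o;
	 * BIT(31) caps the answer when folio_offset is 0:
	 */
	unsigned align = ffs0(folio_offset | BIT(31));	/* 12 = 1100b -> 2 */
	if (align < order)
		order = align;

	printf("order %u\n", order);	/* prints "order 2" */
	return 0;
}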
libbcachefs/fs-io.c
@@ -466,6 +466,7 @@ int bchfs_truncate(struct mnt_idmap *idmap,
 	ret = bch2_truncate_folio(inode, iattr->ia_size);
 	if (unlikely(ret < 0))
 		goto err;
+	ret = 0;

 	truncate_setsize(&inode->v, iattr->ia_size);

@@ -998,17 +999,28 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
 			POS(inode->v.i_ino, offset >> 9),
 			POS(inode->v.i_ino, U64_MAX),
 			inum.subvol, BTREE_ITER_slots, k, ({
-		if (k.k->p.inode != inode->v.i_ino) {
-			next_hole = bch2_seek_pagecache_hole(&inode->v,
-					offset, MAX_LFS_FILESIZE, 0, false);
-			break;
-		} else if (!bkey_extent_is_data(k.k)) {
-			next_hole = bch2_seek_pagecache_hole(&inode->v,
-					max(offset, bkey_start_offset(k.k) << 9),
-					k.k->p.offset << 9, 0, false);
+		if (k.k->p.inode != inode->v.i_ino ||
+		    !bkey_extent_is_data(k.k)) {
+			loff_t start_offset = k.k->p.inode == inode->v.i_ino
+				? max(offset, bkey_start_offset(k.k) << 9)
+				: offset;
+			loff_t end_offset = k.k->p.inode == inode->v.i_ino
+				? MAX_LFS_FILESIZE
+				: k.k->p.offset << 9;

-			if (next_hole < k.k->p.offset << 9)
+			/*
+			 * Found a hole in the btree, now make sure it's
+			 * a hole in the pagecache. We might have to
+			 * keep searching if this hole is entirely dirty
+			 * in the page cache:
+			 */
+			bch2_trans_unlock(trans);
+			loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v,
+							start_offset, end_offset, 0, false);
+			if (pagecache_hole < end_offset) {
+				next_hole = pagecache_hole;
 				break;
+			}
 		} else {
 			offset = max(offset, bkey_start_offset(k.k) << 9);
 		}
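The reworked bch2_seek_hole() is what ultimately services lseek(2) with SEEK_HOLE: a candidate hole found in the extents btree is confirmed against the pagecache, with the btree transaction unlocked first. Minimal userspace usage:

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return perror("open"), 1;

	/* offset of the first hole; end of file counts as a hole */
	off_t hole = lseek(fd, 0, SEEK_HOLE);
	if (hole < 0)
		return perror("lseek"), 1;

	printf("first hole at %lld\n", (long long)hole);
	close(fd);
	return 0;
}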
libbcachefs/fs-ioctl.c
@@ -5,8 +5,8 @@
 #include "chardev.h"
 #include "dirent.h"
 #include "fs.h"
-#include "fs-common.h"
 #include "fs-ioctl.h"
+#include "namei.h"
 #include "quota.h"

 #include <linux/compat.h>
@@ -54,6 +54,32 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
 	    (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
 		return -EINVAL;

+	if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
+#ifdef CONFIG_UNICODE
+		int ret = 0;
+		/* Not supported on individual files. */
+		if (!S_ISDIR(bi->bi_mode))
+			return -EOPNOTSUPP;
+
+		/*
+		 * Make sure the dir is empty, as otherwise we'd need to
+		 * rehash everything and update the dirent keys.
+		 */
+		ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+		if (ret < 0)
+			return ret;
+
+		ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
+		if (ret)
+			return ret;
+
+		bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+#else
+		printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
+		return -EOPNOTSUPP;
+#endif
+	}
+
 	if (s->set_projinherit) {
 		bi->bi_fields_set &= ~(1 << Inode_opt_project);
 		bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
@@ -218,7 +244,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
 	int ret = 0;
 	subvol_inum inum;

-	kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
+	kname = kmalloc(BCH_NAME_MAX, GFP_KERNEL);
 	if (!kname)
 		return -ENOMEM;

@@ -515,10 +541,12 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
 		ret = -ENOENT;
 		goto err;
 	}
-	ret = __bch2_unlink(dir, victim, true);
+
+	ret = inode_permission(file_mnt_idmap(filp), d_inode(victim), MAY_WRITE) ?:
+		__bch2_unlink(dir, victim, true);
 	if (!ret) {
 		fsnotify_rmdir(dir, victim);
-		d_delete(victim);
+		d_invalidate(victim);
 	}
 err:
 	inode_unlock(dir);
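Userspace reaches bch2_inode_flags_set() through the standard inode-attribute ioctls, so enabling casefolding might look like the following; per the checks in the hunk above, the target must be an empty directory and the kernel must have CONFIG_UNICODE:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;

	int fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	int flags;

	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags))
		return perror("getflags"), 1;

	flags |= FS_CASEFOLD_FL;
	/* fails with EOPNOTSUPP on non-empty dirs or without CONFIG_UNICODE */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags))
		return perror("setflags"), 1;

	close(fd);
	return 0;
}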
libbcachefs/fs-ioctl.h
@@ -6,19 +6,21 @@

 /* bcachefs inode flags -> vfs inode flags: */
 static const __maybe_unused unsigned bch_flags_to_vfs[] = {
-	[__BCH_INODE_sync]	= S_SYNC,
-	[__BCH_INODE_immutable]	= S_IMMUTABLE,
-	[__BCH_INODE_append]	= S_APPEND,
-	[__BCH_INODE_noatime]	= S_NOATIME,
+	[__BCH_INODE_sync]		= S_SYNC,
+	[__BCH_INODE_immutable]		= S_IMMUTABLE,
+	[__BCH_INODE_append]		= S_APPEND,
+	[__BCH_INODE_noatime]		= S_NOATIME,
+	[__BCH_INODE_casefolded]	= S_CASEFOLD,
 };

 /* bcachefs inode flags -> FS_IOC_GETFLAGS: */
 static const __maybe_unused unsigned bch_flags_to_uflags[] = {
-	[__BCH_INODE_sync]	= FS_SYNC_FL,
-	[__BCH_INODE_immutable]	= FS_IMMUTABLE_FL,
-	[__BCH_INODE_append]	= FS_APPEND_FL,
-	[__BCH_INODE_nodump]	= FS_NODUMP_FL,
-	[__BCH_INODE_noatime]	= FS_NOATIME_FL,
+	[__BCH_INODE_sync]		= FS_SYNC_FL,
+	[__BCH_INODE_immutable]		= FS_IMMUTABLE_FL,
+	[__BCH_INODE_append]		= FS_APPEND_FL,
+	[__BCH_INODE_nodump]		= FS_NODUMP_FL,
+	[__BCH_INODE_noatime]		= FS_NOATIME_FL,
+	[__BCH_INODE_casefolded]	= FS_CASEFOLD_FL,
 };

 /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
libbcachefs/fs.c
@@ -11,7 +11,6 @@
 #include "errcode.h"
 #include "extents.h"
 #include "fs.h"
-#include "fs-common.h"
 #include "fs-io.h"
 #include "fs-ioctl.h"
 #include "fs-io-buffered.h"
@@ -22,6 +21,7 @@
 #include "io_read.h"
 #include "journal.h"
 #include "keylist.h"
+#include "namei.h"
 #include "quota.h"
 #include "rebalance.h"
 #include "snapshot.h"
@@ -641,7 +641,9 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
 	if (ret)
 		return ERR_PTR(ret);

-	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
+	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+
+	ret = bch2_dirent_read_target(trans, dir, d, &inum);
 	if (ret > 0)
 		ret = -ENOENT;
 	if (ret)
@@ -651,30 +653,30 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
 	if (inode)
 		goto out;

+	/*
+	 * Note: if check/repair needs it, we commit before
+	 * bch2_inode_hash_init_insert(), as after that point we can't take a
+	 * restart - not in the top level loop with a commit_do(), like we
+	 * usually do:
+	 */
+
 	struct bch_subvolume subvol;
 	struct bch_inode_unpacked inode_u;
 	ret =   bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
 		bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
+		bch2_check_dirent_target(trans, &dirent_iter, d, &inode_u, false) ?:
+		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
 		PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));

 	/*
 	 * don't remove it: check_inodes might find another inode that points
 	 * back to this dirent
 	 */
 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
-			c, "dirent to missing inode:\n %s",
-			(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+			c, "dirent to missing inode:\n%s",
+			(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf));
 	if (ret)
 		goto err;

-	/* regular files may have hardlinks: */
-	if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) &&
-				    !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)),
-				    c,
-				    "dirent points to inode that does not point back:\n %s",
-				    (bch2_bkey_val_to_text(&buf, c, k),
-				     prt_printf(&buf, "\n "),
-				     bch2_inode_unpacked_to_text(&buf, &inode_u),
-				     buf.buf))) {
-		ret = -ENOENT;
-		goto err;
-	}
 out:
 	bch2_trans_iter_exit(trans, &dirent_iter);
 	printbuf_exit(&buf);
@@ -698,6 +700,23 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
 	if (IS_ERR(inode))
 		inode = NULL;

+#ifdef CONFIG_UNICODE
+	if (!inode && IS_CASEFOLDED(vdir)) {
+		/*
+		 * Do not cache a negative dentry in casefolded directories
+		 * as it would need to be invalidated in the following situation:
+		 * - Lookup file "blAH" in a casefolded directory
+		 * - Creation of file "BLAH" in a casefolded directory
+		 * - Lookup file "blAH" in a casefolded directory
+		 * which would fail if we had a negative dentry.
+		 *
+		 * We should come back to this when VFS has a method to handle
+		 * this edgecase.
+		 */
+		return NULL;
+	}
+#endif
+
 	return d_splice_alias(&inode->v, dentry);
 }

@@ -1802,7 +1821,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans,
 		break;
 	}

-	mapping_set_large_folios(inode->v.i_mapping);
+	mapping_set_folio_min_order(inode->v.i_mapping,
+				    get_order(trans->c->opts.block_size));
 }

 static void bch2_free_inode(struct inode *vinode)
@@ -2008,44 +2028,6 @@ static struct bch_fs *bch2_path_to_fs(const char *path)
 	return c ?: ERR_PTR(-ENOENT);
 }

-static int bch2_remount(struct super_block *sb, int *flags,
-			struct bch_opts opts)
-{
-	struct bch_fs *c = sb->s_fs_info;
-	int ret = 0;
-
-	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
-
-	if (opts.read_only != c->opts.read_only) {
-		down_write(&c->state_lock);
-
-		if (opts.read_only) {
-			bch2_fs_read_only(c);
-
-			sb->s_flags |= SB_RDONLY;
-		} else {
-			ret = bch2_fs_read_write(c);
-			if (ret) {
-				bch_err(c, "error going rw: %i", ret);
-				up_write(&c->state_lock);
-				ret = -EINVAL;
-				goto err;
-			}
-
-			sb->s_flags &= ~SB_RDONLY;
-		}
-
-		c->opts.read_only = opts.read_only;
-
-		up_write(&c->state_lock);
-	}
-
-	if (opt_defined(opts, errors))
-		c->opts.errors = opts.errors;
-err:
-	return bch2_err_class(ret);
-}
-
 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
 {
 	struct bch_fs *c = root->d_sb->s_fs_info;
@@ -2192,17 +2174,21 @@ static int bch2_fs_get_tree(struct fs_context *fc)
 	if (ret)
 		goto err;

+	if (opt_defined(opts, discard))
+		set_bit(BCH_FS_discard_mount_opt_set, &c->flags);
+
 	/* Some options can't be parsed until after the fs is started: */
 	opts = bch2_opts_empty();
-	ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
+	ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf, false);
 	if (ret)
 		goto err_stop_fs;

 	bch2_opts_apply(&c->opts, opts);

-	ret = bch2_fs_start(c);
-	if (ret)
-		goto err_stop_fs;
+	/*
+	 * need to initialise sb and set c->vfs_sb _before_ starting fs,
+	 * for blk_holder_ops
+	 */

 	sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
 	ret = PTR_ERR_OR_ZERO(sb);
@@ -2264,6 +2250,10 @@ got_sb:

 	sb->s_shrink->seeks = 0;

+	ret = bch2_fs_start(c);
+	if (ret)
+		goto err_put_super;
+
 	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
 	ret = PTR_ERR_OR_ZERO(vinode);
 	bch_err_msg(c, ret, "mounting: error getting root inode");
@@ -2300,7 +2290,8 @@ err_stop_fs:
 	goto err;

 err_put_super:
-	__bch2_fs_stop(c);
+	if (!sb->s_root)
+		__bch2_fs_stop(c);
 	deactivate_locked_super(sb);
 	goto err;
 }
@@ -2343,6 +2334,8 @@ static int bch2_fs_parse_param(struct fs_context *fc,
 	int ret = bch2_parse_one_mount_opt(c, &opts->opts,
 					   &opts->parse_later, param->key,
 					   param->string);
+	if (ret)
+		pr_err("Error parsing option %s: %s", param->key, bch2_err_str(ret));

 	return bch2_err_class(ret);
 }
@@ -2351,8 +2344,39 @@ static int bch2_fs_reconfigure(struct fs_context *fc)
 {
 	struct super_block *sb = fc->root->d_sb;
 	struct bch2_opts_parse *opts = fc->fs_private;
+	struct bch_fs *c = sb->s_fs_info;
+	int ret = 0;

-	return bch2_remount(sb, &fc->sb_flags, opts->opts);
+	opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
+
+	if (opts->opts.read_only != c->opts.read_only) {
+		down_write(&c->state_lock);
+
+		if (opts->opts.read_only) {
+			bch2_fs_read_only(c);
+
+			sb->s_flags |= SB_RDONLY;
+		} else {
+			ret = bch2_fs_read_write(c);
+			if (ret) {
+				bch_err(c, "error going rw: %i", ret);
+				up_write(&c->state_lock);
+				ret = -EINVAL;
+				goto err;
+			}
+
+			sb->s_flags &= ~SB_RDONLY;
+		}
+
+		c->opts.read_only = opts->opts.read_only;
+
+		up_write(&c->state_lock);
+	}
+
+	if (opt_defined(opts->opts, errors))
+		c->opts.errors = opts->opts.errors;
+err:
+	return bch2_err_class(ret);
 }

 static const struct fs_context_operations bch2_context_ops = {
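With bch2_remount() folded into bch2_fs_reconfigure(), the read-only/read-write switch runs under the new mount API's reconfigure path. That path is exercised by an ordinary remount, e.g. (the mount point is a placeholder):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* MS_REMOUNT routes through fs_context -> bch2_fs_reconfigure() */
	if (mount(NULL, "/mnt/bch", NULL, MS_REMOUNT | MS_RDONLY, NULL)) {
		perror("remount");
		return 1;
	}
	return 0;
}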
libbcachefs/fsck.c
@@ -10,10 +10,10 @@
 #include "dirent.h"
 #include "error.h"
 #include "fs.h"
-#include "fs-common.h"
 #include "fsck.h"
 #include "inode.h"
 #include "keylist.h"
+#include "namei.h"
 #include "recovery_passes.h"
 #include "snapshot.h"
 #include "super.h"
@@ -23,13 +23,6 @@
 #include <linux/bsearch.h>
 #include <linux/dcache.h> /* struct qstr */

-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-				   struct bkey_s_c_dirent d)
-{
-	return inode->bi_dir == d.k->p.inode &&
-		inode->bi_dir_offset == d.k->p.offset;
-}
-
 static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
 					 struct bch_inode_unpacked *inode)
 {
@@ -116,29 +109,6 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
 	return ret;
 }

-static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
-			      struct bch_inode_unpacked *inode)
-{
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	int ret;
-
-	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
-				     BTREE_ITER_all_snapshots, k, ret) {
-		if (k.k->p.offset != inode_nr)
-			break;
-		if (!bkey_is_inode(k.k))
-			continue;
-		ret = bch2_inode_unpack(k, inode);
-		goto found;
-	}
-	ret = -BCH_ERR_ENOENT_inode;
-found:
-	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
-	bch2_trans_iter_exit(trans, &iter);
-	return ret;
-}
-
 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot,
 			struct bch_inode_unpacked *inode)
 {
@@ -179,32 +149,6 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans,
 	return 0;
 }

-static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_iter iter;
-	struct bch_inode_unpacked dir_inode;
-	struct bch_hash_info dir_hash_info;
-	int ret;
-
-	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
-	if (ret)
-		goto err;
-
-	dir_hash_info = bch2_hash_info_init(c, &dir_inode);
-
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
-
-	ret =   bch2_btree_iter_traverse(&iter) ?:
-		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
-				    &dir_hash_info, &iter,
-				    BTREE_UPDATE_internal_snapshot_node);
-	bch2_trans_iter_exit(trans, &iter);
-err:
-	bch_err_fn(c, ret);
-	return ret;
-}
-
 /*
  * Find any subvolume associated with a tree of snapshots
  * We can't rely on master_subvol - it might have been deleted.
@@ -548,7 +492,7 @@ static int remove_backpointer(struct btree_trans *trans,
 			SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
 	int ret = bkey_err(d) ?:
 		dirent_points_to_inode(c, d, inode) ?:
-		__remove_dirent(trans, d.k->p);
+		bch2_fsck_remove_dirent(trans, d.k->p);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
@@ -823,6 +767,7 @@ struct inode_walker_entry {
 	struct bch_inode_unpacked inode;
 	u32 snapshot;
 	u64 count;
+	u64 i_size;
 };

 struct inode_walker {
@@ -910,8 +855,9 @@ found:
 	if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
 		struct inode_walker_entry new = *i;

-		new.snapshot = k.k->p.snapshot;
-		new.count = 0;
+		new.snapshot	= k.k->p.snapshot;
+		new.count	= 0;
+		new.i_size	= 0;

 		struct printbuf buf = PRINTBUF;
 		bch2_bkey_val_to_text(&buf, c, k);
@@ -1116,37 +1062,6 @@ err:
 	return ret;
 }

-static int check_directory_size(struct btree_trans *trans,
-				struct bch_inode_unpacked *inode_u,
-				struct bkey_s_c inode_k, bool *write_inode)
-{
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	u64 new_size = 0;
-	int ret;
-
-	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
-				SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot),
-				POS(inode_k.k->p.offset, U64_MAX),
-				0, k, ret) {
-		if (k.k->type != KEY_TYPE_dirent)
-			continue;
-
-		struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
-		struct qstr name = bch2_dirent_get_name(dirent);
-
-		new_size += dirent_occupied_size(&name);
-	}
-	bch2_trans_iter_exit(trans, &iter);
-
-	if (!ret && inode_u->bi_size != new_size) {
-		inode_u->bi_size = new_size;
-		*write_inode = true;
-	}
-
-	return ret;
-}
-
 static int check_inode(struct btree_trans *trans,
 		       struct btree_iter *iter,
 		       struct bkey_s_c k,
@@ -1335,16 +1250,6 @@ static int check_inode(struct btree_trans *trans,
 		u.bi_journal_seq = journal_cur_seq(&c->journal);
 		do_update = true;
 	}

-	if (S_ISDIR(u.bi_mode)) {
-		ret = check_directory_size(trans, &u, k, &do_update);
-
-		fsck_err_on(ret,
-			    trans, directory_size_mismatch,
-			    "directory inode %llu:%u with the mismatch directory size",
-			    u.bi_inum, k.k->p.snapshot);
-		ret = 0;
-	}
 do_update:
 	if (do_update) {
 		ret = __bch2_fsck_write_inode(trans, &u);
@@ -1516,14 +1421,14 @@ static int check_key_has_inode(struct btree_trans *trans,

 	if (fsck_err_on(!i,
 			trans, key_in_missing_inode,
-			"key in missing inode:\n %s",
+			"key in missing inode:\n%s",
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
 		goto delete;

 	if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
 			trans, key_in_wrong_inode_type,
-			"key for wrong inode mode %o:\n %s",
+			"key for wrong inode mode %o:\n%s",
 			i->inode.bi_mode,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
@@ -1666,13 +1571,13 @@ static int overlapping_extents_found(struct btree_trans *trans,
 	if (ret)
 		goto err;

-	prt_str(&buf, "\n ");
+	prt_newline(&buf);
 	bch2_bkey_val_to_text(&buf, c, k1);

 	if (!bpos_eq(pos1, k1.k->p)) {
-		prt_str(&buf, "\n wanted\n ");
+		prt_str(&buf, "\nwanted\n ");
 		bch2_bpos_to_text(&buf, pos1);
-		prt_str(&buf, "\n ");
+		prt_str(&buf, "\n");
 		bch2_bkey_to_text(&buf, &pos2);

 		bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
@@ -1695,7 +1600,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
 			break;
 	}

-	prt_str(&buf, "\n ");
+	prt_newline(&buf);
 	bch2_bkey_val_to_text(&buf, c, k2);

 	if (bpos_gt(k2.k->p, pos2.p) ||
@@ -1706,7 +1611,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
 		goto err;
 	}

-	prt_printf(&buf, "\n overwriting %s extent",
+	prt_printf(&buf, "\noverwriting %s extent",
 		   pos1.snapshot >= pos2.p.snapshot ? "first" : "second");

 	if (fsck_err(trans, extent_overlapping,
@@ -1727,6 +1632,8 @@ static int overlapping_extents_found(struct btree_trans *trans,
 			bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc);
 		bch2_disk_reservation_put(c, &res);

+		bch_info(c, "repair ret %s", bch2_err_str(ret));
+
 		if (ret)
 			goto err;

@@ -1879,7 +1786,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
 			!bkey_extent_is_reservation(k),
 			trans, extent_past_end_of_inode,
-			"extent type past end of inode %llu:%u, i_size %llu\n %s",
+			"extent type past end of inode %llu:%u, i_size %llu\n%s",
 			i->inode.bi_inum, i->snapshot, i->inode.bi_size,
 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
 		struct btree_iter iter2;
@@ -2017,176 +1924,13 @@ fsck_err:
 	return ret;
 }

-static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w)
 {
 	u32 restart_count = trans->restart_count;
 	return check_subdir_count_notnested(trans, w) ?:
 		trans_was_restarted(trans, restart_count);
 }

-noinline_for_stack
-static int check_dirent_inode_dirent(struct btree_trans *trans,
-				     struct btree_iter *iter,
-				     struct bkey_s_c_dirent d,
-				     struct bch_inode_unpacked *target)
-{
-	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
-	struct btree_iter bp_iter = { NULL };
-	int ret = 0;
-
-	if (inode_points_to_dirent(target, d))
-		return 0;
-
-	if (!target->bi_dir &&
-	    !target->bi_dir_offset) {
-		fsck_err_on(S_ISDIR(target->bi_mode),
-			    trans, inode_dir_missing_backpointer,
-			    "directory with missing backpointer\n%s",
-			    (printbuf_reset(&buf),
-			     bch2_bkey_val_to_text(&buf, c, d.s_c),
-			     prt_printf(&buf, "\n"),
-			     bch2_inode_unpacked_to_text(&buf, target),
-			     buf.buf));
-
-		fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
-			    trans, inode_unlinked_but_has_dirent,
-			    "inode unlinked but has dirent\n%s",
-			    (printbuf_reset(&buf),
-			     bch2_bkey_val_to_text(&buf, c, d.s_c),
-			     prt_printf(&buf, "\n"),
-			     bch2_inode_unpacked_to_text(&buf, target),
-			     buf.buf));
-
-		target->bi_flags &= ~BCH_INODE_unlinked;
-		target->bi_dir		= d.k->p.inode;
-		target->bi_dir_offset	= d.k->p.offset;
-		return __bch2_fsck_write_inode(trans, target);
-	}
-
-	if (bch2_inode_should_have_single_bp(target) &&
-	    !fsck_err(trans, inode_wrong_backpointer,
-		      "dirent points to inode that does not point back:\n %s",
-		      (bch2_bkey_val_to_text(&buf, c, d.s_c),
-		       prt_printf(&buf, "\n "),
-		       bch2_inode_unpacked_to_text(&buf, target),
-		       buf.buf)))
-		goto err;
-
-	struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
-			      SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot));
-	ret = bkey_err(bp_dirent);
-	if (ret && !bch2_err_matches(ret, ENOENT))
-		goto err;
-
-	bool backpointer_exists = !ret;
-	ret = 0;
-
-	if (fsck_err_on(!backpointer_exists,
-			trans, inode_wrong_backpointer,
-			"inode %llu:%u has wrong backpointer:\n"
-			"got %llu:%llu\n"
-			"should be %llu:%llu",
-			target->bi_inum, target->bi_snapshot,
-			target->bi_dir,
-			target->bi_dir_offset,
-			d.k->p.inode,
-			d.k->p.offset)) {
-		target->bi_dir		= d.k->p.inode;
-		target->bi_dir_offset	= d.k->p.offset;
-		ret = __bch2_fsck_write_inode(trans, target);
-		goto out;
-	}
-
-	bch2_bkey_val_to_text(&buf, c, d.s_c);
-	prt_newline(&buf);
-	if (backpointer_exists)
-		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
-
-	if (fsck_err_on(backpointer_exists &&
-			(S_ISDIR(target->bi_mode) ||
-			 target->bi_subvol),
-			trans, inode_dir_multiple_links,
-			"%s %llu:%u with multiple links\n%s",
-			S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
-			target->bi_inum, target->bi_snapshot, buf.buf)) {
-		ret = __remove_dirent(trans, d.k->p);
-		goto out;
-	}
-
-	/*
-	 * hardlinked file with nlink 0:
-	 * We're just adjusting nlink here so check_nlinks() will pick
-	 * it up, it ignores inodes with nlink 0
-	 */
-	if (fsck_err_on(backpointer_exists && !target->bi_nlink,
-			trans, inode_multiple_links_but_nlink_0,
-			"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
-			target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
-		target->bi_nlink++;
-		target->bi_flags &= ~BCH_INODE_unlinked;
-		ret = __bch2_fsck_write_inode(trans, target);
-		if (ret)
-			goto err;
-	}
-out:
-err:
-fsck_err:
-	bch2_trans_iter_exit(trans, &bp_iter);
-	printbuf_exit(&buf);
-	bch_err_fn(c, ret);
-	return ret;
-}
-
-noinline_for_stack
-static int check_dirent_target(struct btree_trans *trans,
-			       struct btree_iter *iter,
-			       struct bkey_s_c_dirent d,
-			       struct bch_inode_unpacked *target)
-{
-	struct bch_fs *c = trans->c;
-	struct bkey_i_dirent *n;
-	struct printbuf buf = PRINTBUF;
-	int ret = 0;
-
-	ret = check_dirent_inode_dirent(trans, iter, d, target);
-	if (ret)
-		goto err;
-
-	if (fsck_err_on(d.v->d_type != inode_d_type(target),
-			trans, dirent_d_type_wrong,
-			"incorrect d_type: got %s, should be %s:\n%s",
-			bch2_d_type_str(d.v->d_type),
-			bch2_d_type_str(inode_d_type(target)),
-			(printbuf_reset(&buf),
-			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
-		n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-		ret = PTR_ERR_OR_ZERO(n);
-		if (ret)
-			goto err;
-
-		bkey_reassemble(&n->k_i, d.s_c);
-		n->v.d_type = inode_d_type(target);
-		if (n->v.d_type == DT_SUBVOL) {
-			n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
-			n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
-		} else {
-			n->v.d_inum = cpu_to_le64(target->bi_inum);
-		}
-
-		ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-		if (ret)
-			goto err;
-
-		d = dirent_i_to_s_c(n);
-	}
-err:
-fsck_err:
-	printbuf_exit(&buf);
-	bch_err_fn(c, ret);
-	return ret;
-}
-
 /* find a subvolume that's a descendent of @snapshot: */
 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
 {
@@ -2286,7 +2030,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
 		if (fsck_err(trans, dirent_to_missing_subvol,
 			     "dirent points to missing subvolume\n%s",
 			     (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
-			return __remove_dirent(trans, d.k->p);
+			return bch2_fsck_remove_dirent(trans, d.k->p);
 		ret = 0;
 		goto out;
 	}
@@ -2330,7 +2074,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
 		goto err;
 	}

-	ret = check_dirent_target(trans, iter, d, &subvol_root);
+	ret = bch2_check_dirent_target(trans, iter, d, &subvol_root, true);
 	if (ret)
 		goto err;
 out:
@@ -2367,7 +2111,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 		goto out;

 	if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
-		ret = check_subdir_count(trans, dir);
+		ret = check_subdir_dirents_count(trans, dir);
 		if (ret)
 			goto err;
 	}
@@ -2417,13 +2161,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k),
 			 buf.buf))) {
-		ret = __remove_dirent(trans, d.k->p);
+		ret = bch2_fsck_remove_dirent(trans, d.k->p);
 		if (ret)
 			goto err;
 	}

 	darray_for_each(target->inodes, i) {
-		ret = check_dirent_target(trans, iter, d, &i->inode);
+		ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true);
 		if (ret)
 			goto err;
 	}
@@ -2457,9 +2201,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 	if (ret)
 		goto err;

-	if (d.v->d_type == DT_DIR)
-		for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+	for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
+		if (d.v->d_type == DT_DIR)
 			i->count++;
+		i->i_size += bkey_bytes(d.k);
+	}
 out:
 err:
 fsck_err:
@@ -3277,7 +3023,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
 	if (arg.opts) {
 		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
 		ret =   PTR_ERR_OR_ZERO(optstr) ?:
-			bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
+			bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr, false);
 		if (!IS_ERR(optstr))
 			kfree(optstr);

@@ -3385,7 +3131,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
 		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);

 		ret =   PTR_ERR_OR_ZERO(optstr) ?:
-			bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
+			bch2_parse_mount_opts(c, &thr->opts, NULL, optstr, false);
 		if (!IS_ERR(optstr))
 			kfree(optstr);
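Taken together with the i_size member added to inode_walker_entry earlier in this diff, check_dirent() now accumulates a directory's size and subdirectory count in a single pass over its dirents, replacing the removed check_directory_size() walk. A toy model of that accumulation (names and sizes are illustrative only, not bcachefs data):

#include <stdio.h>

/* stand-ins for a directory's dirent keys */
struct dirent_key {
	unsigned	bytes;		/* bkey_bytes(d.k) */
	int		is_dir;		/* d.v->d_type == DT_DIR */
};

int main(void)
{
	struct dirent_key dirents[] = { { 40, 1 }, { 48, 0 }, { 56, 0 } };
	unsigned long long i_size = 0, subdirs = 0;

	for (unsigned i = 0; i < sizeof(dirents) / sizeof(dirents[0]); i++) {
		if (dirents[i].is_dir)
			subdirs++;		/* i->count++ */
		i_size += dirents[i].bytes;	/* i->i_size += bkey_bytes(d.k) */
	}

	printf("subdirs %llu, i_size %llu\n", subdirs, i_size);	/* subdirs 1, i_size 144 */
	return 0;
}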
libbcachefs/inode.c
@@ -731,10 +731,9 @@ int bch2_trigger_inode(struct btree_trans *trans,
 		bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
 	}

-	s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
-	if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr) {
-		struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_nr_inodes };
-		int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1, flags & BTREE_TRIGGER_gc);
+	s64 nr[1] = { bkey_is_inode(new.k) - bkey_is_inode(old.k) };
+	if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr[0]) {
+		int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes);
 		if (ret)
 			return ret;
 	}
@@ -868,19 +867,6 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
 			  uid, gid, mode, rdev, parent);
 }

-static inline u32 bkey_generation(struct bkey_s_c k)
-{
-	switch (k.k->type) {
-	case KEY_TYPE_inode:
-	case KEY_TYPE_inode_v2:
-		BUG();
-	case KEY_TYPE_inode_generation:
-		return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
-	default:
-		return 0;
-	}
-}
-
 static struct bkey_i_inode_alloc_cursor *
 bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
 {
@@ -1092,7 +1078,7 @@ retry:
 		bch2_fs_inconsistent(c,
 				     "inode %llu:%u not found when deleting",
 				     inum.inum, snapshot);
-		ret = -EIO;
+		ret = -BCH_ERR_ENOENT_inode;
 		goto err;
 	}

@@ -1198,6 +1184,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
 			opts->_name##_from_inode = true;	\
 		} else {					\
 			opts->_name = c->opts._name;		\
+			opts->_name##_from_inode = false;	\
 		}
 	BCH_INODE_OPTS()
 #undef x
@@ -1255,7 +1242,7 @@ retry:
 		bch2_fs_inconsistent(c,
 				     "inode %llu:%u not found when deleting",
 				     inum, snapshot);
-		ret = -EIO;
+		ret = -BCH_ERR_ENOENT_inode;
 		goto err;
 	}
libbcachefs/inode.h
@@ -277,6 +277,7 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode)
 	bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset;

 	return S_ISDIR(inode->bi_mode) ||
+		inode->bi_subvol ||
 		(!inode->bi_nlink && inode_has_bp);
 }
libbcachefs/inode_format.h
@@ -137,7 +137,8 @@ enum inode_opt_id {
 	x(i_sectors_dirty,	6)	\
 	x(unlinked,		7)	\
 	x(backptr_untrusted,	8)	\
-	x(has_child_snapshot,	9)
+	x(has_child_snapshot,	9)	\
+	x(casefolded,		10)

 /* bits 20+ reserved for packed fields below: */
libbcachefs/io_misc.c
@@ -115,7 +115,8 @@ err:
 	bch2_increment_clock(c, sectors_allocated, WRITE);
 	if (should_print_err(ret)) {
 		struct printbuf buf = PRINTBUF;
-		bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9);
+		lockrestart_do(trans,
+			bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9));
 		prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret));
 		bch_err_ratelimited(c, "%s", buf.buf);
 		printbuf_exit(&buf);
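This hunk, like the matching one in fs-io-buffered.c, wraps bch2_inum_offset_err_msg_trans() in lockrestart_do(), which presumably retries the expression while it returns a transaction-restart error. A toy model of that retry pattern (RESTART and op() are stand-ins, not the bcachefs API):

#include <stdio.h>

#define RESTART	1	/* stand-in for a transaction-restart errcode */

static int attempts;

static int op(void)
{
	/* pretend the first attempt hits a lock restart */
	return attempts++ == 0 ? RESTART : 0;
}

int main(void)
{
	int ret;

	/* the lockrestart_do(trans, expr) pattern: redo expr until it settles */
	do {
		ret = op();
	} while (ret == RESTART);

	printf("ret %d after %d attempts\n", ret, attempts);	/* ret 0 after 2 attempts */
	return 0;
}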
Some files were not shown because too many files have changed in this diff.