mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-03-30 00:00:04 +03:00
Update bcachefs sources to 1392e502d48b bcachefs: Add an "ignore unknown" option to bch2_parse_mount_opts()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
6657ce2de3
commit
8af5c93e48
.bcachefs_revision
c_src
include/linux
libbcachefs
alloc_background.c alloc_foreground.c alloc_foreground.h backpointers.c bkey.h btree_io.c btree_iter.h btree_locking.c btree_trans_commit.c btree_update.c btree_update.h btree_update_interior.c buckets.c checksum.c compress.c data_update.c dirent.c dirent.h disk_accounting.c disk_accounting.h disk_accounting_format.h ec.c ec.h errcode.h error.c extents.c fs-io-buffered.c fs-ioctl.c fs.c fsck.c inode.c inode.h io_read.c io_read.h io_write.c io_write.h journal.c journal_io.c journal_reclaim.c move.c move_types.h namei.c namei.h opts.c opts.h rebalance.c recovery.c sb-counters_format.h sb-errors_format.h super.c sysfs.c time_stats.c time_stats.h trace.h util.h
@ -1 +1 @@
|
||||
dbe591cee299957e282eb7857edea35050b1d8b5
|
||||
e2e7dcddb3660e90a972473bb10de570964754d7
|
||||
|
@ -207,9 +207,8 @@ int cmd_format(int argc, char *argv[])
|
||||
force = true;
|
||||
break;
|
||||
case O_fs_size:
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.opts.fs_size))
|
||||
if (bch2_strtoull_h(optarg, &dev_opts.fs_size))
|
||||
die("invalid filesystem size");
|
||||
dev_opts.opts.fs_size_defined = true;
|
||||
unconsumed_dev_option = true;
|
||||
break;
|
||||
case O_superblock_size:
|
||||
@ -233,8 +232,7 @@ int cmd_format(int argc, char *argv[])
|
||||
darray_push(&device_paths, optarg);
|
||||
dev_opts.path = optarg;
|
||||
darray_push(&devices, dev_opts);
|
||||
dev_opts.opts.fs_size = 0;
|
||||
dev_opts.opts.fs_size_defined = 0;
|
||||
dev_opts.fs_size = 0;
|
||||
unconsumed_dev_option = false;
|
||||
break;
|
||||
case O_quiet:
|
||||
|
@ -326,7 +326,7 @@ kernel_fsck_err:
|
||||
} else {
|
||||
userland_fsck:
|
||||
printf("Running userspace offline fsck\n");
|
||||
ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
|
||||
ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -228,9 +228,10 @@ static int migrate_fs(const char *fs_path,
|
||||
printf("Creating new filesystem on %s in space reserved at %s\n",
|
||||
dev->path, file_path);
|
||||
|
||||
dev->opts.fs_size = get_size(dev->bdev->bd_fd);
|
||||
dev->opts.bucket_size = bch2_pick_bucket_size(fs_opts, devs);
|
||||
dev->nbuckets = dev->opts.fs_size / dev->opts.bucket_size;
|
||||
dev->fs_size = get_size(dev->bdev->bd_fd);
|
||||
opt_set(dev->opts, bucket_size, bch2_pick_bucket_size(fs_opts, devs));
|
||||
|
||||
dev->nbuckets = dev->fs_size / dev->opts.bucket_size;
|
||||
|
||||
bch2_check_bucket_size(fs_opts, dev);
|
||||
|
||||
|
@ -78,13 +78,13 @@ u64 bch2_pick_bucket_size(struct bch_opts opts, dev_opts_list devs)
|
||||
|
||||
u64 min_dev_size = BCH_MIN_NR_NBUCKETS * bucket_size;
|
||||
darray_for_each(devs, i)
|
||||
if (i->opts.fs_size < min_dev_size)
|
||||
if (i->fs_size < min_dev_size)
|
||||
die("cannot format %s, too small (%llu bytes, min %llu)",
|
||||
i->path, i->opts.fs_size, min_dev_size);
|
||||
i->path, i->fs_size, min_dev_size);
|
||||
|
||||
u64 total_fs_size = 0;
|
||||
darray_for_each(devs, i)
|
||||
total_fs_size += i->opts.fs_size;
|
||||
total_fs_size += i->fs_size;
|
||||
|
||||
struct sysinfo info;
|
||||
si_meminfo(&info);
|
||||
@ -181,8 +181,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
|
||||
/* get device size, if it wasn't specified: */
|
||||
darray_for_each(devs, i)
|
||||
if (!opt_defined(i->opts, fs_size))
|
||||
opt_set(i->opts, fs_size, get_size(i->bdev->bd_fd));
|
||||
if (!i->fs_size)
|
||||
i->fs_size = get_size(i->bdev->bd_fd);
|
||||
|
||||
/* calculate bucket sizes: */
|
||||
u64 fs_bucket_size = bch2_pick_bucket_size(fs_opts, devs);
|
||||
@ -190,10 +190,10 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
darray_for_each(devs, i)
|
||||
if (!opt_defined(i->opts, bucket_size))
|
||||
opt_set(i->opts, bucket_size,
|
||||
min(fs_bucket_size, dev_max_bucket_size(i->opts.fs_size)));
|
||||
min(fs_bucket_size, dev_max_bucket_size(i->fs_size)));
|
||||
|
||||
darray_for_each(devs, i) {
|
||||
i->nbuckets = i->opts.fs_size / i->opts.bucket_size;
|
||||
i->nbuckets = i->fs_size / i->opts.bucket_size;
|
||||
bch2_check_bucket_size(fs_opts, i);
|
||||
}
|
||||
|
||||
@ -292,7 +292,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
|
||||
bch2_sb_members_cpy_v2_v1(&sb);
|
||||
|
||||
darray_for_each(devs, i) {
|
||||
u64 size_sectors = i->opts.fs_size >> 9;
|
||||
u64 size_sectors = i->fs_size >> 9;
|
||||
|
||||
sb.sb->dev_idx = i - devs.data;
|
||||
|
||||
|
@ -66,6 +66,7 @@ struct dev_opts {
|
||||
u64 sb_end;
|
||||
|
||||
u64 nbuckets;
|
||||
u64 fs_size;
|
||||
|
||||
const char *label; /* make this a bch_opt */
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
||||
#include "posix_to_bcachefs.h"
|
||||
#include "libbcachefs/alloc_foreground.h"
|
||||
#include "libbcachefs/buckets.h"
|
||||
#include "libbcachefs/fs-common.h"
|
||||
#include "libbcachefs/io_write.h"
|
||||
#include "libbcachefs/namei.h"
|
||||
#include "libbcachefs/str_hash.h"
|
||||
#include "libbcachefs/xattr.h"
|
||||
|
||||
|
@ -67,6 +67,7 @@
|
||||
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
|
||||
#define fallthrough __attribute__((__fallthrough__))
|
||||
#define __noreturn __attribute__((__noreturn__))
|
||||
#define __no_kmsan_checks
|
||||
|
||||
#ifndef __counted_by
|
||||
#define __counted_by(nr)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <linux/byteorder.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/kmsan-checks.h>
|
||||
#include <linux/math.h>
|
||||
#include <linux/minmax.h>
|
||||
|
||||
|
98
include/linux/kmsan-checks.h
Normal file
98
include/linux/kmsan-checks.h
Normal file
@ -0,0 +1,98 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KMSAN checks to be used for one-off annotations in subsystems.
|
||||
*
|
||||
* Copyright (C) 2017-2022 Google LLC
|
||||
* Author: Alexander Potapenko <glider@google.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_KMSAN_CHECKS_H
|
||||
#define _LINUX_KMSAN_CHECKS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_KMSAN
|
||||
|
||||
/**
|
||||
* kmsan_poison_memory() - Mark the memory range as uninitialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to poison.
|
||||
* @flags: GFP flags for allocations done by this function.
|
||||
*
|
||||
* Until other data is written to this range, KMSAN will treat it as
|
||||
* uninitialized. Error reports for this memory will reference the call site of
|
||||
* kmsan_poison_memory() as origin.
|
||||
*/
|
||||
void kmsan_poison_memory(const void *address, size_t size, gfp_t flags);
|
||||
|
||||
/**
|
||||
* kmsan_unpoison_memory() - Mark the memory range as initialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to unpoison.
|
||||
*
|
||||
* Until other data is written to this range, KMSAN will treat it as
|
||||
* initialized.
|
||||
*/
|
||||
void kmsan_unpoison_memory(const void *address, size_t size);
|
||||
|
||||
/**
|
||||
* kmsan_check_memory() - Check the memory range for being initialized.
|
||||
* @address: address to start with.
|
||||
* @size: size of buffer to check.
|
||||
*
|
||||
* If any piece of the given range is marked as uninitialized, KMSAN will report
|
||||
* an error.
|
||||
*/
|
||||
void kmsan_check_memory(const void *address, size_t size);
|
||||
|
||||
/**
|
||||
* kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace.
|
||||
* @to: destination address in the userspace.
|
||||
* @from: source address in the kernel.
|
||||
* @to_copy: number of bytes to copy.
|
||||
* @left: number of bytes not copied.
|
||||
*
|
||||
* If this is a real userspace data transfer, KMSAN checks the bytes that were
|
||||
* actually copied to ensure there was no information leak. If @to belongs to
|
||||
* the kernel space (which is possible for compat syscalls), KMSAN just copies
|
||||
* the metadata.
|
||||
*/
|
||||
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
|
||||
size_t left);
|
||||
|
||||
/**
|
||||
* kmsan_memmove() - Notify KMSAN about a data copy within kernel.
|
||||
* @to: destination address in the kernel.
|
||||
* @from: source address in the kernel.
|
||||
* @size: number of bytes to copy.
|
||||
*
|
||||
* Invoked after non-instrumented version (e.g. implemented using assembly
|
||||
* code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
|
||||
*/
|
||||
void kmsan_memmove(void *to, const void *from, size_t to_copy);
|
||||
|
||||
#else
|
||||
|
||||
static inline void kmsan_poison_memory(const void *address, size_t size,
|
||||
gfp_t flags)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_unpoison_memory(const void *address, size_t size)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_check_memory(const void *address, size_t size)
|
||||
{
|
||||
}
|
||||
static inline void kmsan_copy_to_user(void __user *to, const void *from,
|
||||
size_t to_copy, size_t left)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_KMSAN_CHECKS_H */
|
@ -777,14 +777,12 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s
|
||||
s64 delta_sectors,
|
||||
s64 delta_fragmented, unsigned flags)
|
||||
{
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_dev_data_type,
|
||||
.dev_data_type.dev = ca->dev_idx,
|
||||
.dev_data_type.data_type = data_type,
|
||||
};
|
||||
s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented };
|
||||
|
||||
return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc);
|
||||
return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
|
||||
d, dev_data_type,
|
||||
.dev = ca->dev_idx,
|
||||
.data_type = data_type);
|
||||
}
|
||||
|
||||
int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
|
||||
@ -837,7 +835,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
|
||||
|
||||
struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
|
||||
if (!ca)
|
||||
return -EIO;
|
||||
return -BCH_ERR_trigger_alloc;
|
||||
|
||||
struct bch_alloc_v4 old_a_convert;
|
||||
const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
|
||||
@ -1031,7 +1029,7 @@ fsck_err:
|
||||
invalid_bucket:
|
||||
bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
|
||||
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_trigger_alloc;
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -127,14 +127,14 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
|
||||
|
||||
void bch2_open_bucket_write_error(struct bch_fs *c,
|
||||
struct open_buckets *obs,
|
||||
unsigned dev)
|
||||
unsigned dev, int err)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
unsigned i;
|
||||
|
||||
open_bucket_for_each(c, obs, ob, i)
|
||||
if (ob->dev == dev && ob->ec)
|
||||
bch2_ec_bucket_cancel(c, ob);
|
||||
bch2_ec_bucket_cancel(c, ob, err);
|
||||
}
|
||||
|
||||
static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
|
||||
@ -631,7 +631,7 @@ static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
|
||||
struct bch_dev_usage *usage)
|
||||
{
|
||||
u64 *v = stripe->next_alloc + ca->dev_idx;
|
||||
u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal);
|
||||
u64 free_space = __dev_buckets_available(ca, *usage, BCH_WATERMARK_normal);
|
||||
u64 free_space_inv = free_space
|
||||
? div64_u64(1ULL << 48, free_space)
|
||||
: 1ULL << 48;
|
||||
|
@ -82,7 +82,7 @@ static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
|
||||
}
|
||||
|
||||
void bch2_open_bucket_write_error(struct bch_fs *,
|
||||
struct open_buckets *, unsigned);
|
||||
struct open_buckets *, unsigned, int);
|
||||
|
||||
void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
|
||||
|
||||
|
@ -50,6 +50,8 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke
|
||||
}
|
||||
|
||||
bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
|
||||
prt_str(out, " data_type=");
|
||||
bch2_prt_data_type(out, bp.v->data_type);
|
||||
prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
|
||||
(u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
|
||||
bp.v->bucket_len,
|
||||
@ -782,7 +784,7 @@ enum alloc_sector_counter {
|
||||
ALLOC_SECTORS_NR
|
||||
};
|
||||
|
||||
static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t)
|
||||
static int data_type_to_alloc_counter(enum bch_data_type t)
|
||||
{
|
||||
switch (t) {
|
||||
case BCH_DATA_btree:
|
||||
@ -791,9 +793,10 @@ static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t
|
||||
case BCH_DATA_cached:
|
||||
return ALLOC_cached;
|
||||
case BCH_DATA_stripe:
|
||||
case BCH_DATA_parity:
|
||||
return ALLOC_stripe;
|
||||
default:
|
||||
BUG();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -844,7 +847,11 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
if (bp.v->bucket_gen != a->gen)
|
||||
continue;
|
||||
|
||||
sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len;
|
||||
int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
|
||||
if (alloc_counter < 0)
|
||||
continue;
|
||||
|
||||
sectors[alloc_counter] += bp.v->bucket_len;
|
||||
};
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
if (ret)
|
||||
|
@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
|
||||
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
|
||||
{
|
||||
return bpos_eq(l.k->p, r.k->p) &&
|
||||
l.k->size == r.k->size &&
|
||||
bkey_bytes(l.k) == bkey_bytes(r.k) &&
|
||||
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
|
||||
}
|
||||
|
@ -2117,8 +2117,14 @@ out:
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
|
||||
"writing btree node: %s", bch2_err_str(ret));
|
||||
|
||||
if (!bch2_err_matches(ret, EROFS)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret));
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
bch2_fs_fatal_error(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2135,10 +2141,14 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
|
||||
wbio->submit_time, !bio->bi_status);
|
||||
|
||||
if (ca && bio->bi_status)
|
||||
bch_err_dev_ratelimited(ca,
|
||||
"btree write error: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status));
|
||||
if (ca && bio->bi_status) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_printf(&buf, "btree write error: %s\n ",
|
||||
bch2_blk_status_to_str(bio->bi_status));
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
bch_err_dev_ratelimited(ca, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
if (bio->bi_status) {
|
||||
unsigned long flags;
|
||||
|
@ -335,13 +335,20 @@ static inline void bch2_trans_verify_not_unlocked_or_in_restart(struct btree_tra
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
|
||||
static int btree_trans_restart_foreign_task(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
BUG_ON(err <= 0);
|
||||
BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
|
||||
|
||||
trans->restarted = err;
|
||||
trans->last_restarted_ip = ip;
|
||||
return -err;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip)
|
||||
{
|
||||
btree_trans_restart_foreign_task(trans, err, ip);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
darray_exit(&trans->last_restarted_trace);
|
||||
bch2_save_backtrace(&trans->last_restarted_trace, current, 0, GFP_NOWAIT);
|
||||
|
@ -91,10 +91,10 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
for (i = g->g; i != g->g + g->nr; i++) {
|
||||
struct task_struct *task = i->trans->locking_wait.task;
|
||||
struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
|
||||
if (i != g->g)
|
||||
prt_str(out, "<- ");
|
||||
prt_printf(out, "%u ", task ?task->pid : 0);
|
||||
prt_printf(out, "%u ", task ? task->pid : 0);
|
||||
}
|
||||
prt_newline(out);
|
||||
}
|
||||
@ -172,7 +172,9 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
|
||||
{
|
||||
if (i == g->g) {
|
||||
trace_would_deadlock(g, i->trans);
|
||||
return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
return btree_trans_restart_foreign_task(i->trans,
|
||||
BCH_ERR_transaction_restart_would_deadlock,
|
||||
_THIS_IP_);
|
||||
} else {
|
||||
i->trans->lock_must_abort = true;
|
||||
wake_up_process(i->trans->locking_wait.task);
|
||||
|
@ -164,6 +164,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
|
||||
EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
|
||||
EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
|
||||
EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
|
||||
kmsan_check_memory(insert, bkey_bytes(&insert->k));
|
||||
|
||||
k = bch2_btree_node_iter_peek_all(node_iter, b);
|
||||
if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
|
||||
|
@ -512,6 +512,8 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
|
||||
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
|
||||
{
|
||||
kmsan_check_memory(k, bkey_bytes(&k->k));
|
||||
|
||||
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
|
||||
int ret;
|
||||
|
||||
|
@ -133,6 +133,8 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr
|
||||
enum btree_id btree,
|
||||
struct bkey_i *k)
|
||||
{
|
||||
kmsan_check_memory(k, bkey_bytes(&k->k));
|
||||
|
||||
if (unlikely(!btree_type_uses_write_buffer(btree))) {
|
||||
int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
|
||||
dump_stack();
|
||||
|
@ -649,6 +649,14 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If the node has been reused, we might be reading uninitialized memory - that's fine: */
|
||||
static noinline __no_kmsan_checks bool btree_node_seq_matches(struct btree *b, __le64 seq)
|
||||
{
|
||||
struct btree_node *b_data = READ_ONCE(b->data);
|
||||
|
||||
return (b_data ? b_data->keys.seq : 0) == seq;
|
||||
}
|
||||
|
||||
static void btree_update_nodes_written(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
@ -677,17 +685,9 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
__le64 seq;
|
||||
|
||||
b = as->old_nodes[i];
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
bch2_trans_unlock_long(trans);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
if (btree_node_seq_matches(b, as->old_nodes_seq[i]))
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
@ -724,9 +724,7 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
.replicas.nr_required = 1,
|
||||
};
|
||||
|
||||
struct disk_accounting_pos acct_compression_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_compression,
|
||||
};
|
||||
unsigned cur_compression_type = 0;
|
||||
u64 compression_acct[3] = { 1, 0, 0 };
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
@ -760,13 +758,13 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
acc_replicas_key.replicas.nr_required = 0;
|
||||
}
|
||||
|
||||
if (acct_compression_key.compression.type &&
|
||||
acct_compression_key.compression.type != p.crc.compression_type) {
|
||||
if (cur_compression_type &&
|
||||
cur_compression_type != p.crc.compression_type) {
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
|
||||
|
||||
ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
|
||||
ARRAY_SIZE(compression_acct), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
|
||||
compression, cur_compression_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -775,7 +773,7 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
compression_acct[2] = 0;
|
||||
}
|
||||
|
||||
acct_compression_key.compression.type = p.crc.compression_type;
|
||||
cur_compression_type = p.crc.compression_type;
|
||||
if (p.crc.compression_type) {
|
||||
compression_acct[1] += p.crc.uncompressed_size;
|
||||
compression_acct[2] += p.crc.compressed_size;
|
||||
@ -789,45 +787,34 @@ static int __trigger_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
|
||||
struct disk_accounting_pos acc_snapshot_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_snapshot,
|
||||
.snapshot.id = k.k->p.snapshot,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
|
||||
ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, snapshot, k.k->p.snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (acct_compression_key.compression.type) {
|
||||
if (cur_compression_type) {
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));
|
||||
|
||||
ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
|
||||
ARRAY_SIZE(compression_acct), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, compression_acct,
|
||||
compression, cur_compression_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (level) {
|
||||
struct disk_accounting_pos acc_btree_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_btree,
|
||||
.btree.id = btree_id,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
|
||||
ret = bch2_disk_accounting_mod2_nr(trans, gc, replicas_sectors, 1, btree, btree_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
bool insert = !(flags & BTREE_TRIGGER_overwrite);
|
||||
struct disk_accounting_pos acc_inum_key = {
|
||||
.type = BCH_DISK_ACCOUNTING_inum,
|
||||
.inum.inum = k.k->p.inode,
|
||||
};
|
||||
|
||||
s64 v[3] = {
|
||||
insert ? 1 : -1,
|
||||
insert ? k.k->size : -((s64) k.k->size),
|
||||
*replicas_sectors,
|
||||
};
|
||||
ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
|
||||
ret = bch2_disk_accounting_mod2(trans, gc, v, inum, k.k->p.inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -876,15 +863,15 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
int need_rebalance_delta = 0;
|
||||
s64 need_rebalance_sectors_delta = 0;
|
||||
s64 need_rebalance_sectors_delta[1] = { 0 };
|
||||
|
||||
s64 s = bch2_bkey_sectors_need_rebalance(c, old);
|
||||
need_rebalance_delta -= s != 0;
|
||||
need_rebalance_sectors_delta -= s;
|
||||
need_rebalance_sectors_delta[0] -= s;
|
||||
|
||||
s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
|
||||
need_rebalance_delta += s != 0;
|
||||
need_rebalance_sectors_delta += s;
|
||||
need_rebalance_sectors_delta[0] += s;
|
||||
|
||||
if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
|
||||
int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
|
||||
@ -893,12 +880,9 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (need_rebalance_sectors_delta) {
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_rebalance_work,
|
||||
};
|
||||
int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
|
||||
flags & BTREE_TRIGGER_gc);
|
||||
if (need_rebalance_sectors_delta[0]) {
|
||||
int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
|
||||
need_rebalance_sectors_delta, rebalance_work);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -914,17 +898,13 @@ static int __trigger_reservation(struct btree_trans *trans,
|
||||
enum btree_iter_update_trigger_flags flags)
|
||||
{
|
||||
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
|
||||
s64 sectors = k.k->size;
|
||||
s64 sectors[1] = { k.k->size };
|
||||
|
||||
if (flags & BTREE_TRIGGER_overwrite)
|
||||
sectors = -sectors;
|
||||
sectors[0] = -sectors[0];
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_persistent_reserved,
|
||||
.persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
|
||||
};
|
||||
|
||||
return bch2_disk_accounting_mod(trans, &acc, §ors, 1, flags & BTREE_TRIGGER_gc);
|
||||
return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, sectors,
|
||||
persistent_reserved, bkey_s_c_to_reservation(k).v->nr_replicas);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -466,7 +466,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
|
||||
prt_str(&buf, ")");
|
||||
WARN_RATELIMIT(1, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
return -BCH_ERR_recompute_checksum;
|
||||
}
|
||||
|
||||
for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
|
||||
|
@ -177,7 +177,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
size_t src_len = src->bi_iter.bi_size;
|
||||
size_t dst_len = crc.uncompressed_size << 9;
|
||||
void *workspace;
|
||||
int ret;
|
||||
int ret = 0, ret2;
|
||||
|
||||
enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type);
|
||||
mempool_t *workspace_pool = &c->compress_workspace[opt];
|
||||
@ -189,7 +189,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
else
|
||||
ret = -BCH_ERR_compression_workspace_not_initialized;
|
||||
if (ret)
|
||||
goto out;
|
||||
goto err;
|
||||
}
|
||||
|
||||
src_data = bio_map_or_bounce(c, src, READ);
|
||||
@ -197,10 +197,10 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
switch (crc.compression_type) {
|
||||
case BCH_COMPRESSION_TYPE_lz4_old:
|
||||
case BCH_COMPRESSION_TYPE_lz4:
|
||||
ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
|
||||
src_len, dst_len, dst_len);
|
||||
if (ret != dst_len)
|
||||
goto err;
|
||||
ret2 = LZ4_decompress_safe_partial(src_data.b, dst_data,
|
||||
src_len, dst_len, dst_len);
|
||||
if (ret2 != dst_len)
|
||||
ret = -BCH_ERR_decompress_lz4;
|
||||
break;
|
||||
case BCH_COMPRESSION_TYPE_gzip: {
|
||||
z_stream strm = {
|
||||
@ -214,45 +214,43 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
|
||||
zlib_set_workspace(&strm, workspace);
|
||||
zlib_inflateInit2(&strm, -MAX_WBITS);
|
||||
ret = zlib_inflate(&strm, Z_FINISH);
|
||||
ret2 = zlib_inflate(&strm, Z_FINISH);
|
||||
|
||||
mempool_free(workspace, workspace_pool);
|
||||
|
||||
if (ret != Z_STREAM_END)
|
||||
goto err;
|
||||
if (ret2 != Z_STREAM_END)
|
||||
ret = -BCH_ERR_decompress_gzip;
|
||||
break;
|
||||
}
|
||||
case BCH_COMPRESSION_TYPE_zstd: {
|
||||
ZSTD_DCtx *ctx;
|
||||
size_t real_src_len = le32_to_cpup(src_data.b);
|
||||
|
||||
if (real_src_len > src_len - 4)
|
||||
if (real_src_len > src_len - 4) {
|
||||
ret = -BCH_ERR_decompress_zstd_src_len_bad;
|
||||
goto err;
|
||||
}
|
||||
|
||||
workspace = mempool_alloc(workspace_pool, GFP_NOFS);
|
||||
ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
|
||||
|
||||
ret = zstd_decompress_dctx(ctx,
|
||||
ret2 = zstd_decompress_dctx(ctx,
|
||||
dst_data, dst_len,
|
||||
src_data.b + 4, real_src_len);
|
||||
|
||||
mempool_free(workspace, workspace_pool);
|
||||
|
||||
if (ret != dst_len)
|
||||
goto err;
|
||||
if (ret2 != dst_len)
|
||||
ret = -BCH_ERR_decompress_zstd;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
ret = 0;
|
||||
err:
|
||||
fsck_err:
|
||||
out:
|
||||
bio_unmap_or_unbounce(c, src_data);
|
||||
return ret;
|
||||
err:
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
@ -268,27 +266,22 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
BUG_ON(!bio->bi_vcnt);
|
||||
BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
|
||||
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"extent too big to decompress");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
bch2_write_op_error(op, op->pos.offset,
|
||||
"extent too big to decompress (%u > %u)",
|
||||
crc->uncompressed_size << 9, c->opts.encoded_extent_max);
|
||||
return -BCH_ERR_decompress_exceeded_max_encoded_extent;
|
||||
}
|
||||
|
||||
data = __bounce_alloc(c, dst_len, WRITE);
|
||||
|
||||
if (__bio_uncompress(c, bio, data.b, *crc)) {
|
||||
if (!c->opts.no_data_io) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"decompression error");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
ret = -EIO;
|
||||
ret = __bio_uncompress(c, bio, data.b, *crc);
|
||||
|
||||
if (c->opts.no_data_io)
|
||||
ret = 0;
|
||||
|
||||
if (ret) {
|
||||
bch2_write_op_error(op, op->pos.offset, "%s", bch2_err_str(ret));
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -321,7 +314,7 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
|
||||
|
||||
if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc.compressed_size << 9 > c->opts.encoded_extent_max)
|
||||
return -EIO;
|
||||
return -BCH_ERR_decompress_exceeded_max_encoded_extent;
|
||||
|
||||
dst_data = dst_len == dst_iter.bi_size
|
||||
? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
|
||||
|
@ -354,7 +354,7 @@ restart_drop_extra_replicas:
|
||||
printbuf_exit(&buf);
|
||||
|
||||
bch2_fatal_error(c);
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_invalid_bkey;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -729,3 +729,54 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
|
||||
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
|
||||
/* fsck */
|
||||
|
||||
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
|
||||
BTREE_ITER_all_snapshots, k, ret) {
|
||||
if (k.k->p.offset != inode_nr)
|
||||
break;
|
||||
if (!bkey_is_inode(k.k))
|
||||
continue;
|
||||
ret = bch2_inode_unpack(k, inode);
|
||||
goto found;
|
||||
}
|
||||
ret = -BCH_ERR_ENOENT_inode;
|
||||
found:
|
||||
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bch_inode_unpacked dir_inode;
|
||||
struct bch_hash_info dir_hash_info;
|
||||
int ret;
|
||||
|
||||
ret = lookup_first_inode(trans, pos.inode, &dir_inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
dir_hash_info = bch2_hash_info_init(c, &dir_inode);
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||
bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
|
||||
&dir_hash_info, &iter,
|
||||
BTREE_UPDATE_internal_snapshot_node);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
err:
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
@ -82,4 +82,6 @@ int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32, u32);
|
||||
int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
|
||||
int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
|
||||
|
||||
int bch2_fsck_remove_dirent(struct btree_trans *, struct bpos);
|
||||
|
||||
#endif /* _BCACHEFS_DIRENT_H */
|
||||
|
@ -135,6 +135,12 @@ static inline bool is_zero(char *start, char *end)
|
||||
|
||||
#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
|
||||
|
||||
static const unsigned bch2_accounting_type_nr_counters[] = {
|
||||
#define x(f, id, nr) [BCH_DISK_ACCOUNTING_##f] = nr,
|
||||
BCH_DISK_ACCOUNTING_TYPES()
|
||||
#undef x
|
||||
};
|
||||
|
||||
int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
@ -193,6 +199,11 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
|
||||
c, accounting_key_junk_at_end,
|
||||
"junk at end of accounting key");
|
||||
|
||||
bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type],
|
||||
c, accounting_key_nr_counters_wrong,
|
||||
"accounting key with %u counters, should be %u",
|
||||
bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
@ -33,10 +33,12 @@ static inline bool bch2_accounting_key_is_zero(struct bkey_s_c_accounting a)
|
||||
static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
|
||||
struct bkey_s_c_accounting src)
|
||||
{
|
||||
EBUG_ON(dst->k.u64s != src.k->u64s);
|
||||
|
||||
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
|
||||
for (unsigned i = 0;
|
||||
i < min(bch2_accounting_counters(&dst->k),
|
||||
bch2_accounting_counters(src.k));
|
||||
i++)
|
||||
dst->v.d[i] += src.v->d[i];
|
||||
|
||||
if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
|
||||
dst->k.bversion = src.k->bversion;
|
||||
}
|
||||
@ -85,6 +87,24 @@ static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos
|
||||
|
||||
int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *,
|
||||
s64 *, unsigned, bool);
|
||||
|
||||
#define disk_accounting_key_init(_k, _type, ...) \
|
||||
do { \
|
||||
memset(&(_k), 0, sizeof(_k)); \
|
||||
(_k).type = BCH_DISK_ACCOUNTING_##_type; \
|
||||
(_k)._type = (struct bch_acct_##_type) { __VA_ARGS__ }; \
|
||||
} while (0)
|
||||
|
||||
#define bch2_disk_accounting_mod2_nr(_trans, _gc, _v, _nr, ...) \
|
||||
({ \
|
||||
struct disk_accounting_pos pos; \
|
||||
disk_accounting_key_init(pos, __VA_ARGS__); \
|
||||
bch2_disk_accounting_mod(trans, &pos, _v, _nr, _gc); \
|
||||
})
|
||||
|
||||
#define bch2_disk_accounting_mod2(_trans, _gc, _v, ...) \
|
||||
bch2_disk_accounting_mod2_nr(_trans, _gc, _v, ARRAY_SIZE(_v), __VA_ARGS__)
|
||||
|
||||
int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);
|
||||
|
||||
int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
|
||||
|
@ -95,40 +95,81 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* field 1: name
|
||||
* field 2: id
|
||||
* field 3: number of counters (max 3)
|
||||
*/
|
||||
|
||||
#define BCH_DISK_ACCOUNTING_TYPES() \
|
||||
x(nr_inodes, 0) \
|
||||
x(persistent_reserved, 1) \
|
||||
x(replicas, 2) \
|
||||
x(dev_data_type, 3) \
|
||||
x(compression, 4) \
|
||||
x(snapshot, 5) \
|
||||
x(btree, 6) \
|
||||
x(rebalance_work, 7) \
|
||||
x(inum, 8)
|
||||
x(nr_inodes, 0, 1) \
|
||||
x(persistent_reserved, 1, 1) \
|
||||
x(replicas, 2, 1) \
|
||||
x(dev_data_type, 3, 3) \
|
||||
x(compression, 4, 3) \
|
||||
x(snapshot, 5, 1) \
|
||||
x(btree, 6, 1) \
|
||||
x(rebalance_work, 7, 1) \
|
||||
x(inum, 8, 3)
|
||||
|
||||
enum disk_accounting_type {
|
||||
#define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr,
|
||||
#define x(f, nr, ...) BCH_DISK_ACCOUNTING_##f = nr,
|
||||
BCH_DISK_ACCOUNTING_TYPES()
|
||||
#undef x
|
||||
BCH_DISK_ACCOUNTING_TYPE_NR,
|
||||
};
|
||||
|
||||
struct bch_nr_inodes {
|
||||
/*
|
||||
* No subtypes - number of inodes in the entire filesystem
|
||||
*
|
||||
* XXX: perhaps we could add a per-subvolume counter?
|
||||
*/
|
||||
struct bch_acct_nr_inodes {
|
||||
};
|
||||
|
||||
struct bch_persistent_reserved {
|
||||
/*
|
||||
* Tracks KEY_TYPE_reservation sectors, broken out by number of replicas for the
|
||||
* reservation:
|
||||
*/
|
||||
struct bch_acct_persistent_reserved {
|
||||
__u8 nr_replicas;
|
||||
};
|
||||
|
||||
struct bch_dev_data_type {
|
||||
/*
|
||||
* device, data type counter fields:
|
||||
* [
|
||||
* nr_buckets
|
||||
* live sectors (in buckets of that data type)
|
||||
* sectors of internal fragmentation
|
||||
* ]
|
||||
*
|
||||
* XXX: live sectors should've been done differently, you can have multiple data
|
||||
* types in the same bucket (user, stripe, cached) and this collapses them to
|
||||
* the bucket data type, and makes the internal fragmentation counter redundant
|
||||
*/
|
||||
struct bch_acct_dev_data_type {
|
||||
__u8 dev;
|
||||
__u8 data_type;
|
||||
};
|
||||
|
||||
/*
|
||||
* Compression type fields:
|
||||
* [
|
||||
* number of extents
|
||||
* uncompressed size
|
||||
* compressed size
|
||||
* ]
|
||||
*
|
||||
* Compression ratio, average extent size (fragmentation).
|
||||
*/
|
||||
struct bch_acct_compression {
|
||||
__u8 type;
|
||||
};
|
||||
|
||||
/*
|
||||
* On disk usage by snapshot id; counts same values as replicas counter, but
|
||||
* aggregated differently
|
||||
*/
|
||||
struct bch_acct_snapshot {
|
||||
__u32 id;
|
||||
} __packed;
|
||||
@ -137,10 +178,27 @@ struct bch_acct_btree {
|
||||
__u32 id;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* inum counter fields:
|
||||
* [
|
||||
* number of extents
|
||||
* sum of extent sizes - bkey size
|
||||
* this field is similar to inode.bi_sectors, except here extents in
|
||||
* different snapshots but the same inode number are all collapsed to the
|
||||
* same counter
|
||||
* sum of on disk size - same values tracked by replicas counters
|
||||
* ]
|
||||
*
|
||||
* This tracks on disk fragmentation.
|
||||
*/
|
||||
struct bch_acct_inum {
|
||||
__u64 inum;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Simple counter of the amount of data (on disk sectors) rebalance needs to
|
||||
* move, extents counted here are also in the rebalance_work btree.
|
||||
*/
|
||||
struct bch_acct_rebalance_work {
|
||||
};
|
||||
|
||||
@ -149,10 +207,10 @@ struct disk_accounting_pos {
|
||||
struct {
|
||||
__u8 type;
|
||||
union {
|
||||
struct bch_nr_inodes nr_inodes;
|
||||
struct bch_persistent_reserved persistent_reserved;
|
||||
struct bch_acct_nr_inodes nr_inodes;
|
||||
struct bch_acct_persistent_reserved persistent_reserved;
|
||||
struct bch_replicas_entry_v1 replicas;
|
||||
struct bch_dev_data_type dev_data_type;
|
||||
struct bch_acct_dev_data_type dev_data_type;
|
||||
struct bch_acct_compression compression;
|
||||
struct bch_acct_snapshot snapshot;
|
||||
struct bch_acct_btree btree;
|
||||
|
@ -1124,7 +1124,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
|
||||
bch2_fs_inconsistent(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
return -BCH_ERR_erasure_coding_found_btree_node;
|
||||
}
|
||||
|
||||
k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
|
||||
@ -1190,7 +1190,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
|
||||
struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
|
||||
if (!ca)
|
||||
return -EIO;
|
||||
return -BCH_ERR_ENOENT_dev_not_found;
|
||||
|
||||
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
|
||||
|
||||
@ -1227,21 +1227,19 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
|
||||
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
|
||||
int ret = 0;
|
||||
unsigned nr_data = v->nr_blocks - v->nr_redundant;
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
int ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < nr_data; i++) {
|
||||
for (unsigned i = 0; i < nr_data; i++) {
|
||||
ret = ec_stripe_update_bucket(trans, s, i);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1451,11 +1449,11 @@ static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int
|
||||
ec_stripe_new_set_pending(c, h);
|
||||
}
|
||||
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob, int err)
|
||||
{
|
||||
struct ec_stripe_new *s = ob->ec;
|
||||
|
||||
s->err = -EIO;
|
||||
s->err = err;
|
||||
}
|
||||
|
||||
void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
|
||||
|
@ -249,7 +249,7 @@ int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *, struct bkey
|
||||
|
||||
void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
|
||||
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *, int);
|
||||
|
||||
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
|
||||
|
||||
|
@ -116,6 +116,7 @@
|
||||
x(ENOENT, ENOENT_snapshot_tree) \
|
||||
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
|
||||
x(ENOENT, ENOENT_dev_not_found) \
|
||||
x(ENOENT, ENOENT_dev_bucket_not_found) \
|
||||
x(ENOENT, ENOENT_dev_idx_not_found) \
|
||||
x(ENOENT, ENOENT_inode_no_backpointer) \
|
||||
x(ENOENT, ENOENT_no_snapshot_tree_subvol) \
|
||||
@ -207,6 +208,7 @@
|
||||
x(EINVAL, no_resize_with_buckets_nouse) \
|
||||
x(EINVAL, inode_unpack_error) \
|
||||
x(EINVAL, varint_decode_error) \
|
||||
x(EINVAL, erasure_coding_found_btree_node) \
|
||||
x(EOPNOTSUPP, may_not_use_incompat_feature) \
|
||||
x(EROFS, erofs_trans_commit) \
|
||||
x(EROFS, erofs_no_writes) \
|
||||
@ -267,6 +269,7 @@
|
||||
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
|
||||
x(EIO, journal_shutdown) \
|
||||
x(EIO, journal_flush_err) \
|
||||
x(EIO, journal_write_err) \
|
||||
x(EIO, btree_node_read_err) \
|
||||
x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \
|
||||
x(EIO, sb_not_downgraded) \
|
||||
@ -275,6 +278,7 @@
|
||||
x(EIO, btree_node_read_validate_error) \
|
||||
x(EIO, btree_need_topology_repair) \
|
||||
x(EIO, bucket_ref_update) \
|
||||
x(EIO, trigger_alloc) \
|
||||
x(EIO, trigger_pointer) \
|
||||
x(EIO, trigger_stripe_pointer) \
|
||||
x(EIO, metadata_bucket_inconsistency) \
|
||||
@ -290,7 +294,19 @@
|
||||
x(EIO, EIO_fault_injected) \
|
||||
x(EIO, ec_block_read) \
|
||||
x(EIO, ec_block_write) \
|
||||
x(EIO, data_read) \
|
||||
x(EIO, recompute_checksum) \
|
||||
x(EIO, decompress) \
|
||||
x(BCH_ERR_decompress, decompress_exceeded_max_encoded_extent) \
|
||||
x(BCH_ERR_decompress, decompress_lz4) \
|
||||
x(BCH_ERR_decompress, decompress_gzip) \
|
||||
x(BCH_ERR_decompress, decompress_zstd_src_len_bad) \
|
||||
x(BCH_ERR_decompress, decompress_zstd) \
|
||||
x(EIO, data_write) \
|
||||
x(BCH_ERR_data_write, data_write_io) \
|
||||
x(BCH_ERR_data_write, data_write_csum) \
|
||||
x(BCH_ERR_data_write, data_write_invalid_ptr) \
|
||||
x(BCH_ERR_data_write, data_write_misaligned) \
|
||||
x(BCH_ERR_decompress, data_read) \
|
||||
x(BCH_ERR_data_read, no_device_to_read_from) \
|
||||
x(BCH_ERR_data_read, data_read_io_err) \
|
||||
x(BCH_ERR_data_read, data_read_csum_err) \
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include "btree_cache.h"
|
||||
#include "btree_iter.h"
|
||||
#include "error.h"
|
||||
#include "fs-common.h"
|
||||
#include "journal.h"
|
||||
#include "namei.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "super.h"
|
||||
#include "thread_with_file.h"
|
||||
|
@ -136,12 +136,8 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
|
||||
if (k.k->type == KEY_TYPE_error)
|
||||
return -BCH_ERR_key_type_error;
|
||||
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
|
||||
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
|
||||
return -BCH_ERR_extent_poisened;
|
||||
|
||||
rcu_read_lock();
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
u64 pick_latency;
|
||||
@ -592,29 +588,35 @@ static void bch2_extent_crc_pack(union bch_extent_crc *dst,
|
||||
struct bch_extent_crc_unpacked src,
|
||||
enum bch_extent_entry_type type)
|
||||
{
|
||||
#define set_common_fields(_dst, _src) \
|
||||
_dst.type = 1 << type; \
|
||||
_dst.csum_type = _src.csum_type, \
|
||||
_dst.compression_type = _src.compression_type, \
|
||||
_dst._compressed_size = _src.compressed_size - 1, \
|
||||
_dst._uncompressed_size = _src.uncompressed_size - 1, \
|
||||
_dst.offset = _src.offset
|
||||
#define common_fields(_src) \
|
||||
.type = BIT(type), \
|
||||
.csum_type = _src.csum_type, \
|
||||
.compression_type = _src.compression_type, \
|
||||
._compressed_size = _src.compressed_size - 1, \
|
||||
._uncompressed_size = _src.uncompressed_size - 1, \
|
||||
.offset = _src.offset
|
||||
|
||||
switch (type) {
|
||||
case BCH_EXTENT_ENTRY_crc32:
|
||||
set_common_fields(dst->crc32, src);
|
||||
dst->crc32.csum = (u32 __force) *((__le32 *) &src.csum.lo);
|
||||
dst->crc32 = (struct bch_extent_crc32) {
|
||||
common_fields(src),
|
||||
.csum = (u32 __force) *((__le32 *) &src.csum.lo),
|
||||
};
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_crc64:
|
||||
set_common_fields(dst->crc64, src);
|
||||
dst->crc64.nonce = src.nonce;
|
||||
dst->crc64.csum_lo = (u64 __force) src.csum.lo;
|
||||
dst->crc64.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi);
|
||||
dst->crc64 = (struct bch_extent_crc64) {
|
||||
common_fields(src),
|
||||
.nonce = src.nonce,
|
||||
.csum_lo = (u64 __force) src.csum.lo,
|
||||
.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi),
|
||||
};
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_crc128:
|
||||
set_common_fields(dst->crc128, src);
|
||||
dst->crc128.nonce = src.nonce;
|
||||
dst->crc128.csum = src.csum;
|
||||
dst->crc128 = (struct bch_extent_crc128) {
|
||||
common_fields(src),
|
||||
.nonce = src.nonce,
|
||||
.csum = src.csum,
|
||||
};
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
|
@ -225,11 +225,11 @@ static void bchfs_read(struct btree_trans *trans,
|
||||
|
||||
bch2_read_extent(trans, rbio, iter.pos,
|
||||
data_btree, k, offset_into_extent, flags);
|
||||
swap(rbio->bio.bi_iter.bi_size, bytes);
|
||||
|
||||
if (flags & BCH_READ_last_fragment)
|
||||
break;
|
||||
|
||||
swap(rbio->bio.bi_iter.bi_size, bytes);
|
||||
bio_advance(&rbio->bio, bytes);
|
||||
err:
|
||||
if (ret &&
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include "chardev.h"
|
||||
#include "dirent.h"
|
||||
#include "fs.h"
|
||||
#include "fs-common.h"
|
||||
#include "fs-ioctl.h"
|
||||
#include "namei.h"
|
||||
#include "quota.h"
|
||||
|
||||
#include <linux/compat.h>
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include "errcode.h"
|
||||
#include "extents.h"
|
||||
#include "fs.h"
|
||||
#include "fs-common.h"
|
||||
#include "fs-io.h"
|
||||
#include "fs-ioctl.h"
|
||||
#include "fs-io-buffered.h"
|
||||
@ -22,6 +21,7 @@
|
||||
#include "io_read.h"
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "namei.h"
|
||||
#include "quota.h"
|
||||
#include "rebalance.h"
|
||||
#include "snapshot.h"
|
||||
@ -641,7 +641,9 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
|
||||
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
|
||||
|
||||
ret = bch2_dirent_read_target(trans, dir, d, &inum);
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
if (ret)
|
||||
@ -651,30 +653,30 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
|
||||
if (inode)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Note: if check/repair needs it, we commit before
|
||||
* bch2_inode_hash_init_insert(), as after that point we can't take a
|
||||
* restart - not in the top level loop with a commit_do(), like we
|
||||
* usually do:
|
||||
*/
|
||||
|
||||
struct bch_subvolume subvol;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
|
||||
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
|
||||
bch2_check_dirent_target(trans, &dirent_iter, d, &inode_u, false) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
|
||||
PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
|
||||
|
||||
/*
|
||||
* don't remove it: check_inodes might find another inode that points
|
||||
* back to this dirent
|
||||
*/
|
||||
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
|
||||
c, "dirent to missing inode:\n %s",
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* regular files may have hardlinks: */
|
||||
if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) &&
|
||||
!bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)),
|
||||
c,
|
||||
"dirent points to inode that does not point back:\n %s",
|
||||
(bch2_bkey_val_to_text(&buf, c, k),
|
||||
prt_printf(&buf, "\n "),
|
||||
bch2_inode_unpacked_to_text(&buf, &inode_u),
|
||||
buf.buf))) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||
printbuf_exit(&buf);
|
||||
@ -2177,7 +2179,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
|
||||
|
||||
/* Some options can't be parsed until after the fs is started: */
|
||||
opts = bch2_opts_empty();
|
||||
ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
|
||||
ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf, false);
|
||||
if (ret)
|
||||
goto err_stop_fs;
|
||||
|
||||
@ -2331,6 +2333,8 @@ static int bch2_fs_parse_param(struct fs_context *fc,
|
||||
int ret = bch2_parse_one_mount_opt(c, &opts->opts,
|
||||
&opts->parse_later, param->key,
|
||||
param->string);
|
||||
if (ret)
|
||||
pr_err("Error parsing option %s: %s", param->key, bch2_err_str(ret));
|
||||
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
@ -10,10 +10,10 @@
|
||||
#include "dirent.h"
|
||||
#include "error.h"
|
||||
#include "fs.h"
|
||||
#include "fs-common.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "keylist.h"
|
||||
#include "namei.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "snapshot.h"
|
||||
#include "super.h"
|
||||
@ -23,13 +23,6 @@
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/dcache.h> /* struct qstr */
|
||||
|
||||
static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
|
||||
struct bkey_s_c_dirent d)
|
||||
{
|
||||
return inode->bi_dir == d.k->p.inode &&
|
||||
inode->bi_dir_offset == d.k->p.offset;
|
||||
}
|
||||
|
||||
static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
@ -116,29 +109,6 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
|
||||
BTREE_ITER_all_snapshots, k, ret) {
|
||||
if (k.k->p.offset != inode_nr)
|
||||
break;
|
||||
if (!bkey_is_inode(k.k))
|
||||
continue;
|
||||
ret = bch2_inode_unpack(k, inode);
|
||||
goto found;
|
||||
}
|
||||
ret = -BCH_ERR_ENOENT_inode;
|
||||
found:
|
||||
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
@ -179,32 +149,6 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bch_inode_unpacked dir_inode;
|
||||
struct bch_hash_info dir_hash_info;
|
||||
int ret;
|
||||
|
||||
ret = lookup_first_inode(trans, pos.inode, &dir_inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
dir_hash_info = bch2_hash_info_init(c, &dir_inode);
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||
bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
|
||||
&dir_hash_info, &iter,
|
||||
BTREE_UPDATE_internal_snapshot_node);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
err:
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find any subvolume associated with a tree of snapshots
|
||||
* We can't rely on master_subvol - it might have been deleted.
|
||||
@ -548,7 +492,7 @@ static int remove_backpointer(struct btree_trans *trans,
|
||||
SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
|
||||
int ret = bkey_err(d) ?:
|
||||
dirent_points_to_inode(c, d, inode) ?:
|
||||
__remove_dirent(trans, d.k->p);
|
||||
bch2_fsck_remove_dirent(trans, d.k->p);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
@ -1985,169 +1929,6 @@ static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_wa
|
||||
trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
noinline_for_stack
|
||||
static int check_dirent_inode_dirent(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *target)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct btree_iter bp_iter = { NULL };
|
||||
int ret = 0;
|
||||
|
||||
if (inode_points_to_dirent(target, d))
|
||||
return 0;
|
||||
|
||||
if (!target->bi_dir &&
|
||||
!target->bi_dir_offset) {
|
||||
fsck_err_on(S_ISDIR(target->bi_mode),
|
||||
trans, inode_dir_missing_backpointer,
|
||||
"directory with missing backpointer\n%s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n"),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf));
|
||||
|
||||
fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
|
||||
trans, inode_unlinked_but_has_dirent,
|
||||
"inode unlinked but has dirent\n%s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n"),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf));
|
||||
|
||||
target->bi_flags &= ~BCH_INODE_unlinked;
|
||||
target->bi_dir = d.k->p.inode;
|
||||
target->bi_dir_offset = d.k->p.offset;
|
||||
return __bch2_fsck_write_inode(trans, target);
|
||||
}
|
||||
|
||||
if (bch2_inode_should_have_single_bp(target) &&
|
||||
!fsck_err(trans, inode_wrong_backpointer,
|
||||
"dirent points to inode that does not point back:\n %s",
|
||||
(bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n "),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf)))
|
||||
goto err;
|
||||
|
||||
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
|
||||
SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot));
|
||||
ret = bkey_err(bp_dirent);
|
||||
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||
goto err;
|
||||
|
||||
bool backpointer_exists = !ret;
|
||||
ret = 0;
|
||||
|
||||
if (fsck_err_on(!backpointer_exists,
|
||||
trans, inode_wrong_backpointer,
|
||||
"inode %llu:%u has wrong backpointer:\n"
|
||||
"got %llu:%llu\n"
|
||||
"should be %llu:%llu",
|
||||
target->bi_inum, target->bi_snapshot,
|
||||
target->bi_dir,
|
||||
target->bi_dir_offset,
|
||||
d.k->p.inode,
|
||||
d.k->p.offset)) {
|
||||
target->bi_dir = d.k->p.inode;
|
||||
target->bi_dir_offset = d.k->p.offset;
|
||||
ret = __bch2_fsck_write_inode(trans, target);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c);
|
||||
prt_newline(&buf);
|
||||
if (backpointer_exists)
|
||||
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
|
||||
|
||||
if (fsck_err_on(backpointer_exists &&
|
||||
(S_ISDIR(target->bi_mode) ||
|
||||
target->bi_subvol),
|
||||
trans, inode_dir_multiple_links,
|
||||
"%s %llu:%u with multiple links\n%s",
|
||||
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
|
||||
target->bi_inum, target->bi_snapshot, buf.buf)) {
|
||||
ret = __remove_dirent(trans, d.k->p);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* hardlinked file with nlink 0:
|
||||
* We're just adjusting nlink here so check_nlinks() will pick
|
||||
* it up, it ignores inodes with nlink 0
|
||||
*/
|
||||
if (fsck_err_on(backpointer_exists && !target->bi_nlink,
|
||||
trans, inode_multiple_links_but_nlink_0,
|
||||
"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
|
||||
target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
|
||||
target->bi_nlink++;
|
||||
target->bi_flags &= ~BCH_INODE_unlinked;
|
||||
ret = __bch2_fsck_write_inode(trans, target);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
out:
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
noinline_for_stack
|
||||
static int check_dirent_target(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *target)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i_dirent *n;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
ret = check_dirent_inode_dirent(trans, iter, d, target);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (fsck_err_on(d.v->d_type != inode_d_type(target),
|
||||
trans, dirent_d_type_wrong,
|
||||
"incorrect d_type: got %s, should be %s:\n%s",
|
||||
bch2_d_type_str(d.v->d_type),
|
||||
bch2_d_type_str(inode_d_type(target)),
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
|
||||
n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&n->k_i, d.s_c);
|
||||
n->v.d_type = inode_d_type(target);
|
||||
if (n->v.d_type == DT_SUBVOL) {
|
||||
n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
|
||||
n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
|
||||
} else {
|
||||
n->v.d_inum = cpu_to_le64(target->bi_inum);
|
||||
}
|
||||
|
||||
ret = bch2_trans_update(trans, iter, &n->k_i, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
d = dirent_i_to_s_c(n);
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* find a subvolume that's a descendent of @snapshot: */
|
||||
static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
|
||||
{
|
||||
@ -2247,7 +2028,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
|
||||
if (fsck_err(trans, dirent_to_missing_subvol,
|
||||
"dirent points to missing subvolume\n%s",
|
||||
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
|
||||
return __remove_dirent(trans, d.k->p);
|
||||
return bch2_fsck_remove_dirent(trans, d.k->p);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
@ -2291,7 +2072,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = check_dirent_target(trans, iter, d, &subvol_root);
|
||||
ret = bch2_check_dirent_target(trans, iter, d, &subvol_root, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
out:
|
||||
@ -2378,13 +2159,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k),
|
||||
buf.buf))) {
|
||||
ret = __remove_dirent(trans, d.k->p);
|
||||
ret = bch2_fsck_remove_dirent(trans, d.k->p);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
darray_for_each(target->inodes, i) {
|
||||
ret = check_dirent_target(trans, iter, d, &i->inode);
|
||||
ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -3240,7 +3021,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
|
||||
if (arg.opts) {
|
||||
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
|
||||
ret = PTR_ERR_OR_ZERO(optstr) ?:
|
||||
bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
|
||||
bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr, false);
|
||||
if (!IS_ERR(optstr))
|
||||
kfree(optstr);
|
||||
|
||||
@ -3348,7 +3129,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
|
||||
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(optstr) ?:
|
||||
bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
|
||||
bch2_parse_mount_opts(c, &thr->opts, NULL, optstr, false);
|
||||
if (!IS_ERR(optstr))
|
||||
kfree(optstr);
|
||||
|
||||
|
@ -731,10 +731,9 @@ int bch2_trigger_inode(struct btree_trans *trans,
|
||||
bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
|
||||
}
|
||||
|
||||
s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
|
||||
if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr) {
|
||||
struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_nr_inodes };
|
||||
int ret = bch2_disk_accounting_mod(trans, &acc, &nr, 1, flags & BTREE_TRIGGER_gc);
|
||||
s64 nr[1] = { bkey_is_inode(new.k) - bkey_is_inode(old.k) };
|
||||
if ((flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) && nr[0]) {
|
||||
int ret = bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc, nr, nr_inodes);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -1079,7 +1078,7 @@ retry:
|
||||
bch2_fs_inconsistent(c,
|
||||
"inode %llu:%u not found when deleting",
|
||||
inum.inum, snapshot);
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_ENOENT_inode;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1243,7 +1242,7 @@ retry:
|
||||
bch2_fs_inconsistent(c,
|
||||
"inode %llu:%u not found when deleting",
|
||||
inum, snapshot);
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_ENOENT_inode;
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -277,6 +277,7 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i
|
||||
bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset;
|
||||
|
||||
return S_ISDIR(inode->bi_mode) ||
|
||||
inode->bi_subvol ||
|
||||
(!inode->bi_nlink && inode_has_bp);
|
||||
}
|
||||
|
||||
|
@ -295,6 +295,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
|
||||
bool *read_full,
|
||||
struct bch_io_failures *failed)
|
||||
{
|
||||
/*
|
||||
* We're in the retry path, but we don't know what to repair yet, and we
|
||||
* don't want to do a promote here:
|
||||
*/
|
||||
if (failed && !failed->nr)
|
||||
return NULL;
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
/*
|
||||
* if failed != NULL we're not actually doing a promote, we're
|
||||
@ -429,6 +436,71 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
|
||||
bio_endio(&rbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* get_rbio_extent - find the key that holds the pointer recorded in rbio->pick
|
||||
*
|
||||
* Fetches the key at rbio->data_pos in btree rbio->data_btree (the lookup is
|
||||
* wrapped in lockrestart_do()). If one of that key's pointers compares equal
|
||||
* to rbio->pick.ptr, the key is reassembled into @sk. If the lookup returns
|
||||
* an error, or no pointer matches, @sk is not written to.
|
||||
*/
static void get_rbio_extent(struct btree_trans *trans,
|
||||
struct bch_read_bio *rbio,
|
||||
struct bkey_buf *sk)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = lockrestart_do(trans,
|
||||
bkey_err(k = bch2_bkey_get_iter(trans, &iter,
|
||||
rbio->data_btree, rbio->data_pos, 0)));
|
||||
if (ret) /* lookup failed: nothing to copy into @sk */
|
||||
return;
|
||||
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
bkey_for_each_ptr(ptrs, ptr)
|
||||
if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
|
||||
bch2_bkey_buf_reassemble(sk, trans->c, k);
|
||||
break; /* first matching pointer is enough */
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter); /* reached whether or not a pointer matched */
|
||||
}
|
||||
|
||||
/*
|
||||
* maybe_poison_extent - set BCH_EXTENT_FLAG_poisoned on an extent
|
||||
*
|
||||
* If rbio_data_update(@rbio) returns a data update, that update's key
|
||||
* (u->k.k) is used in place of @read_k.
|
||||
*
|
||||
* Returns 0 without changing anything if the key already has the poisoned
|
||||
* flag set, or if the key now found at bkey_start_pos(read_k.k) in @btree no
|
||||
* longer equals it (bkey_and_val_eq()). Otherwise a copy of the key with the
|
||||
* flag added is passed to bch2_trans_update() and committed on @trans, and the
|
||||
* first error from that sequence (or 0) is returned. A lookup error is
|
||||
* returned as-is.
|
||||
*/
static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
enum btree_id btree, struct bkey_s_c read_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
struct data_update *u = rbio_data_update(rbio);
|
||||
if (u)
|
||||
read_k = bkey_i_to_s_c(u->k.k);
|
||||
|
||||
u64 flags = bch2_bkey_extent_flags(read_k);
|
||||
if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) /* already poisoned: nothing to do */
|
||||
return 0;
|
||||
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k),
|
||||
BTREE_ITER_intent);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!bkey_and_val_eq(k, read_k)) /* key changed since it was read */
|
||||
goto out; /* ret is 0 here, so this path reports success */
|
||||
|
||||
struct bkey_i *new = bch2_trans_kmalloc(trans,
|
||||
bkey_bytes(k.k) + sizeof(struct bch_extent_flags)); /* extra space, presumably for a new flags entry - confirm */
|
||||
ret = PTR_ERR_OR_ZERO(new) ?:
|
||||
(bkey_reassemble(new, k), 0) ?:
|
||||
bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?:
|
||||
bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, 0);
|
||||
|
||||
/*
|
||||
* Propagate key change back to data update path, in particular so it
|
||||
* knows the extent has been poisoned and it's safe to change the
|
||||
* checksum
|
||||
*/
|
||||
if (u && !ret)
|
||||
bch2_bkey_buf_copy(&u->k, c, new);
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
|
||||
struct bch_read_bio *rbio,
|
||||
struct bvec_iter bvec_iter,
|
||||
@ -462,7 +534,8 @@ retry:
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_data_read_retry))
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
|
||||
bch2_err_matches(ret, BCH_ERR_data_read_retry))
|
||||
goto retry;
|
||||
|
||||
if (ret) {
|
||||
@ -486,13 +559,21 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
.inum = rbio->read_pos.inode,
|
||||
};
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
struct bkey_buf sk;
|
||||
bch2_bkey_buf_init(&sk);
|
||||
bkey_init(&sk.k->k);
|
||||
|
||||
trace_io_read_retry(&rbio->bio);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
|
||||
bvec_iter_sectors(rbio->bvec_iter));
|
||||
|
||||
if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
|
||||
get_rbio_extent(trans, rbio, &sk);
|
||||
|
||||
if (!bkey_deleted(&sk.k->k) &&
|
||||
bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
|
||||
bch2_mark_io_failure(&failed, &rbio->pick,
|
||||
rbio->ret == -BCH_ERR_data_read_retry_csum_err);
|
||||
|
||||
@ -513,7 +594,7 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
|
||||
int ret = rbio->data_update
|
||||
? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
|
||||
: __bch2_read(trans, rbio, iter, inum, &failed, flags);
|
||||
: __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
|
||||
|
||||
if (ret) {
|
||||
rbio->ret = ret;
|
||||
@ -534,6 +615,7 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
}
|
||||
|
||||
bch2_rbio_done(rbio);
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
bch2_trans_put(trans);
|
||||
}
|
||||
|
||||
@ -958,6 +1040,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
||||
bvec_iter_sectors(iter));
|
||||
goto out_read_done;
|
||||
}
|
||||
|
||||
if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
|
||||
!orig->data_update)
|
||||
return -BCH_ERR_extent_poisened;
|
||||
retry_pick:
|
||||
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
|
||||
|
||||
@ -966,6 +1052,16 @@ retry_pick:
|
||||
goto hole;
|
||||
|
||||
if (unlikely(ret < 0)) {
|
||||
if (ret == -BCH_ERR_data_read_csum_err) {
|
||||
int ret2 = maybe_poison_extent(trans, orig, data_btree, k);
|
||||
if (ret2) {
|
||||
ret = ret2;
|
||||
goto err;
|
||||
}
|
||||
|
||||
trace_and_count(c, io_read_fail_and_poison, &orig->bio);
|
||||
}
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
|
||||
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
|
||||
@ -1263,12 +1359,15 @@ out_read_done:
|
||||
|
||||
int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
struct bvec_iter bvec_iter, subvol_inum inum,
|
||||
struct bch_io_failures *failed, unsigned flags)
|
||||
struct bch_io_failures *failed,
|
||||
struct bkey_buf *prev_read,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_buf sk;
|
||||
struct bkey_s_c k;
|
||||
enum btree_id data_btree;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(rbio->data_update);
|
||||
@ -1279,7 +1378,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
BTREE_ITER_slots);
|
||||
|
||||
while (1) {
|
||||
enum btree_id data_btree = BTREE_ID_extents;
|
||||
data_btree = BTREE_ID_extents;
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
@ -1311,6 +1410,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
|
||||
k = bkey_i_to_s_c(sk.k);
|
||||
|
||||
if (unlikely(flags & BCH_READ_in_retry)) {
|
||||
if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
|
||||
failed->nr = 0;
|
||||
bch2_bkey_buf_copy(prev_read, c, sk.k);
|
||||
}
|
||||
|
||||
/*
|
||||
* With indirect extents, the amount of data to read is the min
|
||||
* of the original extent and the indirect extent:
|
||||
@ -1326,13 +1431,14 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
|
||||
data_btree, k,
|
||||
offset_into_extent, failed, flags, -1);
|
||||
swap(bvec_iter.bi_size, bytes);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (flags & BCH_READ_last_fragment)
|
||||
break;
|
||||
|
||||
swap(bvec_iter.bi_size, bytes);
|
||||
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
|
||||
err:
|
||||
if (ret == -BCH_ERR_data_read_retry_csum_err_maybe_userspace)
|
||||
@ -1344,9 +1450,7 @@ err:
|
||||
break;
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (ret) {
|
||||
if (unlikely(ret)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
|
||||
@ -1362,6 +1466,7 @@ err:
|
||||
bch2_rbio_done(rbio);
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
return ret;
|
||||
}
|
||||
|
@ -137,12 +137,15 @@ static inline void bch2_read_extent(struct btree_trans *trans,
|
||||
enum btree_id data_btree, struct bkey_s_c k,
|
||||
unsigned offset_into_extent, unsigned flags)
|
||||
{
|
||||
__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
|
||||
data_btree, k, offset_into_extent, NULL, flags, -1);
|
||||
int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
|
||||
data_btree, k, offset_into_extent, NULL, flags, -1);
|
||||
/* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */
|
||||
WARN(ret, "unhandled error from __bch2_read_extent()");
|
||||
}
|
||||
|
||||
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
|
||||
subvol_inum, struct bch_io_failures *, unsigned flags);
|
||||
subvol_inum,
|
||||
struct bch_io_failures *, struct bkey_buf *, unsigned flags);
|
||||
|
||||
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
subvol_inum inum)
|
||||
@ -152,7 +155,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
rbio->subvol = inum.subvol;
|
||||
|
||||
bch2_trans_run(c,
|
||||
__bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL,
|
||||
__bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
|
||||
BCH_READ_retry_if_stale|
|
||||
BCH_READ_may_promote|
|
||||
BCH_READ_user_mapped));
|
||||
|
@ -402,61 +402,36 @@ static int bch2_write_index_default(struct bch_write_op *op)
|
||||
|
||||
/* Writes */
|
||||
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64 offset, const char *fmt, ...)
|
||||
void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...)
|
||||
{
|
||||
if (op->subvol)
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9));
|
||||
else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
|
||||
}
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(out, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64 offset,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
if (op->subvol)
|
||||
bch2_inum_offset_err_msg(op->c, out,
|
||||
if (op->subvol) {
|
||||
bch2_inum_offset_err_msg(op->c, &buf,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
else {
|
||||
} else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
bch2_inum_snap_offset_err_msg(op->c, out, pos);
|
||||
bch2_inum_snap_offset_err_msg(op->c, &buf, pos);
|
||||
}
|
||||
|
||||
prt_str(out, "write error: ");
|
||||
prt_str(&buf, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
prt_vprintf(&buf, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
prt_printf(&buf, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(&buf, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
|
||||
bch_err_ratelimited(op->c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
@ -554,7 +529,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
|
||||
test_bit(ptr->dev, op->failed.d));
|
||||
|
||||
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src)))
|
||||
return -EIO;
|
||||
return -BCH_ERR_data_write_io;
|
||||
}
|
||||
|
||||
if (dst != src)
|
||||
@ -598,11 +573,8 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
if (unlikely(ret && !bch2_err_matches(ret, EROFS))) {
|
||||
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k),
|
||||
bch2_write_op_error(op, bkey_start_offset(&insert->k),
|
||||
"btree update error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
@ -611,7 +583,7 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
out:
|
||||
/* If a bucket wasn't written, we can't erasure code it: */
|
||||
for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX)
|
||||
bch2_open_bucket_write_error(c, &op->open_buckets, dev);
|
||||
bch2_open_bucket_write_error(c, &op->open_buckets, dev, -BCH_ERR_data_write_io);
|
||||
|
||||
bch2_open_buckets_put(c, &op->open_buckets);
|
||||
return;
|
||||
@ -837,7 +809,6 @@ static int bch2_write_rechecksum(struct bch_fs *c,
|
||||
{
|
||||
struct bio *bio = &op->wbio.bio;
|
||||
struct bch_extent_crc_unpacked new_crc;
|
||||
int ret;
|
||||
|
||||
/* bch2_rechecksum_bio() can't encrypt or decrypt data: */
|
||||
|
||||
@ -845,10 +816,10 @@ static int bch2_write_rechecksum(struct bch_fs *c,
|
||||
bch2_csum_type_is_encryption(new_csum_type))
|
||||
new_csum_type = op->crc.csum_type;
|
||||
|
||||
ret = bch2_rechecksum_bio(c, bio, op->version, op->crc,
|
||||
NULL, &new_crc,
|
||||
op->crc.offset, op->crc.live_size,
|
||||
new_csum_type);
|
||||
int ret = bch2_rechecksum_bio(c, bio, op->version, op->crc,
|
||||
NULL, &new_crc,
|
||||
op->crc.offset, op->crc.live_size,
|
||||
new_csum_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -858,44 +829,12 @@ static int bch2_write_rechecksum(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_write_decrypt(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct nonce nonce = extent_nonce(op->version, op->crc);
|
||||
struct bch_csum csum;
|
||||
int ret;
|
||||
|
||||
if (!bch2_csum_type_is_encryption(op->crc.csum_type))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If we need to decrypt data in the write path, we'll no longer be able
|
||||
* to verify the existing checksum (poly1305 mac, in this case) after
|
||||
* it's decrypted - this is the last point we'll be able to reverify the
|
||||
* checksum:
|
||||
*/
|
||||
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
|
||||
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
|
||||
return -EIO;
|
||||
|
||||
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
|
||||
op->crc.csum_type = 0;
|
||||
op->crc.csum = (struct bch_csum) { 0, 0 };
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum prep_encoded_ret {
|
||||
PREP_ENCODED_OK,
|
||||
PREP_ENCODED_ERR,
|
||||
PREP_ENCODED_CHECKSUM_ERR,
|
||||
PREP_ENCODED_DO_WRITE,
|
||||
} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp)
|
||||
static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct bio *bio = &op->wbio.bio;
|
||||
|
||||
if (!(op->flags & BCH_WRITE_data_encoded))
|
||||
return PREP_ENCODED_OK;
|
||||
struct bch_csum csum;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
|
||||
|
||||
@ -906,12 +845,13 @@ static enum prep_encoded_ret {
|
||||
(op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
|
||||
op->incompressible)) {
|
||||
if (!crc_is_compressed(op->crc) &&
|
||||
op->csum_type != op->crc.csum_type &&
|
||||
bch2_write_rechecksum(c, op, op->csum_type) &&
|
||||
!c->opts.no_data_io)
|
||||
return PREP_ENCODED_CHECKSUM_ERR;
|
||||
op->csum_type != op->crc.csum_type) {
|
||||
ret = bch2_write_rechecksum(c, op, op->csum_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return PREP_ENCODED_DO_WRITE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -919,20 +859,24 @@ static enum prep_encoded_ret {
|
||||
* is, we have to decompress it:
|
||||
*/
|
||||
if (crc_is_compressed(op->crc)) {
|
||||
struct bch_csum csum;
|
||||
|
||||
if (bch2_write_decrypt(op))
|
||||
return PREP_ENCODED_CHECKSUM_ERR;
|
||||
|
||||
/* Last point we can still verify checksum: */
|
||||
csum = bch2_checksum_bio(c, op->crc.csum_type,
|
||||
extent_nonce(op->version, op->crc),
|
||||
bio);
|
||||
struct nonce nonce = extent_nonce(op->version, op->crc);
|
||||
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
|
||||
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
|
||||
return PREP_ENCODED_CHECKSUM_ERR;
|
||||
goto csum_err;
|
||||
|
||||
if (bch2_bio_uncompress_inplace(op, bio))
|
||||
return PREP_ENCODED_ERR;
|
||||
if (bch2_csum_type_is_encryption(op->crc.csum_type)) {
|
||||
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
op->crc.csum_type = 0;
|
||||
op->crc.csum = (struct bch_csum) { 0, 0 };
|
||||
}
|
||||
|
||||
ret = bch2_bio_uncompress_inplace(op, bio);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -944,22 +888,44 @@ static enum prep_encoded_ret {
|
||||
* If the data is checksummed and we're only writing a subset,
|
||||
* rechecksum and adjust bio to point to currently live data:
|
||||
*/
|
||||
if ((op->crc.live_size != op->crc.uncompressed_size ||
|
||||
op->crc.csum_type != op->csum_type) &&
|
||||
bch2_write_rechecksum(c, op, op->csum_type) &&
|
||||
!c->opts.no_data_io)
|
||||
return PREP_ENCODED_CHECKSUM_ERR;
|
||||
if (op->crc.live_size != op->crc.uncompressed_size ||
|
||||
op->crc.csum_type != op->csum_type) {
|
||||
ret = bch2_write_rechecksum(c, op, op->csum_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we want to compress the data, it has to be decrypted:
|
||||
*/
|
||||
if ((op->compression_opt ||
|
||||
bch2_csum_type_is_encryption(op->crc.csum_type) !=
|
||||
bch2_csum_type_is_encryption(op->csum_type)) &&
|
||||
bch2_write_decrypt(op))
|
||||
return PREP_ENCODED_CHECKSUM_ERR;
|
||||
if (bch2_csum_type_is_encryption(op->crc.csum_type) &&
|
||||
(op->compression_opt || op->crc.csum_type != op->csum_type)) {
|
||||
struct nonce nonce = extent_nonce(op->version, op->crc);
|
||||
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
|
||||
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
|
||||
goto csum_err;
|
||||
|
||||
return PREP_ENCODED_OK;
|
||||
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, bio);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
op->crc.csum_type = 0;
|
||||
op->crc.csum = (struct bch_csum) { 0, 0 };
|
||||
}
|
||||
|
||||
return 0;
|
||||
csum_err:
|
||||
bch2_write_op_error(op, op->pos.offset,
|
||||
"error verifying existing checksum while moving existing data (memory corruption?)\n"
|
||||
" expected %0llx:%0llx got %0llx:%0llx type %s",
|
||||
op->crc.csum.hi,
|
||||
op->crc.csum.lo,
|
||||
csum.hi,
|
||||
csum.lo,
|
||||
op->crc.csum_type < BCH_CSUM_NR
|
||||
? __bch2_csum_types[op->crc.csum_type]
|
||||
: "(unknown)");
|
||||
return -BCH_ERR_data_write_csum;
|
||||
}
|
||||
|
||||
static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
@ -974,29 +940,28 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
bool page_alloc_failed = false;
|
||||
int ret, more = 0;
|
||||
|
||||
if (op->incompressible)
|
||||
op->compression_opt = 0;
|
||||
|
||||
BUG_ON(!bio_sectors(src));
|
||||
|
||||
ec_buf = bch2_writepoint_ec_buf(c, wp);
|
||||
|
||||
switch (bch2_write_prep_encoded_data(op, wp)) {
|
||||
case PREP_ENCODED_OK:
|
||||
break;
|
||||
case PREP_ENCODED_ERR:
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
case PREP_ENCODED_CHECKSUM_ERR:
|
||||
goto csum_err;
|
||||
case PREP_ENCODED_DO_WRITE:
|
||||
/* XXX look for bug here */
|
||||
if (ec_buf) {
|
||||
dst = bch2_write_bio_alloc(c, wp, src,
|
||||
&page_alloc_failed,
|
||||
ec_buf);
|
||||
bio_copy_data(dst, src);
|
||||
bounce = true;
|
||||
if (unlikely(op->flags & BCH_WRITE_data_encoded)) {
|
||||
ret = bch2_write_prep_encoded_data(op, wp);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret) {
|
||||
if (ec_buf) {
|
||||
dst = bch2_write_bio_alloc(c, wp, src,
|
||||
&page_alloc_failed,
|
||||
ec_buf);
|
||||
bio_copy_data(dst, src);
|
||||
bounce = true;
|
||||
}
|
||||
init_append_extent(op, wp, op->version, op->crc);
|
||||
goto do_write;
|
||||
}
|
||||
init_append_extent(op, wp, op->version, op->crc);
|
||||
goto do_write;
|
||||
}
|
||||
|
||||
if (ec_buf ||
|
||||
@ -1089,12 +1054,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
* data can't be modified (by userspace) while it's in
|
||||
* flight.
|
||||
*/
|
||||
if (bch2_rechecksum_bio(c, src, version, op->crc,
|
||||
ret = bch2_rechecksum_bio(c, src, version, op->crc,
|
||||
&crc, &op->crc,
|
||||
src_len >> 9,
|
||||
bio_sectors(src) - (src_len >> 9),
|
||||
op->csum_type))
|
||||
goto csum_err;
|
||||
op->csum_type);
|
||||
if (ret)
|
||||
goto err;
|
||||
/*
|
||||
* bch2_rechecksum_bio() sets compression_type on crc from op->crc,
|
||||
* this isn't always correct as sometimes we're changing
|
||||
@ -1104,12 +1070,12 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
crc.nonce = nonce;
|
||||
} else {
|
||||
if ((op->flags & BCH_WRITE_data_encoded) &&
|
||||
bch2_rechecksum_bio(c, src, version, op->crc,
|
||||
(ret = bch2_rechecksum_bio(c, src, version, op->crc,
|
||||
NULL, &op->crc,
|
||||
src_len >> 9,
|
||||
bio_sectors(src) - (src_len >> 9),
|
||||
op->crc.csum_type))
|
||||
goto csum_err;
|
||||
op->crc.csum_type)))
|
||||
goto err;
|
||||
|
||||
crc.compressed_size = dst_len >> 9;
|
||||
crc.uncompressed_size = src_len >> 9;
|
||||
@ -1168,16 +1134,6 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
do_write:
|
||||
*_dst = dst;
|
||||
return more;
|
||||
csum_err:
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"error verifying existing checksum while rewriting existing data (memory corruption?)");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
ret = -EIO;
|
||||
err:
|
||||
if (to_wbio(dst)->bounce)
|
||||
bch2_bio_free_pages_pool(c, dst);
|
||||
@ -1255,38 +1211,35 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
int ret = 0;
|
||||
|
||||
for_each_keylist_key(&op->insert_keys, orig) {
|
||||
int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
|
||||
ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
|
||||
bkey_start_pos(&orig->k), orig->k.p,
|
||||
BTREE_ITER_intent, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
||||
bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
|
||||
}));
|
||||
|
||||
if (ret && !bch2_err_matches(ret, EROFS)) {
|
||||
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k),
|
||||
"btree update error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
op->error = ret;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_put(trans);
|
||||
|
||||
if (ret && !bch2_err_matches(ret, EROFS)) {
|
||||
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
|
||||
bch2_write_op_error(op, bkey_start_offset(&insert->k),
|
||||
"btree update error: %s", bch2_err_str(ret));
|
||||
}
|
||||
|
||||
if (ret)
|
||||
op->error = ret;
|
||||
}
|
||||
|
||||
static void __bch2_nocow_write_done(struct bch_write_op *op)
|
||||
{
|
||||
if (unlikely(op->flags & BCH_WRITE_io_error)) {
|
||||
op->error = -EIO;
|
||||
op->error = -BCH_ERR_data_write_io;
|
||||
} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
|
||||
bch2_nocow_write_convert_unwritten(op);
|
||||
}
|
||||
@ -1436,11 +1389,8 @@ err:
|
||||
darray_exit(&buckets);
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
bch2_write_op_error(op, op->pos.offset,
|
||||
"%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_submitted;
|
||||
}
|
||||
@ -1480,7 +1430,7 @@ err_bucket_stale:
|
||||
"pointer to invalid bucket in nocow path on device %llu\n %s",
|
||||
stale_at->b.inode,
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
ret = -EIO;
|
||||
ret = -BCH_ERR_data_write_invalid_ptr;
|
||||
} else {
|
||||
/* We can retry this: */
|
||||
ret = -BCH_ERR_transaction_restart;
|
||||
@ -1558,13 +1508,9 @@ err:
|
||||
op->flags |= BCH_WRITE_submitted;
|
||||
|
||||
if (unlikely(ret < 0)) {
|
||||
if (!(op->flags & BCH_WRITE_alloc_nowait)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
if (!(op->flags & BCH_WRITE_alloc_nowait))
|
||||
bch2_write_op_error(op, op->pos.offset,
|
||||
"%s(): %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
op->error = ret;
|
||||
break;
|
||||
}
|
||||
@ -1691,11 +1637,8 @@ CLOSURE_CALLBACK(bch2_write)
|
||||
wbio_init(bio)->put_bio = false;
|
||||
|
||||
if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"misaligned write");
|
||||
printbuf_exit(&buf);
|
||||
op->error = -EIO;
|
||||
bch2_write_op_error(op, op->pos.offset, "misaligned write");
|
||||
op->error = -BCH_ERR_data_write_misaligned;
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -14,13 +14,8 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
|
||||
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
|
||||
enum bch_data_type, const struct bkey_i *, bool);
|
||||
|
||||
__printf(5, 6)
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64, const char *, ...);
|
||||
|
||||
__printf(4, 5)
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64,
|
||||
const char *, ...);
|
||||
__printf(3, 4)
|
||||
void bch2_write_op_error(struct bch_write_op *op, u64, const char *, ...);
|
||||
|
||||
#define BCH_WRITE_FLAGS() \
|
||||
x(alloc_nowait) \
|
||||
|
@ -62,8 +62,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
prt_printf(out, "expires:\t");
|
||||
prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
|
||||
prt_printf(out, "expires:\t%li jiffies\n", buf->expires - jiffies);
|
||||
|
||||
prt_printf(out, "flags:\t");
|
||||
if (buf->noflush)
|
||||
@ -142,6 +141,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
|
||||
bool stuck = false;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
buf.atomic++;
|
||||
|
||||
if (!(error == -BCH_ERR_journal_full ||
|
||||
error == -BCH_ERR_journal_pin_full) ||
|
||||
nr_unwritten_journal_entries(j) ||
|
||||
@ -172,7 +173,7 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
|
||||
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
|
||||
bch2_err_str(error));
|
||||
bch2_journal_debug_to_text(&buf, j);
|
||||
bch_err(c, "%s", buf.buf);
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
|
||||
printbuf_reset(&buf);
|
||||
bch2_journal_pins_to_text(&buf, j);
|
||||
@ -726,10 +727,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
||||
remaining_wait))
|
||||
return ret;
|
||||
|
||||
bch_err(c, "Journal stuck? Waited for 10 seconds, err %s", bch2_err_str(ret));
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_journal_debug_to_text(&buf, j);
|
||||
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
|
||||
buf.buf);
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
||||
closure_wait_event(&j->async_wait,
|
||||
@ -1510,7 +1511,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
|
||||
unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
|
||||
ja->bio[i] = kmalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
|
||||
ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
|
||||
nr_bvecs), GFP_KERNEL);
|
||||
if (!ja->bio[i])
|
||||
return -BCH_ERR_ENOMEM_dev_journal_init;
|
||||
|
@ -1624,7 +1624,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
|
||||
if (!w->devs_written.nr) {
|
||||
bch_err(c, "unable to write journal to sufficient devices");
|
||||
err = -EIO;
|
||||
err = -BCH_ERR_journal_write_err;
|
||||
} else {
|
||||
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
|
||||
w->devs_written);
|
||||
|
@ -645,7 +645,6 @@ static u64 journal_seq_to_flush(struct journal *j)
|
||||
* @j: journal object
|
||||
* @direct: direct or background reclaim?
|
||||
* @kicked: requested to run since we last ran?
|
||||
* Returns: 0 on success, or -EIO if the journal has been shutdown
|
||||
*
|
||||
* Background journal reclaim writes out btree nodes. It should be run
|
||||
* early enough so that we never completely run out of journal buckets.
|
||||
@ -685,10 +684,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
if (kthread && kthread_should_stop())
|
||||
break;
|
||||
|
||||
if (bch2_journal_error(j)) {
|
||||
ret = -EIO;
|
||||
ret = bch2_journal_error(j);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
bch2_journal_do_discards(j);
|
||||
|
||||
|
@ -126,26 +126,40 @@ static void move_write_done(struct bch_write_op *op)
|
||||
|
||||
static void move_write(struct moving_io *io)
|
||||
{
|
||||
struct bch_fs *c = io->write.op.c;
|
||||
struct moving_context *ctxt = io->write.ctxt;
|
||||
struct bch_read_bio *rbio = &io->write.rbio;
|
||||
|
||||
if (ctxt->stats) {
|
||||
if (io->write.rbio.bio.bi_status)
|
||||
if (rbio->bio.bi_status)
|
||||
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
|
||||
&ctxt->stats->sectors_error_uncorrected);
|
||||
else if (io->write.rbio.saw_error)
|
||||
else if (rbio->saw_error)
|
||||
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
|
||||
&ctxt->stats->sectors_error_corrected);
|
||||
}
|
||||
|
||||
if (unlikely(io->write.rbio.ret ||
|
||||
io->write.rbio.bio.bi_status ||
|
||||
io->write.data_opts.scrub)) {
|
||||
/*
|
||||
* If the extent has been bitrotted, we're going to have to give it a
|
||||
* new checksum in order to move it - but the poison bit will ensure
|
||||
* that userspace still gets the appropriate error.
|
||||
*/
|
||||
if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err &&
|
||||
(bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
|
||||
struct bch_extent_crc_unpacked crc = rbio->pick.crc;
|
||||
struct nonce nonce = extent_nonce(rbio->version, crc);
|
||||
|
||||
rbio->pick.crc.csum = bch2_checksum_bio(c, rbio->pick.crc.csum_type,
|
||||
nonce, &rbio->bio);
|
||||
rbio->ret = 0;
|
||||
}
|
||||
|
||||
if (unlikely(rbio->ret || io->write.data_opts.scrub)) {
|
||||
move_free(io);
|
||||
return;
|
||||
}
|
||||
|
||||
if (trace_io_move_write_enabled()) {
|
||||
struct bch_fs *c = io->write.op.c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
|
||||
@ -528,6 +542,37 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move requires non extents iterators, and there's also no need for it to
|
||||
* signal indirect_extent_missing_error:
|
||||
*/
|
||||
static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c_reflink_p p)
|
||||
{
|
||||
if (unlikely(REFLINK_P_ERROR(p.v)))
|
||||
return bkey_s_c_null;
|
||||
|
||||
struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));
|
||||
|
||||
bch2_trans_iter_init(trans, iter,
|
||||
BTREE_ID_reflink, reflink_pos,
|
||||
BTREE_ITER_not_extents);
|
||||
|
||||
struct bkey_s_c k = bch2_btree_iter_peek(iter);
|
||||
if (!k.k || bkey_err(k)) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return k;
|
||||
}
|
||||
|
||||
if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
static int bch2_move_data_btree(struct moving_context *ctxt,
|
||||
struct bpos start,
|
||||
struct bpos end,
|
||||
@ -592,17 +637,16 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
|
||||
k.k->type == KEY_TYPE_reflink_p &&
|
||||
REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
|
||||
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
|
||||
s64 offset_into_extent = 0;
|
||||
|
||||
bch2_trans_iter_exit(trans, &reflink_iter);
|
||||
k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0);
|
||||
k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
|
||||
ret = bkey_err(k);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
continue;
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (bkey_deleted(k.k))
|
||||
if (!k.k)
|
||||
goto next_nondata;
|
||||
|
||||
/*
|
||||
@ -611,7 +655,6 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
|
||||
* pointer - need to fixup iter->k
|
||||
*/
|
||||
extent_iter = &reflink_iter;
|
||||
offset_into_extent = 0;
|
||||
}
|
||||
|
||||
if (!bkey_extent_is_direct_data(k.k))
|
||||
|
@ -32,7 +32,7 @@ struct bch_move_stats {
|
||||
|
||||
struct move_bucket_key {
|
||||
struct bpos bucket;
|
||||
u8 gen;
|
||||
unsigned gen;
|
||||
};
|
||||
|
||||
struct move_bucket {
|
||||
|
@ -4,8 +4,8 @@
|
||||
#include "acl.h"
|
||||
#include "btree_update.h"
|
||||
#include "dirent.h"
|
||||
#include "fs-common.h"
|
||||
#include "inode.h"
|
||||
#include "namei.h"
|
||||
#include "subvolume.h"
|
||||
#include "xattr.h"
|
||||
|
||||
@ -564,6 +564,8 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* inum_to_path */
|
||||
|
||||
static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
|
||||
{
|
||||
bch2_printbuf_make_room(out, n);
|
||||
@ -654,3 +656,179 @@ disconnected:
|
||||
prt_str_reversed(path, "(disconnected)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* fsck */
|
||||
|
||||
static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
|
||||
struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *target,
|
||||
bool in_fsck)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct btree_iter bp_iter = { NULL };
|
||||
int ret = 0;
|
||||
|
||||
if (inode_points_to_dirent(target, d))
|
||||
return 0;
|
||||
|
||||
if (!target->bi_dir &&
|
||||
!target->bi_dir_offset) {
|
||||
fsck_err_on(S_ISDIR(target->bi_mode),
|
||||
trans, inode_dir_missing_backpointer,
|
||||
"directory with missing backpointer\n%s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n"),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf));
|
||||
|
||||
fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
|
||||
trans, inode_unlinked_but_has_dirent,
|
||||
"inode unlinked but has dirent\n%s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n"),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf));
|
||||
|
||||
target->bi_flags &= ~BCH_INODE_unlinked;
|
||||
target->bi_dir = d.k->p.inode;
|
||||
target->bi_dir_offset = d.k->p.offset;
|
||||
return __bch2_fsck_write_inode(trans, target);
|
||||
}
|
||||
|
||||
if (bch2_inode_should_have_single_bp(target) &&
|
||||
!fsck_err(trans, inode_wrong_backpointer,
|
||||
"dirent points to inode that does not point back:\n %s",
|
||||
(bch2_bkey_val_to_text(&buf, c, d.s_c),
|
||||
prt_printf(&buf, "\n "),
|
||||
bch2_inode_unpacked_to_text(&buf, target),
|
||||
buf.buf)))
|
||||
goto err;
|
||||
|
||||
struct bkey_s_c_dirent bp_dirent =
|
||||
bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
|
||||
SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
|
||||
0, dirent);
|
||||
ret = bkey_err(bp_dirent);
|
||||
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||
goto err;
|
||||
|
||||
bool backpointer_exists = !ret;
|
||||
ret = 0;
|
||||
|
||||
if (!backpointer_exists) {
|
||||
if (fsck_err(trans, inode_wrong_backpointer,
|
||||
"inode %llu:%u has wrong backpointer:\n"
|
||||
"got %llu:%llu\n"
|
||||
"should be %llu:%llu",
|
||||
target->bi_inum, target->bi_snapshot,
|
||||
target->bi_dir,
|
||||
target->bi_dir_offset,
|
||||
d.k->p.inode,
|
||||
d.k->p.offset)) {
|
||||
target->bi_dir = d.k->p.inode;
|
||||
target->bi_dir_offset = d.k->p.offset;
|
||||
ret = __bch2_fsck_write_inode(trans, target);
|
||||
}
|
||||
} else {
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c);
|
||||
prt_newline(&buf);
|
||||
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
|
||||
|
||||
if (S_ISDIR(target->bi_mode) || target->bi_subvol) {
|
||||
/*
|
||||
* XXX: verify connectivity of the other dirent
|
||||
* up to the root before removing this one
|
||||
*
|
||||
* Additionally, bch2_lookup would need to cope with the
|
||||
* dirent it found being removed - or should we remove
|
||||
* the other one, even though the inode points to it?
|
||||
*/
|
||||
if (in_fsck) {
|
||||
if (fsck_err(trans, inode_dir_multiple_links,
|
||||
"%s %llu:%u with multiple links\n%s",
|
||||
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
|
||||
target->bi_inum, target->bi_snapshot, buf.buf))
|
||||
ret = bch2_fsck_remove_dirent(trans, d.k->p);
|
||||
} else {
|
||||
bch2_fs_inconsistent(c,
|
||||
"%s %llu:%u with multiple links\n%s",
|
||||
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
|
||||
target->bi_inum, target->bi_snapshot, buf.buf);
|
||||
}
|
||||
|
||||
goto out;
|
||||
} else {
|
||||
/*
|
||||
* hardlinked file with nlink 0:
|
||||
* We're just adjusting nlink here so check_nlinks() will pick
|
||||
* it up, it ignores inodes with nlink 0
|
||||
*/
|
||||
if (fsck_err_on(!target->bi_nlink,
|
||||
trans, inode_multiple_links_but_nlink_0,
|
||||
"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
|
||||
target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
|
||||
target->bi_nlink++;
|
||||
target->bi_flags &= ~BCH_INODE_unlinked;
|
||||
ret = __bch2_fsck_write_inode(trans, target);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __bch2_check_dirent_target(struct btree_trans *trans,
|
||||
struct btree_iter *dirent_iter,
|
||||
struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *target,
|
||||
bool in_fsck)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (fsck_err_on(d.v->d_type != inode_d_type(target),
|
||||
trans, dirent_d_type_wrong,
|
||||
"incorrect d_type: got %s, should be %s:\n%s",
|
||||
bch2_d_type_str(d.v->d_type),
|
||||
bch2_d_type_str(inode_d_type(target)),
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
|
||||
struct bkey_i_dirent *n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&n->k_i, d.s_c);
|
||||
n->v.d_type = inode_d_type(target);
|
||||
if (n->v.d_type == DT_SUBVOL) {
|
||||
n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
|
||||
n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
|
||||
} else {
|
||||
n->v.d_inum = cpu_to_le64(target->bi_inum);
|
||||
}
|
||||
|
||||
ret = bch2_trans_update(trans, dirent_iter, &n->k_i, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_FS_COMMON_H
|
||||
#define _BCACHEFS_FS_COMMON_H
|
||||
#ifndef _BCACHEFS_NAMEI_H
|
||||
#define _BCACHEFS_NAMEI_H
|
||||
|
||||
#include "dirent.h"
|
||||
|
||||
@ -44,4 +44,29 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *,
|
||||
|
||||
int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *);
|
||||
|
||||
#endif /* _BCACHEFS_FS_COMMON_H */
|
||||
int __bch2_check_dirent_target(struct btree_trans *,
|
||||
struct btree_iter *,
|
||||
struct bkey_s_c_dirent,
|
||||
struct bch_inode_unpacked *, bool);
|
||||
|
||||
static inline bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
|
||||
struct bkey_s_c_dirent d)
|
||||
{
|
||||
return inode->bi_dir == d.k->p.inode &&
|
||||
inode->bi_dir_offset == d.k->p.offset;
|
||||
}
|
||||
|
||||
static inline int bch2_check_dirent_target(struct btree_trans *trans,
|
||||
struct btree_iter *dirent_iter,
|
||||
struct bkey_s_c_dirent d,
|
||||
struct bch_inode_unpacked *target,
|
||||
bool in_fsck)
|
||||
{
|
||||
if (likely(inode_points_to_dirent(target, d) &&
|
||||
d.v->d_type == inode_d_type(target)))
|
||||
return 0;
|
||||
|
||||
return __bch2_check_dirent_target(trans, dirent_iter, d, target, in_fsck);
|
||||
}
|
||||
|
||||
#endif /* _BCACHEFS_NAMEI_H */
|
@ -44,7 +44,7 @@ const char * const __bch2_btree_ids[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static const char * const __bch2_csum_types[] = {
|
||||
const char * const __bch2_csum_types[] = {
|
||||
BCH_CSUM_TYPES()
|
||||
NULL
|
||||
};
|
||||
@ -219,10 +219,10 @@ typedef void (*sb_opt_set_fn)(struct bch_sb *, u64);
|
||||
typedef u64 (*member_opt_get_fn)(const struct bch_member *);
|
||||
typedef void (*member_opt_set_fn)(struct bch_member *, u64);
|
||||
|
||||
static const sb_opt_get_fn BCH2_NO_SB_OPT = NULL;
|
||||
static const sb_opt_set_fn SET_BCH2_NO_SB_OPT = NULL;
|
||||
static const member_opt_get_fn BCH2_NO_MEMBER_OPT = NULL;
|
||||
static const member_opt_set_fn SET_BCH2_NO_MEMBER_OPT = NULL;
|
||||
__maybe_unused static const sb_opt_get_fn BCH2_NO_SB_OPT = NULL;
|
||||
__maybe_unused static const sb_opt_set_fn SET_BCH2_NO_SB_OPT = NULL;
|
||||
__maybe_unused static const member_opt_get_fn BCH2_NO_MEMBER_OPT = NULL;
|
||||
__maybe_unused static const member_opt_set_fn SET_BCH2_NO_MEMBER_OPT = NULL;
|
||||
|
||||
#define type_compatible_or_null(_p, _type) \
|
||||
__builtin_choose_expr( \
|
||||
@ -551,14 +551,15 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
|
||||
goto bad_opt;
|
||||
|
||||
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
|
||||
if (ret == -BCH_ERR_option_needs_open_fs && parse_later) {
|
||||
prt_printf(parse_later, "%s=%s,", name, val);
|
||||
if (parse_later->allocation_failure) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
if (ret == -BCH_ERR_option_needs_open_fs) {
|
||||
ret = 0;
|
||||
|
||||
if (parse_later) {
|
||||
prt_printf(parse_later, "%s=%s,", name, val);
|
||||
if (parse_later->allocation_failure)
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -569,28 +570,24 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
|
||||
bch2_opt_set_by_id(opts, id, v);
|
||||
|
||||
ret = 0;
|
||||
goto out;
|
||||
|
||||
bad_opt:
|
||||
pr_err("Bad mount option %s", name);
|
||||
ret = -BCH_ERR_option_name;
|
||||
goto out;
|
||||
|
||||
bad_val:
|
||||
pr_err("Invalid mount option %s", err.buf);
|
||||
ret = -BCH_ERR_option_value;
|
||||
|
||||
out:
|
||||
printbuf_exit(&err);
|
||||
return ret;
|
||||
bad_opt:
|
||||
ret = -BCH_ERR_option_name;
|
||||
goto out;
|
||||
bad_val:
|
||||
ret = -BCH_ERR_option_value;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
|
||||
struct printbuf *parse_later, char *options)
|
||||
struct printbuf *parse_later, char *options,
|
||||
bool ignore_unknown)
|
||||
{
|
||||
char *copied_opts, *copied_opts_start;
|
||||
char *opt, *name, *val;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (!options)
|
||||
return 0;
|
||||
@ -615,14 +612,14 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
|
||||
val = opt;
|
||||
|
||||
ret = bch2_parse_one_mount_opt(c, opts, parse_later, name, val);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret == -BCH_ERR_option_name && ignore_unknown)
|
||||
ret = 0;
|
||||
if (ret) {
|
||||
pr_err("Error parsing option %s: %s", name, bch2_err_str(ret));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
goto out;
|
||||
|
||||
out:
|
||||
kfree(copied_opts_start);
|
||||
return ret;
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ extern const char * const bch2_version_upgrade_opts[];
|
||||
extern const char * const bch2_sb_features[];
|
||||
extern const char * const bch2_sb_compat[];
|
||||
extern const char * const __bch2_btree_ids[];
|
||||
extern const char * const __bch2_csum_types[];
|
||||
extern const char * const __bch2_csum_opts[];
|
||||
extern const char * const __bch2_compression_types[];
|
||||
extern const char * const bch2_compression_opts[];
|
||||
@ -499,11 +500,6 @@ enum fsck_err_opts {
|
||||
OPT_STR(bch2_member_states), \
|
||||
BCH_MEMBER_STATE, BCH_MEMBER_STATE_rw, \
|
||||
"state", "rw,ro,failed,spare") \
|
||||
x(fs_size, u64, \
|
||||
OPT_DEVICE|OPT_HIDDEN, \
|
||||
OPT_UINT(0, S64_MAX), \
|
||||
BCH2_NO_MEMBER_OPT, 0, \
|
||||
"size", "Size of filesystem on device") \
|
||||
x(bucket_size, u32, \
|
||||
OPT_DEVICE|OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \
|
||||
OPT_UINT(0, S64_MAX), \
|
||||
@ -640,7 +636,7 @@ int bch2_opts_check_may_set(struct bch_fs *);
|
||||
int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
|
||||
struct printbuf *, const char *, const char *);
|
||||
int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *,
|
||||
char *);
|
||||
char *, bool);
|
||||
|
||||
/* inode opts: */
|
||||
|
||||
|
@ -95,6 +95,9 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
|
||||
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
|
||||
return 0;
|
||||
|
||||
return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
|
||||
bch2_bkey_ptrs_need_move(c, opts, ptrs);
|
||||
}
|
||||
@ -107,6 +110,9 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
|
||||
if (!opts)
|
||||
return 0;
|
||||
|
||||
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
|
||||
return 0;
|
||||
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
u64 sectors = 0;
|
||||
|
@ -13,12 +13,12 @@
|
||||
#include "disk_accounting.h"
|
||||
#include "errcode.h"
|
||||
#include "error.h"
|
||||
#include "fs-common.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "logged_ops.h"
|
||||
#include "move.h"
|
||||
#include "namei.h"
|
||||
#include "quota.h"
|
||||
#include "rebalance.h"
|
||||
#include "recovery.h"
|
||||
|
@ -16,6 +16,7 @@ enum counters_flags {
|
||||
x(io_read_split, 33, TYPE_COUNTER) \
|
||||
x(io_read_reuse_race, 34, TYPE_COUNTER) \
|
||||
x(io_read_retry, 32, TYPE_COUNTER) \
|
||||
x(io_read_fail_and_poison, 82, TYPE_COUNTER) \
|
||||
x(io_write, 1, TYPE_SECTORS) \
|
||||
x(io_move, 2, TYPE_SECTORS) \
|
||||
x(io_move_read, 35, TYPE_SECTORS) \
|
||||
|
@ -311,13 +311,14 @@ enum bch_fsck_flags {
|
||||
x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
|
||||
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
|
||||
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
|
||||
x(accounting_key_nr_counters_wrong, 307, FSCK_AUTOFIX) \
|
||||
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
|
||||
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
|
||||
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
|
||||
x(directory_size_mismatch, 303, FSCK_AUTOFIX) \
|
||||
x(dirent_cf_name_too_big, 304, 0) \
|
||||
x(dirent_stray_data_after_cf_name, 305, 0) \
|
||||
x(MAX, 307, 0)
|
||||
x(MAX, 308, 0)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
|
||||
|
@ -1990,15 +1990,12 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
if (ca->mi.freespace_initialized) {
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_dev_data_type,
|
||||
.dev_data_type.dev = ca->dev_idx,
|
||||
.dev_data_type.data_type = BCH_DATA_free,
|
||||
};
|
||||
u64 v[3] = { nbuckets - old_nbuckets, 0, 0 };
|
||||
|
||||
ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0,
|
||||
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?:
|
||||
bch2_disk_accounting_mod2(trans, false, v, dev_data_type,
|
||||
.dev = ca->dev_idx,
|
||||
.data_type = BCH_DATA_free)) ?:
|
||||
bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
@ -148,6 +148,7 @@ write_attribute(trigger_btree_key_cache_shrink);
|
||||
write_attribute(trigger_freelist_wakeup);
|
||||
write_attribute(trigger_btree_updates);
|
||||
read_attribute(gc_gens_pos);
|
||||
__sysfs_attribute(read_fua_test, 0400);
|
||||
|
||||
read_attribute(uuid);
|
||||
read_attribute(minor);
|
||||
@ -310,6 +311,116 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
prt_printf(out, "nr_inodes:\t%llu\n", b.nr_inodes);
|
||||
}
|
||||
|
||||
static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bio *bio = NULL;
|
||||
void *buf = NULL;
|
||||
unsigned bs = c->opts.block_size, iters;
|
||||
u64 end, test_duration = NSEC_PER_SEC * 2;
|
||||
struct bch2_time_stats stats_nofua, stats_fua, stats_random;
|
||||
int ret = 0;
|
||||
|
||||
bch2_time_stats_init_no_pcpu(&stats_nofua);
|
||||
bch2_time_stats_init_no_pcpu(&stats_fua);
|
||||
bch2_time_stats_init_no_pcpu(&stats_random);
|
||||
|
||||
if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) {
|
||||
prt_str(out, "offline\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct block_device *bdev = ca->disk_sb.bdev;
|
||||
|
||||
bio = bio_kmalloc(1, GFP_KERNEL);
|
||||
if (!bio) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
buf = kmalloc(bs, GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
end = ktime_get_ns() + test_duration;
|
||||
for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
|
||||
bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ);
|
||||
bch2_bio_map(bio, buf, bs);
|
||||
|
||||
u64 submit_time = ktime_get_ns();
|
||||
ret = submit_bio_wait(bio);
|
||||
bch2_time_stats_update(&stats_nofua, submit_time);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
end = ktime_get_ns() + test_duration;
|
||||
for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
|
||||
bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ);
|
||||
bch2_bio_map(bio, buf, bs);
|
||||
|
||||
u64 submit_time = ktime_get_ns();
|
||||
ret = submit_bio_wait(bio);
|
||||
bch2_time_stats_update(&stats_fua, submit_time);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca);
|
||||
|
||||
end = ktime_get_ns() + test_duration;
|
||||
for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) {
|
||||
bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ);
|
||||
bio->bi_iter.bi_sector = (bch2_get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9;
|
||||
bch2_bio_map(bio, buf, bs);
|
||||
|
||||
u64 submit_time = ktime_get_ns();
|
||||
ret = submit_bio_wait(bio);
|
||||
bch2_time_stats_update(&stats_random, submit_time);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
u64 ns_nofua = mean_and_variance_get_mean(stats_nofua.duration_stats);
|
||||
u64 ns_fua = mean_and_variance_get_mean(stats_fua.duration_stats);
|
||||
u64 ns_rand = mean_and_variance_get_mean(stats_random.duration_stats);
|
||||
|
||||
u64 stddev_nofua = mean_and_variance_get_stddev(stats_nofua.duration_stats);
|
||||
u64 stddev_fua = mean_and_variance_get_stddev(stats_fua.duration_stats);
|
||||
u64 stddev_rand = mean_and_variance_get_stddev(stats_random.duration_stats);
|
||||
|
||||
printbuf_tabstop_push(out, 8);
|
||||
printbuf_tabstop_push(out, 12);
|
||||
printbuf_tabstop_push(out, 12);
|
||||
prt_printf(out, "This test must be run on an idle drive for accurate results\n");
|
||||
prt_printf(out, "%s\n", dev_name(&ca->disk_sb.bdev->bd_device));
|
||||
prt_printf(out, "fua support advertized: %s\n", bdev_fua(bdev) ? "yes" : "no");
|
||||
prt_newline(out);
|
||||
prt_printf(out, "ns:\tlatency\rstddev\r\n");
|
||||
prt_printf(out, "nofua\t%llu\r%llu\r\n", ns_nofua, stddev_nofua);
|
||||
prt_printf(out, "fua\t%llu\r%llu\r\n", ns_fua, stddev_fua);
|
||||
prt_printf(out, "random\t%llu\r%llu\r\n", ns_rand, stddev_rand);
|
||||
|
||||
bool read_cache = ns_nofua * 2 < ns_rand;
|
||||
bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2;
|
||||
|
||||
if (!read_cache)
|
||||
prt_str(out, "reads don't appear to be cached - safe\n");
|
||||
else if (!fua_cached)
|
||||
prt_str(out, "fua reads don't appear to be cached - safe\n");
|
||||
else
|
||||
prt_str(out, "fua reads appear to be cached - unsafe\n");
|
||||
err:
|
||||
kfree(buf);
|
||||
kfree(bio);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
SHOW(bch2_fs)
|
||||
{
|
||||
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
|
||||
@ -823,6 +934,9 @@ SHOW(bch2_dev)
|
||||
if (attr == &sysfs_open_buckets)
|
||||
bch2_open_buckets_to_text(out, c, ca);
|
||||
|
||||
if (attr == &sysfs_read_fua_test)
|
||||
return bch2_read_fua_test(out, ca);
|
||||
|
||||
int opt_id = bch2_opt_lookup(attr->name);
|
||||
if (opt_id >= 0)
|
||||
return sysfs_opt_show(c, ca, opt_id, out);
|
||||
@ -879,6 +993,8 @@ struct attribute *bch2_dev_files[] = {
|
||||
&sysfs_io_latency_stats_write,
|
||||
&sysfs_congested,
|
||||
|
||||
&sysfs_read_fua_test,
|
||||
|
||||
/* debug: */
|
||||
&sysfs_alloc_debug,
|
||||
&sysfs_open_buckets,
|
||||
|
@ -10,6 +10,9 @@
|
||||
#include "eytzinger.h"
|
||||
#include "time_stats.h"
|
||||
|
||||
/* disable automatic switching to percpu mode */
|
||||
#define TIME_STATS_NONPCPU ((struct time_stat_buffer *) 1)
|
||||
|
||||
static const struct time_unit time_units[] = {
|
||||
{ "ns", 1 },
|
||||
{ "us", NSEC_PER_USEC },
|
||||
@ -123,11 +126,12 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!stats->buffer) {
|
||||
if ((unsigned long) stats->buffer <= 1) {
|
||||
spin_lock_irqsave(&stats->lock, flags);
|
||||
time_stats_update_one(stats, start, end);
|
||||
|
||||
if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
|
||||
if (!stats->buffer &&
|
||||
mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
|
||||
stats->duration_stats.n > 1024)
|
||||
stats->buffer =
|
||||
alloc_percpu_gfp(struct time_stat_buffer,
|
||||
@ -157,7 +161,8 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
|
||||
unsigned offset = offsetof(struct bch2_time_stats, min_duration);
|
||||
memset((void *) stats + offset, 0, sizeof(*stats) - offset);
|
||||
|
||||
if (stats->buffer) {
|
||||
if (stats->buffer &&
|
||||
stats->buffer != TIME_STATS_NONPCPU) {
|
||||
int cpu;
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu_ptr(stats->buffer, cpu)->nr = 0;
|
||||
@ -167,7 +172,10 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
|
||||
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *stats)
|
||||
{
|
||||
free_percpu(stats->buffer);
|
||||
if (stats->buffer != TIME_STATS_NONPCPU) {
|
||||
free_percpu(stats->buffer);
|
||||
stats->buffer = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_time_stats_init(struct bch2_time_stats *stats)
|
||||
@ -177,3 +185,9 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
|
||||
stats->min_freq = U64_MAX;
|
||||
spin_lock_init(&stats->lock);
|
||||
}
|
||||
|
||||
void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *stats)
|
||||
{
|
||||
bch2_time_stats_init(stats);
|
||||
stats->buffer = TIME_STATS_NONPCPU;
|
||||
}
|
||||
|
@ -145,6 +145,7 @@ static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
|
||||
void bch2_time_stats_reset(struct bch2_time_stats *);
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *);
|
||||
void bch2_time_stats_init(struct bch2_time_stats *);
|
||||
void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *);
|
||||
|
||||
static inline void bch2_time_stats_quantiles_exit(struct bch2_time_stats_quantiles *statq)
|
||||
{
|
||||
|
@ -339,6 +339,11 @@ DEFINE_EVENT(bio, io_read_reuse_race,
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bio, io_read_fail_and_poison,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
/* ec.c */
|
||||
|
||||
TRACE_EVENT(stripe_create,
|
||||
|
@ -431,7 +431,7 @@ static inline void memcpy_u64s_small(void *dst, const void *src,
|
||||
static inline void __memcpy_u64s(void *dst, const void *src,
|
||||
unsigned u64s)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
#if defined(CONFIG_X86_64) && !defined(CONFIG_KMSAN)
|
||||
long d0, d1, d2;
|
||||
|
||||
asm volatile("rep ; movsq"
|
||||
@ -508,7 +508,7 @@ static inline void __memmove_u64s_up(void *_dst, const void *_src,
|
||||
u64 *dst = (u64 *) _dst + u64s - 1;
|
||||
u64 *src = (u64 *) _src + u64s - 1;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#if defined(CONFIG_X86_64) && !defined(CONFIG_KMSAN)
|
||||
long d0, d1, d2;
|
||||
|
||||
asm volatile("std ;\n"
|
||||
|
Loading…
Reference in New Issue
Block a user