Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-04-03 00:00:03 +03:00)

commit a104f0407b (parent abe1c3bc8e)

Update bcachefs sources to ea93c26e98 fixup! bcachefs: We can handle missing btree roots for all alloc btrees

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Files changed:

  .bcachefs_revision
  Makefile
  Makefile.compiler
  include/linux/
  include/trace/events/
  libbcachefs/: alloc_background.c, alloc_background.h, alloc_foreground.c,
    alloc_foreground.h, alloc_types.h, backpointers.c, backpointers.h,
    bcachefs.h, bcachefs_format.h, bkey_methods.c, bkey_methods.h, bset.c,
    bset.h, btree_cache.c, btree_io.c, btree_io.h, btree_iter.c, btree_iter.h,
    btree_key_cache.c, btree_key_cache.h, btree_locking.c, btree_locking.h,
    btree_types.h, btree_update.h, btree_update_interior.c,
    btree_update_leaf.c, buckets.c, buckets_waiting_for_journal.c,
    buckets_waiting_for_journal_types.h, data_update.c, data_update.h,
    debug.c, dirent.c, dirent.h, ec.c, ec.h, errcode.h, error.c, error.h,
    extents.c, extents.h, fs-io.c, fs.c, fs.h, fsck.c, inode.c, inode.h,
    io.c, io.h, journal.c, journal.h, journal_io.c, journal_reclaim.c,
    journal_types.h, lru.c, lru.h, move.c, move.h, nocow_locking.c,
    nocow_locking.h, opts.c, opts.h, printbuf.c, printbuf.h, quota.c,
    quota.h, recovery.c, reflink.c, reflink.h, replicas.c, replicas.h,
    replicas_types.h, subvolume.c, subvolume.h, super-io.c, super.c,
    super.h, sysfs.c, tests.c, util.c, util.h, xattr.c, xattr.h
  linux/
.bcachefs_revision
@@ -1 +1 @@
-0939e1c73231c779c961e1143e1ba489ef2b168c
+ea93c26e98081d8e1a5fc138e6334b3631983d77
Makefile (8 lines changed)
@@ -221,14 +221,6 @@ update-bcachefs-sources:
	git add linux/generic-radix-tree.c
	cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/
	git add include/linux/kmemleak.h
	cp $(LINUX_DIR)/include/linux/printbuf.h include/linux/
	git add include/linux/printbuf.h
	cp $(LINUX_DIR)/lib/printbuf.c linux/
	git add linux/printbuf.c
	cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/
	git add linux/mean_and_variance.c
	cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/
	git add include/linux/mean_and_variance.h
	cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/
	git add linux/int_sqrt.c
	cp $(LINUX_DIR)/scripts/Makefile.compiler ./
Makefile.compiler
@@ -61,9 +61,13 @@ cc-option-yn = $(call try-run,\
cc-disable-warning = $(call try-run,\
	$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))

# cc-ifversion
# Usage:  EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
# gcc-min-version
# Usage: cflags-$(call gcc-min-version, 70100) += -foo
gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y)

# clang-min-version
# Usage: cflags-$(call clang-min-version, 110000) += -foo
clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y)

# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
@@ -264,4 +264,7 @@ struct qstr {

static inline void dump_stack(void) {}

#define unsafe_memcpy(dst, src, bytes, justification)	\
	memcpy(dst, src, bytes)

#endif
include/linux/mean_and_variance.h
@@ -2,13 +2,35 @@
#ifndef MEAN_AND_VARIANCE_H_
#define MEAN_AND_VARIANCE_H_

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/printbuf.h>

#define SQRT_U64_MAX 4294967295ULL

/**
 * abs - return absolute value of an argument
 * @x: the value.  If it is unsigned type, it is converted to signed type first.
 *     char is treated as if it was signed (regardless of whether it really is)
 *     but the macro's return type is preserved as char.
 *
 * Return: an absolute value of x.
 */
#define abs(x)	__abs_choose_expr(x, long long,				\
		__abs_choose_expr(x, long,				\
		__abs_choose_expr(x, int,				\
		__abs_choose_expr(x, short,				\
		__abs_choose_expr(x, char,				\
		__builtin_choose_expr(					\
			__builtin_types_compatible_p(typeof(x), char),	\
			(char)({ signed char __x = (x); __x<0?-__x:__x; }), \
			((void)0)))))))

#define __abs_choose_expr(x, type, other) __builtin_choose_expr(	\
	__builtin_types_compatible_p(typeof(x), signed type) ||	\
	__builtin_types_compatible_p(typeof(x), unsigned type),	\
	({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)

#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
@@ -81,4 +81,7 @@
/********** net/core/page_pool.c **********/
#define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)

/********** kernel/bpf/ **********/
#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))

#endif
@@ -23,5 +23,11 @@ prandom_type(u32);
prandom_type(u64);
#undef prandom_type

static inline u32 prandom_u32_max(u32 max)
{
	return prandom_u32() % max;
}

#endif /* _LINUX_PRANDOM_H */
@@ -28,6 +28,7 @@
#define TASK_NEW		2048
#define TASK_IDLE_WORKER	4096
#define TASK_STATE_MAX		8192
#define TASK_FREEZABLE		(1U << 14)

/* Convenience macros for the sake of set_task_state */
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
include/linux/seq_buf.h (new file, 153 lines)
@@ -0,0 +1,153 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SEQ_BUF_H
|
||||
#define _LINUX_SEQ_BUF_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Trace sequences are used to allow a function to call several other functions
|
||||
* to create a string of data to use.
|
||||
*/
|
||||
|
||||
/**
|
||||
* seq_buf - seq buffer structure
|
||||
* @buffer: pointer to the buffer
|
||||
* @size: size of the buffer
|
||||
* @len: the amount of data inside the buffer
|
||||
* @readpos: The next position to read in the buffer.
|
||||
*/
|
||||
struct seq_buf {
|
||||
char *buffer;
|
||||
size_t size;
|
||||
size_t len;
|
||||
loff_t readpos;
|
||||
};
|
||||
|
||||
static inline void seq_buf_clear(struct seq_buf *s)
|
||||
{
|
||||
s->len = 0;
|
||||
s->readpos = 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
seq_buf_init(struct seq_buf *s, char *buf, unsigned int size)
|
||||
{
|
||||
s->buffer = buf;
|
||||
s->size = size;
|
||||
seq_buf_clear(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* seq_buf have a buffer that might overflow. When this happens
|
||||
* the len and size are set to be equal.
|
||||
*/
|
||||
static inline bool
|
||||
seq_buf_has_overflowed(struct seq_buf *s)
|
||||
{
|
||||
return s->len > s->size;
|
||||
}
|
||||
|
||||
static inline void
|
||||
seq_buf_set_overflow(struct seq_buf *s)
|
||||
{
|
||||
s->len = s->size + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* How much buffer is left on the seq_buf?
|
||||
*/
|
||||
static inline unsigned int
|
||||
seq_buf_buffer_left(struct seq_buf *s)
|
||||
{
|
||||
if (seq_buf_has_overflowed(s))
|
||||
return 0;
|
||||
|
||||
return s->size - s->len;
|
||||
}
|
||||
|
||||
/* How much buffer was written? */
|
||||
static inline unsigned int seq_buf_used(struct seq_buf *s)
|
||||
{
|
||||
return min(s->len, s->size);
|
||||
}
|
||||
|
||||
/**
|
||||
* seq_buf_terminate - Make sure buffer is nul terminated
|
||||
* @s: the seq_buf descriptor to terminate.
|
||||
*
|
||||
* This makes sure that the buffer in @s is nul terminated and
|
||||
* safe to read as a string.
|
||||
*
|
||||
* Note, if this is called when the buffer has overflowed, then
|
||||
* the last byte of the buffer is zeroed, and the len will still
|
||||
* point passed it.
|
||||
*
|
||||
* After this function is called, s->buffer is safe to use
|
||||
* in string operations.
|
||||
*/
|
||||
static inline void seq_buf_terminate(struct seq_buf *s)
|
||||
{
|
||||
if (WARN_ON(s->size == 0))
|
||||
return;
|
||||
|
||||
if (seq_buf_buffer_left(s))
|
||||
s->buffer[s->len] = 0;
|
||||
else
|
||||
s->buffer[s->size - 1] = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seq_buf_get_buf - get buffer to write arbitrary data to
|
||||
* @s: the seq_buf handle
|
||||
* @bufp: the beginning of the buffer is stored here
|
||||
*
|
||||
* Return the number of bytes available in the buffer, or zero if
|
||||
* there's no space.
|
||||
*/
|
||||
static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
|
||||
{
|
||||
WARN_ON(s->len > s->size + 1);
|
||||
|
||||
if (s->len < s->size) {
|
||||
*bufp = s->buffer + s->len;
|
||||
return s->size - s->len;
|
||||
}
|
||||
|
||||
*bufp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seq_buf_commit - commit data to the buffer
|
||||
* @s: the seq_buf handle
|
||||
* @num: the number of bytes to commit
|
||||
*
|
||||
* Commit @num bytes of data written to a buffer previously acquired
|
||||
* by seq_buf_get. To signal an error condition, or that the data
|
||||
* didn't fit in the available space, pass a negative @num value.
|
||||
*/
|
||||
static inline void seq_buf_commit(struct seq_buf *s, int num)
|
||||
{
|
||||
if (num < 0) {
|
||||
seq_buf_set_overflow(s);
|
||||
} else {
|
||||
/* num must be negative on overflow */
|
||||
BUG_ON(s->len + num > s->size);
|
||||
s->len += num;
|
||||
}
|
||||
}
|
||||
|
||||
extern __printf(2, 3)
|
||||
int seq_buf_printf(struct seq_buf *s, const char *fmt, ...);
|
||||
extern __printf(2, 0)
|
||||
int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args);
|
||||
extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf,
|
||||
int cnt);
|
||||
extern int seq_buf_puts(struct seq_buf *s, const char *str);
|
||||
extern int seq_buf_putc(struct seq_buf *s, unsigned char c);
|
||||
|
||||
void seq_buf_human_readable_u64(struct seq_buf *, u64);
|
||||
|
||||
#endif /* _LINUX_SEQ_BUF_H */
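Note: the header above only declares the seq_buf API; a minimal usage sketch
follows (a hypothetical caller, not part of this commit — the function and
buffer names are illustrative):

/* Format a small report into a caller-supplied buffer using the
 * seq_buf API declared above. */
static void example_report(char *buf, size_t len)
{
	struct seq_buf s;

	seq_buf_init(&s, buf, len);
	seq_buf_printf(&s, "objects: %u", 42);
	seq_buf_puts(&s, ", state: ok");

	if (seq_buf_has_overflowed(&s))
		return;			/* output was truncated */

	seq_buf_terminate(&s);		/* buf is now NUL-terminated */
}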
|
@@ -11,13 +11,13 @@ struct shrink_control {

#define SHRINK_STOP (~0UL)

struct printbuf;
struct seq_buf;
struct shrinker {
	unsigned long (*count_objects)(struct shrinker *,
				       struct shrink_control *sc);
	unsigned long (*scan_objects)(struct shrinker *,
				      struct shrink_control *sc);
	void (*to_text)(struct printbuf *, struct shrinker *);
	void (*to_text)(struct seq_buf *, struct shrinker *);

	int seeks;	/* seeks to recreate an obj */
	long batch;	/* reclaim batch size, 0 = default */
@ -59,6 +59,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/osq_lock.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
@ -79,9 +80,10 @@ union six_lock_state {
|
||||
};
|
||||
|
||||
struct {
|
||||
unsigned read_lock:27;
|
||||
unsigned read_lock:26;
|
||||
unsigned write_locking:1;
|
||||
unsigned intent_lock:1;
|
||||
unsigned nospin:1;
|
||||
unsigned waiters:3;
|
||||
/*
|
||||
* seq works much like in seqlocks: it's incremented every time
|
||||
@ -104,10 +106,10 @@ enum six_lock_type {
|
||||
|
||||
struct six_lock {
|
||||
union six_lock_state state;
|
||||
unsigned intent_lock_recurse;
|
||||
struct task_struct *owner;
|
||||
unsigned __percpu *readers;
|
||||
unsigned intent_lock_recurse;
|
||||
unsigned long ip;
|
||||
struct optimistic_spin_queue osq;
|
||||
raw_spinlock_t wait_lock;
|
||||
struct list_head wait_list;
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
@ -148,12 +150,37 @@ do { \
|
||||
#define __SIX_VAL(field, _v) (((union six_lock_state) { .field = _v }).v)
|
||||
|
||||
#define __SIX_LOCK(type) \
|
||||
bool six_trylock_##type(struct six_lock *); \
|
||||
bool six_relock_##type(struct six_lock *, u32); \
|
||||
int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\
|
||||
int six_lock_waiter_##type(struct six_lock *, struct six_lock_waiter *, \
|
||||
six_lock_should_sleep_fn, void *); \
|
||||
void six_unlock_##type(struct six_lock *);
|
||||
bool six_trylock_ip_##type(struct six_lock *, unsigned long); \
|
||||
bool six_relock_ip_##type(struct six_lock *, u32, unsigned long); \
|
||||
int six_lock_ip_##type(struct six_lock *, six_lock_should_sleep_fn, \
|
||||
void *, unsigned long); \
|
||||
int six_lock_ip_waiter_##type(struct six_lock *, struct six_lock_waiter *,\
|
||||
six_lock_should_sleep_fn, void *, unsigned long);\
|
||||
void six_unlock_ip_##type(struct six_lock *, unsigned long); \
|
||||
\
|
||||
static inline bool six_trylock_##type(struct six_lock *lock) \
|
||||
{ \
|
||||
return six_trylock_ip_##type(lock, _THIS_IP_); \
|
||||
} \
|
||||
static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \
|
||||
{ \
|
||||
return six_relock_ip_##type(lock, seq, _THIS_IP_); \
|
||||
} \
|
||||
static inline int six_lock_##type(struct six_lock *lock, \
|
||||
six_lock_should_sleep_fn fn, void *p)\
|
||||
{ \
|
||||
return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \
|
||||
} \
|
||||
static inline int six_lock_waiter_##type(struct six_lock *lock, \
|
||||
struct six_lock_waiter *wait, \
|
||||
six_lock_should_sleep_fn fn, void *p) \
|
||||
{ \
|
||||
return six_lock_ip_waiter_##type(lock, wait, fn, p, _THIS_IP_); \
|
||||
} \
|
||||
static inline void six_unlock_##type(struct six_lock *lock) \
|
||||
{ \
|
||||
return six_unlock_ip_##type(lock, _THIS_IP_); \
|
||||
}
|
||||
|
||||
__SIX_LOCK(read)
|
||||
__SIX_LOCK(intent)
|
||||
@ -189,6 +216,14 @@ static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p);
|
||||
}
|
||||
|
||||
static inline int six_lock_type_ip_waiter(struct six_lock *lock, enum six_lock_type type,
|
||||
struct six_lock_waiter *wait,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
SIX_LOCK_DISPATCH(type, six_lock_ip_waiter, lock, wait, should_sleep_fn, p, ip);
|
||||
}
|
||||
|
||||
static inline int six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
|
||||
struct six_lock_waiter *wait,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||
|
@ -174,6 +174,11 @@ static inline void *kmem_cache_alloc(struct kmem_cache *c, gfp_t gfp)
|
||||
return kmalloc(c->obj_size, gfp);
|
||||
}
|
||||
|
||||
static inline void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t gfp)
|
||||
{
|
||||
return kzalloc(c->obj_size, gfp);
|
||||
}
|
||||
|
||||
static inline void kmem_cache_free(struct kmem_cache *c, void *p)
|
||||
{
|
||||
kfree(p);
|
||||
|
@ -18,10 +18,12 @@ struct __wait_queue {
|
||||
struct list_head task_list;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
struct wait_queue_head {
|
||||
spinlock_t lock;
|
||||
struct list_head task_list;
|
||||
} wait_queue_head_t;
|
||||
};
|
||||
|
||||
typedef struct wait_queue_head wait_queue_head_t;
|
||||
|
||||
void wake_up(wait_queue_head_t *);
|
||||
void wake_up_all(wait_queue_head_t *);
|
||||
@ -42,7 +44,7 @@ int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *ke
|
||||
.task_list = { &(name).task_list, &(name).task_list } }
|
||||
|
||||
#define DECLARE_WAIT_QUEUE_HEAD(name) \
|
||||
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
|
||||
struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
|
||||
|
||||
static inline void init_waitqueue_head(wait_queue_head_t *q)
|
||||
{
|
||||
|
@ -514,34 +514,10 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
|
||||
|
||||
/* Allocator */
|
||||
|
||||
TRACE_EVENT(bucket_alloc,
|
||||
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
|
||||
bool user, u64 bucket),
|
||||
TP_ARGS(ca, alloc_reserve, user, bucket),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev )
|
||||
__array(char, reserve, 16 )
|
||||
__field(bool, user )
|
||||
__field(u64, bucket )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = ca->dev;
|
||||
strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
|
||||
__entry->user = user;
|
||||
__entry->bucket = bucket;
|
||||
),
|
||||
|
||||
TP_printk("%d,%d reserve %s user %u bucket %llu",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->reserve,
|
||||
__entry->user,
|
||||
__entry->bucket)
|
||||
);
|
||||
|
||||
TRACE_EVENT(bucket_alloc_fail,
|
||||
DECLARE_EVENT_CLASS(bucket_alloc,
|
||||
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
|
||||
bool user,
|
||||
u64 bucket,
|
||||
u64 free,
|
||||
u64 avail,
|
||||
u64 copygc_wait_amount,
|
||||
@ -549,12 +525,15 @@ TRACE_EVENT(bucket_alloc_fail,
|
||||
struct bucket_alloc_state *s,
|
||||
bool nonblocking,
|
||||
const char *err),
|
||||
TP_ARGS(ca, alloc_reserve, free, avail, copygc_wait_amount, copygc_waiting_for,
|
||||
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
|
||||
copygc_wait_amount, copygc_waiting_for,
|
||||
s, nonblocking, err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev )
|
||||
__array(char, reserve, 16 )
|
||||
__field(bool, user )
|
||||
__field(u64, bucket )
|
||||
__field(u64, free )
|
||||
__field(u64, avail )
|
||||
__field(u64, copygc_wait_amount )
|
||||
@ -571,6 +550,8 @@ TRACE_EVENT(bucket_alloc_fail,
|
||||
TP_fast_assign(
|
||||
__entry->dev = ca->dev;
|
||||
strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
|
||||
__entry->user = user;
|
||||
__entry->bucket = bucket;
|
||||
__entry->free = free;
|
||||
__entry->avail = avail;
|
||||
__entry->copygc_wait_amount = copygc_wait_amount;
|
||||
@ -584,9 +565,11 @@ TRACE_EVENT(bucket_alloc_fail,
|
||||
strscpy(__entry->err, err, sizeof(__entry->err));
|
||||
),
|
||||
|
||||
TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u nocow %llu err %s",
|
||||
TP_printk("%d,%d reserve %s user %u bucket %llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->reserve,
|
||||
__entry->user,
|
||||
__entry->bucket,
|
||||
__entry->free,
|
||||
__entry->avail,
|
||||
__entry->copygc_wait_amount,
|
||||
@ -595,11 +578,43 @@ TRACE_EVENT(bucket_alloc_fail,
|
||||
__entry->open,
|
||||
__entry->need_journal_commit,
|
||||
__entry->nouse,
|
||||
__entry->nonblocking,
|
||||
__entry->nocow,
|
||||
__entry->nonblocking,
|
||||
__entry->err)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bucket_alloc, bucket_alloc,
|
||||
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
|
||||
bool user,
|
||||
u64 bucket,
|
||||
u64 free,
|
||||
u64 avail,
|
||||
u64 copygc_wait_amount,
|
||||
s64 copygc_waiting_for,
|
||||
struct bucket_alloc_state *s,
|
||||
bool nonblocking,
|
||||
const char *err),
|
||||
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
|
||||
copygc_wait_amount, copygc_waiting_for,
|
||||
s, nonblocking, err)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
|
||||
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
|
||||
bool user,
|
||||
u64 bucket,
|
||||
u64 free,
|
||||
u64 avail,
|
||||
u64 copygc_wait_amount,
|
||||
s64 copygc_waiting_for,
|
||||
struct bucket_alloc_state *s,
|
||||
bool nonblocking,
|
||||
const char *err),
|
||||
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
|
||||
copygc_wait_amount, copygc_waiting_for,
|
||||
s, nonblocking, err)
|
||||
);
|
||||
|
||||
TRACE_EVENT(discard_buckets,
|
||||
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
|
||||
u64 need_journal_commit, u64 discarded, const char *err),
|
||||
@ -673,7 +688,7 @@ DEFINE_EVENT(bkey, move_extent_finish,
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bkey, move_extent_race,
|
||||
DEFINE_EVENT(bkey, move_extent_fail,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
include/trace/events/lock.h (new file, 144 lines)
@@ -0,0 +1,144 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM lock
|
||||
|
||||
#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_LOCK_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
/* flags for lock:contention_begin */
|
||||
#define LCB_F_SPIN (1U << 0)
|
||||
#define LCB_F_READ (1U << 1)
|
||||
#define LCB_F_WRITE (1U << 2)
|
||||
#define LCB_F_RT (1U << 3)
|
||||
#define LCB_F_PERCPU (1U << 4)
|
||||
#define LCB_F_MUTEX (1U << 5)
|
||||
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
TRACE_EVENT(lock_acquire,
|
||||
|
||||
TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
|
||||
int trylock, int read, int check,
|
||||
struct lockdep_map *next_lock, unsigned long ip),
|
||||
|
||||
TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, flags)
|
||||
__string(name, lock->name)
|
||||
__field(void *, lockdep_addr)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
|
||||
__assign_str(name, lock->name);
|
||||
__entry->lockdep_addr = lock;
|
||||
),
|
||||
|
||||
TP_printk("%p %s%s%s", __entry->lockdep_addr,
|
||||
(__entry->flags & 1) ? "try " : "",
|
||||
(__entry->flags & 2) ? "read " : "",
|
||||
__get_str(name))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(lock,
|
||||
|
||||
TP_PROTO(struct lockdep_map *lock, unsigned long ip),
|
||||
|
||||
TP_ARGS(lock, ip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string( name, lock->name )
|
||||
__field( void *, lockdep_addr )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, lock->name);
|
||||
__entry->lockdep_addr = lock;
|
||||
),
|
||||
|
||||
TP_printk("%p %s", __entry->lockdep_addr, __get_str(name))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(lock, lock_release,
|
||||
|
||||
TP_PROTO(struct lockdep_map *lock, unsigned long ip),
|
||||
|
||||
TP_ARGS(lock, ip)
|
||||
);
|
||||
|
||||
#ifdef CONFIG_LOCK_STAT
|
||||
|
||||
DEFINE_EVENT(lock, lock_contended,
|
||||
|
||||
TP_PROTO(struct lockdep_map *lock, unsigned long ip),
|
||||
|
||||
TP_ARGS(lock, ip)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(lock, lock_acquired,
|
||||
|
||||
TP_PROTO(struct lockdep_map *lock, unsigned long ip),
|
||||
|
||||
TP_ARGS(lock, ip)
|
||||
);
|
||||
|
||||
#endif /* CONFIG_LOCK_STAT */
|
||||
#endif /* CONFIG_LOCKDEP */
|
||||
|
||||
TRACE_EVENT(contention_begin,
|
||||
|
||||
TP_PROTO(void *lock, unsigned int flags),
|
||||
|
||||
TP_ARGS(lock, flags),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(void *, lock_addr)
|
||||
__field(unsigned int, flags)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->lock_addr = lock;
|
||||
__entry->flags = flags;
|
||||
),
|
||||
|
||||
TP_printk("%p (flags=%s)", __entry->lock_addr,
|
||||
__print_flags(__entry->flags, "|",
|
||||
{ LCB_F_SPIN, "SPIN" },
|
||||
{ LCB_F_READ, "READ" },
|
||||
{ LCB_F_WRITE, "WRITE" },
|
||||
{ LCB_F_RT, "RT" },
|
||||
{ LCB_F_PERCPU, "PERCPU" },
|
||||
{ LCB_F_MUTEX, "MUTEX" }
|
||||
))
|
||||
);
|
||||
|
||||
TRACE_EVENT(contention_end,
|
||||
|
||||
TP_PROTO(void *lock, int ret),
|
||||
|
||||
TP_ARGS(lock, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(void *, lock_addr)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->lock_addr = lock;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_LOCK_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
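Note: the TRACE_EVENT(contention_begin) / TRACE_EVENT(contention_end)
definitions above generate trace_contention_begin() / trace_contention_end()
hooks; a sketch of how a lock slow path would emit them (the surrounding
function is illustrative, not part of this commit):

#include <trace/events/lock.h>

static void example_lock_slowpath(void *lock)
{
	trace_contention_begin(lock, LCB_F_READ | LCB_F_SPIN);
	/* ... spin or sleep until the lock is acquired ... */
	trace_contention_end(lock, 0);
}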
|
@ -222,7 +222,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
|
||||
}
|
||||
|
||||
int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
|
||||
|
||||
@ -237,7 +237,7 @@ int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
}
|
||||
|
||||
int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_alloc_unpacked u;
|
||||
|
||||
@ -250,7 +250,7 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
}
|
||||
|
||||
int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_alloc_unpacked u;
|
||||
|
||||
@ -263,9 +263,10 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
}
|
||||
|
||||
int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
|
||||
int rw = flags & WRITE;
|
||||
|
||||
if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) {
|
||||
prt_printf(err, "bad val size (%lu != %u)",
|
||||
@ -279,11 +280,9 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX this is wrong, we'll be checking updates that happened from
|
||||
* before BCH_FS_CHECK_BACKPOINTERS_DONE
|
||||
*/
|
||||
if (rw == WRITE && test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
|
||||
if (rw == WRITE &&
|
||||
!(flags & BKEY_INVALID_FROM_JOURNAL) &&
|
||||
test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
|
||||
unsigned i, bp_len = 0;
|
||||
|
||||
for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++)
|
||||
@ -621,7 +620,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
|
||||
}
|
||||
|
||||
int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) {
|
||||
prt_printf(err, "bad val size (%lu != %zu)",
|
||||
@ -1607,7 +1606,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
|
||||
struct bch_dev *ca;
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bool did_discard = false;
|
||||
int ret = 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, pos.inode);
|
||||
@ -1683,15 +1681,13 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
|
||||
k.k->p.offset * ca->mi.bucket_size,
|
||||
ca->mi.bucket_size,
|
||||
GFP_KERNEL);
|
||||
*discard_pos_done = iter.pos;
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
ret = bch2_trans_relock_notrace(trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
*discard_pos_done = iter.pos;
|
||||
did_discard = true;
|
||||
|
||||
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
|
||||
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
|
||||
write:
|
||||
@ -1701,11 +1697,10 @@ write:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (did_discard) {
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
|
||||
(*discarded)++;
|
||||
}
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
|
||||
(*discarded)++;
|
||||
out:
|
||||
(*seen)++;
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
printbuf_exit(&buf);
|
||||
@ -1742,7 +1737,7 @@ static void bch2_do_discards_work(struct work_struct *work)
|
||||
if (need_journal_commit * 2 > seen)
|
||||
bch2_journal_flush_async(&c->journal, NULL);
|
||||
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
|
||||
|
||||
trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
|
||||
bch2_err_str(ret));
|
||||
@ -1750,44 +1745,45 @@ static void bch2_do_discards_work(struct work_struct *work)
|
||||
|
||||
void bch2_do_discards(struct bch_fs *c)
|
||||
{
|
||||
if (percpu_ref_tryget_live(&c->writes) &&
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
|
||||
!queue_work(system_long_wq, &c->discard_work))
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
|
||||
}
|
||||
|
||||
static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bpos bucket,
|
||||
struct bkey_s_c lru_k,
|
||||
s64 *nr_to_invalidate)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter alloc_iter = { NULL };
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
struct bkey_i_alloc_v4 *a = NULL;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
|
||||
unsigned cached_sectors;
|
||||
int ret = 0;
|
||||
|
||||
if (*nr_to_invalidate <= 0)
|
||||
return 1;
|
||||
|
||||
if (!bch2_dev_bucket_exists(c, bucket)) {
|
||||
prt_str(&buf, "lru entry points to invalid bucket");
|
||||
goto err;
|
||||
}
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) {
|
||||
prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
|
||||
bch2_bpos_to_text(&buf, lru_iter->pos);
|
||||
prt_printf(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
prt_str(&buf, "alloc key does not point back to lru entry when invalidating bucket:");
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch_err(c, "%s", buf.buf);
|
||||
if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
|
||||
bch2_inconsistent_error(c);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
goto out;
|
||||
if (a->v.data_type != BCH_DATA_cached) {
|
||||
prt_str(&buf, "lru entry points to non cached bucket:");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!a->v.cached_sectors)
|
||||
@ -1816,6 +1812,26 @@ out:
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
err:
|
||||
prt_str(&buf, "\n lru key: ");
|
||||
bch2_bkey_val_to_text(&buf, c, lru_k);
|
||||
|
||||
prt_str(&buf, "\n lru entry: ");
|
||||
bch2_lru_pos_to_text(&buf, lru_iter->pos);
|
||||
|
||||
prt_str(&buf, "\n alloc key: ");
|
||||
if (!a)
|
||||
bch2_bpos_to_text(&buf, bucket);
|
||||
else
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
|
||||
bch_err(c, "%s", buf.buf);
|
||||
if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
|
||||
bch2_inconsistent_error(c);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
@ -1838,9 +1854,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
lru_pos(ca->dev_idx, 0, 0),
|
||||
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
|
||||
BTREE_ITER_INTENT, k,
|
||||
invalidate_one_bucket(&trans, &iter,
|
||||
u64_to_bucket(k.k->p.offset),
|
||||
&nr_to_invalidate));
|
||||
invalidate_one_bucket(&trans, &iter, k, &nr_to_invalidate));
|
||||
|
||||
if (ret < 0) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
@ -1849,14 +1863,14 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||
}
|
||||
|
||||
void bch2_do_invalidates(struct bch_fs *c)
|
||||
{
|
||||
if (percpu_ref_tryget_live(&c->writes) &&
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
|
||||
!queue_work(system_long_wq, &c->invalidate_work))
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||
}
|
||||
|
||||
static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
@ -122,10 +122,10 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s
|
||||
|
||||
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
|
||||
|
||||
int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
void bch2_alloc_v4_swab(struct bkey_s);
|
||||
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
@ -158,7 +158,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
.atomic_trigger = bch2_mark_alloc, \
|
||||
})
|
||||
|
||||
int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \
|
||||
|
@ -58,6 +58,17 @@ const char * const bch2_alloc_reserves[] = {
|
||||
* reference _after_ doing the index update that makes its allocation reachable.
|
||||
*/
|
||||
|
||||
void bch2_reset_alloc_cursors(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_member_device_rcu(ca, c, i, NULL)
|
||||
ca->alloc_cursor = 0;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
open_bucket_idx_t idx = ob - c->open_buckets;
|
||||
@ -272,7 +283,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
|
||||
}
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
return ob;
|
||||
}
|
||||
|
||||
@ -418,12 +428,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct open_bucket *ob = NULL;
|
||||
u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
|
||||
u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
|
||||
int ret;
|
||||
|
||||
s->cur_bucket = max_t(u64, s->cur_bucket, ca->mi.first_bucket);
|
||||
s->cur_bucket = max_t(u64, s->cur_bucket, ca->new_fs_bucket_idx);
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, s->cur_bucket),
|
||||
again:
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
|
||||
BTREE_ITER_SLOTS, k, ret) {
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a;
|
||||
@ -448,9 +457,17 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
s->cur_bucket = iter.pos.offset;
|
||||
ca->alloc_cursor = alloc_cursor;
|
||||
|
||||
return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
|
||||
if (!ob && ret)
|
||||
ob = ERR_PTR(ret);
|
||||
|
||||
if (!ob && alloc_cursor > alloc_start) {
|
||||
alloc_cursor = alloc_start;
|
||||
goto again;
|
||||
}
|
||||
|
||||
return ob;
|
||||
}
|
||||
|
||||
static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
|
||||
@ -462,33 +479,34 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct open_bucket *ob = NULL;
|
||||
u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor));
|
||||
u64 alloc_cursor = alloc_start;
|
||||
int ret;
|
||||
|
||||
BUG_ON(ca->new_fs_bucket_idx);
|
||||
|
||||
/*
|
||||
* XXX:
|
||||
* On transaction restart, we'd like to restart from the bucket we were
|
||||
* at previously
|
||||
*/
|
||||
again:
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
|
||||
POS(ca->dev_idx, s->cur_bucket), 0, k, ret) {
|
||||
POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
|
||||
if (k.k->p.inode != ca->dev_idx)
|
||||
break;
|
||||
|
||||
for (s->cur_bucket = max(s->cur_bucket, bkey_start_offset(k.k));
|
||||
s->cur_bucket < k.k->p.offset;
|
||||
s->cur_bucket++) {
|
||||
for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
|
||||
alloc_cursor < k.k->p.offset;
|
||||
alloc_cursor++) {
|
||||
ret = btree_trans_too_many_iters(trans);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
ob = ERR_PTR(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
s->buckets_seen++;
|
||||
|
||||
ob = try_alloc_bucket(trans, ca, reserve,
|
||||
s->cur_bucket, s, k, cl);
|
||||
if (ob)
|
||||
alloc_cursor, s, k, cl);
|
||||
if (ob) {
|
||||
iter.path->preserve = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ob || ret)
|
||||
@ -496,7 +514,17 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
return ob ?: ERR_PTR(ret);
|
||||
ca->alloc_cursor = alloc_cursor;
|
||||
|
||||
if (!ob && ret)
|
||||
ob = ERR_PTR(ret);
|
||||
|
||||
if (!ob && alloc_start > ca->mi.first_bucket) {
|
||||
alloc_cursor = alloc_start = ca->mi.first_bucket;
|
||||
goto again;
|
||||
}
|
||||
|
||||
return ob;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -514,9 +542,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
|
||||
struct bch_fs *c = trans->c;
|
||||
struct open_bucket *ob = NULL;
|
||||
bool freespace = READ_ONCE(ca->mi.freespace_initialized);
|
||||
u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor;
|
||||
u64 avail;
|
||||
struct bucket_alloc_state s = { .cur_bucket = start };
|
||||
struct bucket_alloc_state s = { 0 };
|
||||
bool waiting = false;
|
||||
again:
|
||||
bch2_dev_usage_read_fast(ca, usage);
|
||||
@ -561,28 +588,31 @@ alloc:
|
||||
if (s.skipped_need_journal_commit * 2 > avail)
|
||||
bch2_journal_flush_async(&c->journal, NULL);
|
||||
|
||||
if (!ob && !freespace && start) {
|
||||
start = s.cur_bucket = 0;
|
||||
goto alloc;
|
||||
}
|
||||
|
||||
if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
|
||||
freespace = false;
|
||||
goto alloc;
|
||||
}
|
||||
|
||||
if (!freespace)
|
||||
ca->bucket_alloc_trans_early_cursor = s.cur_bucket;
|
||||
err:
|
||||
if (!ob)
|
||||
ob = ERR_PTR(-BCH_ERR_no_buckets_found);
|
||||
|
||||
if (!IS_ERR(ob))
|
||||
trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
|
||||
may_alloc_partial, ob->bucket);
|
||||
trace_and_count(c, bucket_alloc, ca,
|
||||
bch2_alloc_reserves[reserve],
|
||||
may_alloc_partial,
|
||||
ob->bucket,
|
||||
usage->d[BCH_DATA_free].buckets,
|
||||
avail,
|
||||
bch2_copygc_wait_amount(c),
|
||||
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
|
||||
&s,
|
||||
cl == NULL,
|
||||
"");
|
||||
else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
|
||||
trace_and_count(c, bucket_alloc_fail,
|
||||
ca, bch2_alloc_reserves[reserve],
|
||||
trace_and_count(c, bucket_alloc_fail, ca,
|
||||
bch2_alloc_reserves[reserve],
|
||||
may_alloc_partial,
|
||||
0,
|
||||
usage->d[BCH_DATA_free].buckets,
|
||||
avail,
|
||||
bch2_copygc_wait_amount(c),
|
||||
@ -1130,16 +1160,16 @@ out:
|
||||
* Get us an open_bucket we can allocate from, return with it locked:
|
||||
*/
|
||||
int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
|
||||
unsigned target,
|
||||
unsigned erasure_code,
|
||||
struct write_point_specifier write_point,
|
||||
struct bch_devs_list *devs_have,
|
||||
unsigned nr_replicas,
|
||||
unsigned nr_replicas_required,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *cl,
|
||||
struct write_point **wp_ret)
|
||||
unsigned target,
|
||||
unsigned erasure_code,
|
||||
struct write_point_specifier write_point,
|
||||
struct bch_devs_list *devs_have,
|
||||
unsigned nr_replicas,
|
||||
unsigned nr_replicas_required,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *cl,
|
||||
struct write_point **wp_ret)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct write_point *wp;
|
||||
@ -1336,3 +1366,33 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
spin_unlock(&ob->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static const char * const bch2_write_point_states[] = {
|
||||
#define x(n) #n,
|
||||
WRITE_POINT_STATES()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct write_point *wp;
|
||||
unsigned i;
|
||||
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + ARRAY_SIZE(c->write_points);
|
||||
wp++) {
|
||||
prt_printf(out, "%lu: ", wp->write_point);
|
||||
prt_human_readable_u64(out, wp->sectors_allocated);
|
||||
|
||||
prt_printf(out, " last wrote: ");
|
||||
bch2_pr_time_units(out, sched_clock() - wp->last_used);
|
||||
|
||||
for (i = 0; i < WRITE_POINT_STATE_NR; i++) {
|
||||
prt_printf(out, " %s: ", bch2_write_point_states[i]);
|
||||
bch2_pr_time_units(out, wp->time[i]);
|
||||
}
|
||||
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ struct bch_devs_List;
|
||||
|
||||
extern const char * const bch2_alloc_reserves[];
|
||||
|
||||
void bch2_reset_alloc_cursors(struct bch_fs *);
|
||||
|
||||
struct dev_alloc_list {
|
||||
unsigned nr;
|
||||
u8 devs[BCH_SB_MEMBERS_MAX];
|
||||
@ -178,7 +180,8 @@ bch2_alloc_sectors_append_ptrs_inlined(struct bch_fs *c, struct write_point *wp,
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(sectors > wp->sectors_free);
|
||||
wp->sectors_free -= sectors;
|
||||
wp->sectors_free -= sectors;
|
||||
wp->sectors_allocated += sectors;
|
||||
|
||||
open_bucket_for_each(c, &wp->ptrs, ob, i) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
|
||||
@ -219,4 +222,6 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *);
|
||||
|
||||
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "fifo.h"
|
||||
|
||||
struct bucket_alloc_state {
|
||||
u64 cur_bucket;
|
||||
u64 buckets_seen;
|
||||
u64 skipped_open;
|
||||
u64 skipped_need_journal_commit;
|
||||
@ -75,6 +74,19 @@ struct dev_stripe_state {
|
||||
u64 next_alloc[BCH_SB_MEMBERS_MAX];
|
||||
};
|
||||
|
||||
#define WRITE_POINT_STATES() \
|
||||
x(stopped) \
|
||||
x(waiting_io) \
|
||||
x(waiting_work) \
|
||||
x(running)
|
||||
|
||||
enum write_point_state {
|
||||
#define x(n) WRITE_POINT_##n,
|
||||
WRITE_POINT_STATES()
|
||||
#undef x
|
||||
WRITE_POINT_STATE_NR
|
||||
};
|
||||
|
||||
struct write_point {
|
||||
struct {
|
||||
struct hlist_node node;
|
||||
@ -88,6 +100,8 @@ struct write_point {
|
||||
|
||||
struct open_buckets ptrs;
|
||||
struct dev_stripe_state stripe;
|
||||
|
||||
u64 sectors_allocated;
|
||||
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
|
||||
|
||||
struct {
|
||||
@ -95,6 +109,10 @@ struct write_point {
|
||||
|
||||
struct list_head writes;
|
||||
spinlock_t writes_lock;
|
||||
|
||||
enum write_point_state state;
|
||||
u64 last_state_change;
|
||||
u64 time[WRITE_POINT_STATE_NR];
|
||||
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
|
||||
};
|
||||
|
||||
|
@ -69,7 +69,7 @@ static bool extent_matches_bp(struct bch_fs *c,
|
||||
}
|
||||
|
||||
int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "super.h"
|
||||
|
||||
int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
|
||||
int, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
|
||||
void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_backpointer_swab(struct bkey_s);
|
||||
|
@ -210,6 +210,10 @@
|
||||
#include "opts.h"
|
||||
#include "util.h"
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
#define BCH_WRITE_REF_DEBUG
|
||||
#endif
|
||||
|
||||
#define dynamic_fault(...) 0
|
||||
#define race_fault(...) 0
|
||||
|
||||
@ -503,7 +507,7 @@ struct bch_dev {
|
||||
|
||||
/* Allocator: */
|
||||
u64 new_fs_bucket_idx;
|
||||
u64 bucket_alloc_trans_early_cursor;
|
||||
u64 alloc_cursor;
|
||||
|
||||
unsigned nr_open_buckets;
|
||||
unsigned nr_btree_reserve;
|
||||
@ -524,7 +528,7 @@ struct bch_dev {
|
||||
|
||||
/* The rest of this all shows up in sysfs */
|
||||
atomic64_t cur_latency[2];
|
||||
struct time_stats io_latency[2];
|
||||
struct bch2_time_stats io_latency[2];
|
||||
|
||||
#define CONGESTED_MAX 1024
|
||||
atomic_t congested;
|
||||
@ -543,6 +547,7 @@ enum {
|
||||
/* shutdown: */
|
||||
BCH_FS_STOPPING,
|
||||
BCH_FS_EMERGENCY_RO,
|
||||
BCH_FS_GOING_RO,
|
||||
BCH_FS_WRITE_DISABLE_COMPLETE,
|
||||
BCH_FS_CLEAN_SHUTDOWN,
|
||||
|
||||
@ -573,8 +578,8 @@ struct btree_debug {
|
||||
#define BCH_TRANSACTIONS_NR 128
|
||||
|
||||
struct btree_transaction_stats {
|
||||
struct bch2_time_stats lock_hold_times;
|
||||
struct mutex lock;
|
||||
struct time_stats lock_hold_times;
|
||||
unsigned nr_max_paths;
|
||||
unsigned max_mem;
|
||||
char *max_paths_text;
|
||||
@ -634,6 +639,29 @@ typedef struct {
|
||||
#define BCACHEFS_ROOT_SUBVOL_INUM \
|
||||
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
|
||||
|
||||
#define BCH_WRITE_REFS() \
|
||||
x(trans) \
|
||||
x(write) \
|
||||
x(promote) \
|
||||
x(node_rewrite) \
|
||||
x(stripe_create) \
|
||||
x(stripe_delete) \
|
||||
x(reflink) \
|
||||
x(fallocate) \
|
||||
x(discard) \
|
||||
x(invalidate) \
|
||||
x(move) \
|
||||
x(delete_dead_snapshots) \
|
||||
x(snapshot_delete_pagecache) \
|
||||
x(sysfs)
|
||||
|
||||
enum bch_write_ref {
|
||||
#define x(n) BCH_WRITE_REF_##n,
|
||||
BCH_WRITE_REFS()
|
||||
#undef x
|
||||
BCH_WRITE_REF_NR,
|
||||
};
|
||||
|
||||
struct bch_fs {
|
||||
struct closure cl;
|
||||
|
||||
@ -655,7 +683,11 @@ struct bch_fs {
|
||||
struct rw_semaphore state_lock;
|
||||
|
||||
/* Counts outstanding writes, for clean transition to read-only */
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
atomic_long_t writes[BCH_WRITE_REF_NR];
|
||||
#else
|
||||
struct percpu_ref writes;
|
||||
#endif
|
||||
struct work_struct read_only_work;
|
||||
|
||||
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
|
||||
@ -857,6 +889,7 @@ struct bch_fs {
|
||||
struct mutex gc_gens_lock;
|
||||
|
||||
/* IO PATH */
|
||||
struct semaphore io_in_flight;
|
||||
struct bio_set bio_read;
|
||||
struct bio_set bio_read_split;
|
||||
struct bio_set bio_write;
|
||||
@ -969,11 +1002,51 @@ struct bch_fs {
|
||||
unsigned copy_gc_enabled:1;
|
||||
bool promote_whole_extents;
|
||||
|
||||
struct time_stats times[BCH_TIME_STAT_NR];
|
||||
struct bch2_time_stats times[BCH_TIME_STAT_NR];
|
||||
|
||||
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
|
||||
};
|
||||
|
||||
extern struct wait_queue_head bch2_read_only_wait;
|
||||
|
||||
static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
|
||||
{
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
atomic_long_inc(&c->writes[ref]);
|
||||
#else
|
||||
percpu_ref_get(&c->writes);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
|
||||
{
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
|
||||
atomic_long_inc_not_zero(&c->writes[ref]);
|
||||
#else
|
||||
return percpu_ref_tryget_live(&c->writes);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
|
||||
{
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
long v = atomic_long_dec_return(&c->writes[ref]);
|
||||
|
||||
BUG_ON(v < 0);
|
||||
if (v)
|
||||
return;
|
||||
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
|
||||
if (atomic_long_read(&c->writes[i]))
|
||||
return;
|
||||
|
||||
set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
wake_up(&bch2_read_only_wait);
|
||||
#else
|
||||
percpu_ref_put(&c->writes);
|
||||
#endif
|
||||
}
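Note: a sketch of the pattern these write-ref helpers support, mirroring how
bch2_do_discards()/bch2_do_discards_work() in this commit use them (the
function names below are illustrative only):

/* Take a named write ref before queueing work; drop it if queueing fails,
 * and drop it again when the work item completes. */
static void example_queue_discards(struct bch_fs *c)
{
	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
	    !queue_work(system_long_wq, &c->discard_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

static void example_discard_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);

	/* ... process pending discards ... */

	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}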
|
||||
|
||||
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
|
||||
{
|
||||
#ifndef NO_BCACHEFS_FS
|
||||
|
@ -1357,7 +1357,7 @@ struct bch_replicas_entry {
|
||||
|
||||
struct bch_sb_field_replicas {
|
||||
struct bch_sb_field field;
|
||||
struct bch_replicas_entry entries[0];
|
||||
struct bch_replicas_entry entries[];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* BCH_SB_FIELD_quota: */
|
||||
@ -1436,7 +1436,7 @@ struct bch_sb_field_disk_groups {
|
||||
x(move_extent_read, 35) \
|
||||
x(move_extent_write, 36) \
|
||||
x(move_extent_finish, 37) \
|
||||
x(move_extent_race, 38) \
|
||||
x(move_extent_fail, 38) \
|
||||
x(move_extent_alloc_mem_fail, 39) \
|
||||
x(copygc, 40) \
|
||||
x(copygc_wait, 41) \
|
||||
@ -1705,7 +1705,6 @@ LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
|
||||
/* Obsolete, always enabled: */
|
||||
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
|
||||
LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
|
||||
|
||||
|
@ -24,7 +24,7 @@ const char * const bch2_bkey_types[] = {
|
||||
};
|
||||
|
||||
static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -38,7 +38,7 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
})
|
||||
|
||||
static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_val_bytes(k.k)) {
|
||||
prt_printf(err, "incorrect value size (%zu != 0)",
|
||||
@ -54,7 +54,7 @@ static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
})
|
||||
|
||||
static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) {
|
||||
prt_printf(err, "incorrect value size (%zu != %zu)",
|
||||
@ -74,7 +74,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
})
|
||||
|
||||
static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -95,7 +95,7 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
})
|
||||
|
||||
static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_val_bytes(k.k)) {
|
||||
prt_printf(err, "incorrect value size (%zu != %zu)",
|
||||
@ -124,14 +124,14 @@ const struct bkey_ops bch2_bkey_ops[] = {
|
||||
};
|
||||
|
||||
int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (k.k->type >= KEY_TYPE_MAX) {
|
||||
prt_printf(err, "invalid type (%u >= %u)", k.k->type, KEY_TYPE_MAX);
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
return bch2_bkey_ops[k.k->type].key_invalid(c, k, rw, err);
|
||||
return bch2_bkey_ops[k.k->type].key_invalid(c, k, flags, err);
|
||||
}
|
||||
|
||||
static unsigned bch2_key_types_allowed[] = {
|
||||
@ -207,7 +207,7 @@ static unsigned bch2_key_types_allowed[] = {
|
||||
|
||||
int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum btree_node_type type,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (k.k->u64s < BKEY_U64s) {
|
||||
prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
|
||||
@ -216,7 +216,7 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
if (!(bch2_key_types_allowed[type] & (1U << k.k->type))) {
|
||||
prt_printf(err, "invalid key type for btree %s (%s)",
|
||||
bch2_btree_ids[type], bch2_bkey_types[type]);
|
||||
bch2_btree_ids[type], bch2_bkey_types[k.k->type]);
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
@ -263,10 +263,10 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum btree_node_type type,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
return __bch2_bkey_invalid(c, k, type, rw, err) ?:
|
||||
bch2_bkey_val_invalid(c, k, rw, err);
|
||||
return __bch2_bkey_invalid(c, k, type, flags, err) ?:
|
||||
bch2_bkey_val_invalid(c, k, flags, err);
|
||||
}
|
||||
|
||||
int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k,
|
||||
@ -374,7 +374,11 @@ bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
|
||||
{
|
||||
const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
|
||||
|
||||
return bch2_bkey_maybe_mergable(l.k, r.k) && ops->key_merge(c, l, r);
|
||||
return bch2_bkey_maybe_mergable(l.k, r.k) &&
|
||||
(u64) l.k->size + r.k->size <= KEY_SIZE_MAX &&
|
||||
bch2_bkey_ops[l.k->type].key_merge &&
|
||||
!bch2_key_merging_disabled &&
|
||||
ops->key_merge(c, l, r);
|
||||
}
|
||||
|
||||
static const struct old_bkey_type {
|
||||
|
@ -21,7 +21,7 @@ extern const char * const bch2_bkey_types[];
|
||||
*/
|
||||
struct bkey_ops {
|
||||
int (*key_invalid)(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err);
|
||||
unsigned flags, struct printbuf *err);
|
||||
void (*val_to_text)(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
void (*swab)(struct bkey_s);
|
||||
@ -38,11 +38,13 @@ struct bkey_ops {
|
||||
|
||||
extern const struct bkey_ops bch2_bkey_ops[];
|
||||
|
||||
int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
|
||||
#define BKEY_INVALID_FROM_JOURNAL (1 << 1)
|
||||
|
||||
int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
|
||||
int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
|
||||
enum btree_node_type, int, struct printbuf *);
|
||||
enum btree_node_type, unsigned, struct printbuf *);
|
||||
int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
|
||||
enum btree_node_type, int, struct printbuf *);
|
||||
enum btree_node_type, unsigned, struct printbuf *);
|
||||
int bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c, struct printbuf *);
|
||||
|
||||
void bch2_bpos_to_text(struct printbuf *, struct bpos);
|
||||
@ -60,10 +62,7 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
|
||||
{
|
||||
return l->type == r->type &&
|
||||
!bversion_cmp(l->version, r->version) &&
|
||||
bpos_eq(l->p, bkey_start_pos(r)) &&
|
||||
(u64) l->size + r->size <= KEY_SIZE_MAX &&
|
||||
bch2_bkey_ops[l->type].key_merge &&
|
||||
!bch2_key_merging_disabled;
|
||||
bpos_eq(l->p, bkey_start_pos(r));
|
||||
}
|
||||
|
||||
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
|
||||
@ -82,7 +81,9 @@ static inline int bch2_mark_key(struct btree_trans *trans,
|
||||
|
||||
enum btree_update_flags {
|
||||
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
|
||||
__BTREE_UPDATE_NOJOURNAL,
|
||||
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
|
||||
__BTREE_UPDATE_NO_KEY_CACHE_COHERENCY,
|
||||
|
||||
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
|
||||
|
||||
@ -95,7 +96,10 @@ enum btree_update_flags {
|
||||
};
|
||||
|
||||
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
|
||||
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
|
||||
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
|
||||
#define BTREE_UPDATE_NO_KEY_CACHE_COHERENCY \
|
||||
(1U << __BTREE_UPDATE_NO_KEY_CACHE_COHERENCY)
|
||||
|
||||
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
|
||||
|
||||
|
@ -36,16 +36,7 @@ static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
|
||||
|
||||
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
unsigned offset = __btree_node_key_to_offset(b, k);
|
||||
struct bset_tree *t;
|
||||
|
||||
for_each_bset(b, t)
|
||||
if (offset <= t->end_offset) {
|
||||
EBUG_ON(offset < btree_bkey_first_offset(t));
|
||||
return t;
|
||||
}
|
||||
|
||||
BUG();
|
||||
return bch2_bkey_to_bset_inlined(b, k);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -291,6 +291,21 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
|
||||
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
|
||||
}
|
||||
|
||||
static inline struct bset_tree *
|
||||
bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
unsigned offset = __btree_node_key_to_offset(b, k);
|
||||
struct bset_tree *t;
|
||||
|
||||
for_each_bset(b, t)
|
||||
if (offset <= t->end_offset) {
|
||||
EBUG_ON(offset < btree_bkey_first_offset(t));
|
||||
return t;
|
||||
}
|
||||
|
||||
BUG();
|
||||
}
|
||||
|
||||
struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
|
||||
|
||||
struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
|
||||
@ -427,12 +428,16 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
||||
return btree_cache_can_free(bc);
|
||||
}
|
||||
|
||||
static void bch2_btree_cache_shrinker_to_text(struct printbuf *out, struct shrinker *shrink)
|
||||
static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
|
||||
{
|
||||
struct bch_fs *c = container_of(shrink, struct bch_fs,
|
||||
btree_cache.shrink);
|
||||
char *cbuf;
|
||||
size_t buflen = seq_buf_get_buf(s, &cbuf);
|
||||
struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
|
||||
|
||||
bch2_btree_cache_to_text(out, &c->btree_cache);
|
||||
bch2_btree_cache_to_text(&out, &c->btree_cache);
|
||||
seq_buf_commit(s, out.pos);
|
||||
}
|
||||
|
||||
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
@ -1090,7 +1095,7 @@ retry:
|
||||
goto out;
|
||||
} else {
|
||||
lock_node:
|
||||
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
|
||||
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
|
||||
|
@ -526,11 +526,10 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
struct btree *b, struct bset *i,
|
||||
unsigned offset, int write)
|
||||
{
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
if (!write)
|
||||
prt_str(out, "error validating btree node ");
|
||||
else
|
||||
prt_str(out, "corrupt btree node before write ");
|
||||
prt_printf(out, bch2_log_msg(c, "%s"),
|
||||
write == READ
|
||||
? "error validating btree node "
|
||||
: "corrupt btree node before write ");
|
||||
if (ca)
|
||||
prt_printf(out, "on %s ", ca->name);
|
||||
prt_printf(out, "at btree ");
|
||||
@ -543,63 +542,96 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
}
|
||||
|
||||
enum btree_err_type {
|
||||
/*
|
||||
* We can repair this locally, and we're after the checksum check so
|
||||
* there's no need to try another replica:
|
||||
*/
|
||||
BTREE_ERR_FIXABLE,
|
||||
/*
|
||||
* We can repair this if we have to, but we should try reading another
|
||||
* replica if we can:
|
||||
*/
|
||||
BTREE_ERR_WANT_RETRY,
|
||||
/*
|
||||
* Read another replica if we have one, otherwise consider the whole
|
||||
* node bad:
|
||||
*/
|
||||
BTREE_ERR_MUST_RETRY,
|
||||
BTREE_ERR_FATAL,
|
||||
BTREE_ERR_BAD_NODE,
|
||||
BTREE_ERR_INCOMPATIBLE,
|
||||
};
|
||||
|
||||
enum btree_validate_ret {
|
||||
BTREE_RETRY_READ = 64,
|
||||
};
|
||||
|
||||
static int __btree_err(enum btree_err_type type,
|
||||
struct bch_fs *c,
|
||||
struct bch_dev *ca,
|
||||
struct btree *b,
|
||||
struct bset *i,
|
||||
int write,
|
||||
bool have_retry,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct printbuf out = PRINTBUF;
|
||||
va_list args;
|
||||
int ret = -BCH_ERR_fsck_fix;
|
||||
|
||||
btree_err_msg(&out, c, ca, b, i, b->written, write);
|
||||
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(&out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (write == WRITE) {
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = c->opts.errors == BCH_ON_ERROR_continue
|
||||
? 0
|
||||
: -BCH_ERR_fsck_errors_not_fixed;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!have_retry && type == BTREE_ERR_WANT_RETRY)
|
||||
type = BTREE_ERR_FIXABLE;
|
||||
if (!have_retry && type == BTREE_ERR_MUST_RETRY)
|
||||
type = BTREE_ERR_BAD_NODE;
|
||||
|
||||
switch (type) {
|
||||
case BTREE_ERR_FIXABLE:
|
||||
mustfix_fsck_err(c, "%s", out.buf);
|
||||
ret = -BCH_ERR_fsck_fix;
|
||||
break;
|
||||
case BTREE_ERR_WANT_RETRY:
|
||||
case BTREE_ERR_MUST_RETRY:
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = BTREE_RETRY_READ;
|
||||
break;
|
||||
case BTREE_ERR_BAD_NODE:
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
bch2_topology_error(c);
|
||||
ret = -BCH_ERR_need_topology_repair;
|
||||
break;
|
||||
case BTREE_ERR_INCOMPATIBLE:
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf);
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&out);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define btree_err(type, c, ca, b, i, msg, ...) \
|
||||
({ \
|
||||
__label__ out; \
|
||||
struct printbuf out = PRINTBUF; \
|
||||
int _ret = __btree_err(type, c, ca, b, i, write, have_retry, msg, ##__VA_ARGS__);\
|
||||
\
|
||||
btree_err_msg(&out, c, ca, b, i, b->written, write); \
|
||||
prt_printf(&out, msg, ##__VA_ARGS__); \
|
||||
\
|
||||
if (type == BTREE_ERR_FIXABLE && \
|
||||
write == READ && \
|
||||
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
|
||||
mustfix_fsck_err(c, "%s", out.buf); \
|
||||
goto out; \
|
||||
} \
|
||||
\
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf); \
|
||||
\
|
||||
switch (write) { \
|
||||
case READ: \
|
||||
switch (type) { \
|
||||
case BTREE_ERR_FIXABLE: \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
goto fsck_err; \
|
||||
case BTREE_ERR_WANT_RETRY: \
|
||||
if (have_retry) { \
|
||||
ret = BTREE_RETRY_READ; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
break; \
|
||||
case BTREE_ERR_MUST_RETRY: \
|
||||
ret = BTREE_RETRY_READ; \
|
||||
goto fsck_err; \
|
||||
case BTREE_ERR_FATAL: \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
break; \
|
||||
case WRITE: \
|
||||
if (bch2_fs_inconsistent(c)) { \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
out: \
|
||||
printbuf_exit(&out); \
|
||||
true; \
|
||||
if (_ret != -BCH_ERR_fsck_fix) \
|
||||
goto fsck_err; \
|
||||
*saw_error = true; \
|
||||
})
|
||||
|
||||
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
|
||||
@ -608,6 +640,7 @@ out: \
|
||||
* When btree topology repair changes the start or end of a node, that might
|
||||
* mean we have to drop keys that are no longer inside the node:
|
||||
*/
|
||||
__cold
|
||||
void bch2_btree_node_drop_keys_outside_node(struct btree *b)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
@ -658,7 +691,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
|
||||
static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct btree *b, struct bset *i,
|
||||
unsigned offset, unsigned sectors,
|
||||
int write, bool have_retry)
|
||||
int write, bool have_retry, bool *saw_error)
|
||||
{
|
||||
unsigned version = le16_to_cpu(i->version);
|
||||
const char *err;
|
||||
@ -669,7 +702,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
btree_err_on((version != BCH_BSET_VERSION_OLD &&
|
||||
version < bcachefs_metadata_version_min) ||
|
||||
version >= bcachefs_metadata_version_max,
|
||||
BTREE_ERR_FATAL, c, ca, b, i,
|
||||
BTREE_ERR_INCOMPATIBLE, c, ca, b, i,
|
||||
"unsupported bset version");
|
||||
|
||||
if (btree_err_on(version < c->sb.version_min,
|
||||
@ -693,7 +726,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
}
|
||||
|
||||
btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
|
||||
BTREE_ERR_FATAL, c, ca, b, i,
|
||||
BTREE_ERR_INCOMPATIBLE, c, ca, b, i,
|
||||
"BSET_SEPARATE_WHITEOUTS no longer supported");
|
||||
|
||||
if (btree_err_on(offset + sectors > btree_sectors(c),
|
||||
@ -770,7 +803,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
err = bch2_bkey_format_validate(&bn->format);
|
||||
btree_err_on(err,
|
||||
BTREE_ERR_FATAL, c, ca, b, i,
|
||||
BTREE_ERR_BAD_NODE, c, ca, b, i,
|
||||
"invalid bkey format: %s", err);
|
||||
|
||||
compat_bformat(b->c.level, b->c.btree_id, version,
|
||||
@ -795,7 +828,8 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
struct bset *i, int write, bool have_retry)
|
||||
struct bset *i, int write,
|
||||
bool have_retry, bool *saw_error)
|
||||
{
|
||||
unsigned version = le16_to_cpu(i->version);
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
@ -882,7 +916,7 @@ fsck_err:
|
||||
}
|
||||
|
||||
int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct btree *b, bool have_retry)
|
||||
struct btree *b, bool have_retry, bool *saw_error)
|
||||
{
|
||||
struct btree_node_entry *bne;
|
||||
struct sort_iter *iter;
|
||||
@ -897,7 +931,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
unsigned blacklisted_written, nonblacklisted_written = 0;
|
||||
unsigned ptr_written = btree_ptr_sectors_written(&b->key);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret, retry_read = 0, write = READ;
|
||||
int ret = 0, retry_read = 0, write = READ;
|
||||
|
||||
b->version_ondisk = U16_MAX;
|
||||
/* We might get called multiple times on read retry: */
|
||||
@ -958,7 +992,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
|
||||
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
|
||||
BTREE_ERR_FATAL, c, NULL, b, NULL,
|
||||
BTREE_ERR_INCOMPATIBLE, c, NULL, b, NULL,
|
||||
"btree node does not have NEW_EXTENT_OVERWRITE set");
|
||||
|
||||
sectors = vstruct_sectors(b->data, c->block_bits);
|
||||
@ -993,14 +1027,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
le16_to_cpu(i->version));
|
||||
|
||||
ret = validate_bset(c, ca, b, i, b->written, sectors,
|
||||
READ, have_retry);
|
||||
READ, have_retry, saw_error);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
|
||||
if (!b->written)
|
||||
btree_node_set_format(b, b->data->format);
|
||||
|
||||
ret = validate_bset_keys(c, b, i, READ, have_retry);
|
||||
ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
|
||||
@ -1140,12 +1174,10 @@ out:
|
||||
printbuf_exit(&buf);
|
||||
return retry_read;
|
||||
fsck_err:
|
||||
if (ret == BTREE_RETRY_READ) {
|
||||
if (ret == BTREE_RETRY_READ)
|
||||
retry_read = 1;
|
||||
} else {
|
||||
bch2_inconsistent_error(c);
|
||||
else
|
||||
set_btree_node_read_error(b);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1195,7 +1227,7 @@ start:
|
||||
&failed, &rb->pick) > 0;
|
||||
|
||||
if (!bio->bi_status &&
|
||||
!bch2_btree_node_read_done(c, ca, b, can_retry)) {
|
||||
!bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
|
||||
if (retry)
|
||||
bch_info(c, "retry success");
|
||||
break;
|
||||
@ -1301,6 +1333,7 @@ static void btree_node_read_all_replicas_done(struct closure *cl)
|
||||
unsigned i, written = 0, written2 = 0;
|
||||
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
|
||||
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
|
||||
bool _saw_error = false, *saw_error = &_saw_error;
|
||||
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
struct btree_node *bn = ra->buf[i];
|
||||
@ -1387,13 +1420,15 @@ fsck_err:
|
||||
|
||||
if (best >= 0) {
|
||||
memcpy(b->data, ra->buf[best], btree_bytes(c));
|
||||
ret = bch2_btree_node_read_done(c, NULL, b, false);
|
||||
ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
|
||||
} else {
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
set_btree_node_read_error(b);
|
||||
else if (*saw_error)
|
||||
bch2_btree_node_rewrite_async(c, b);
|
||||
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
mempool_free(ra->buf[i], &c->btree_bounce_pool);
|
||||
@ -1770,6 +1805,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
struct bset *i, unsigned sectors)
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bool saw_error;
|
||||
int ret;
|
||||
|
||||
ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key),
|
||||
@ -1781,8 +1817,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = validate_bset_keys(c, b, i, WRITE, false) ?:
|
||||
validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false);
|
||||
ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
|
||||
validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
|
||||
if (ret) {
|
||||
bch2_inconsistent_error(c);
|
||||
dump_stack();
|
||||
|
@ -129,7 +129,7 @@ void bch2_btree_build_aux_trees(struct btree *);
|
||||
void bch2_btree_init_next(struct btree_trans *, struct btree *);
|
||||
|
||||
int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
|
||||
struct btree *, bool);
|
||||
struct btree *, bool, bool *);
|
||||
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
|
||||
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
const struct bkey_i *, unsigned);
|
||||
|
@ -20,18 +20,13 @@
|
||||
#include <linux/prefetch.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static void btree_trans_verify_sorted(struct btree_trans *);
|
||||
inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
|
||||
static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *,
|
||||
struct btree_path *, int);
|
||||
|
||||
static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
|
||||
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
|
||||
struct btree_path *);
|
||||
|
||||
static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
#ifdef TRACK_PATH_ALLOCATED
|
||||
return iter->ip_allocated;
|
||||
#else
|
||||
return 0;
|
||||
@ -353,6 +348,8 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
|
||||
unsigned idx;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
btree_trans_sort_paths(trans);
|
||||
|
||||
trans_for_each_path_inorder(trans, path, idx) {
|
||||
int cmp = cmp_int(path->btree_id, id) ?:
|
||||
cmp_int(path->cached, key_cache);
|
||||
@ -540,7 +537,7 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans,
|
||||
unsigned clobber_u64s,
|
||||
unsigned new_u64s)
|
||||
{
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, where);
|
||||
struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
|
||||
struct btree_path *linked;
|
||||
|
||||
if (node_iter != &path->l[b->c.level].iter) {
|
||||
@ -595,6 +592,7 @@ static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||
|
||||
path->pos = k.k ? k.k->p : l->b->key.k.p;
|
||||
trans->paths_sorted = false;
|
||||
bch2_btree_path_verify_level(trans, path, l - path->l);
|
||||
return k;
|
||||
}
|
||||
@ -608,6 +606,7 @@ static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
|
||||
bch2_btree_node_iter_prev(&l->iter, l->b));
|
||||
|
||||
path->pos = k.k ? k.k->p : l->b->data->min_key;
|
||||
trans->paths_sorted = false;
|
||||
bch2_btree_path_verify_level(trans, path, l - path->l);
|
||||
return k;
|
||||
}
|
||||
@ -963,15 +962,13 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
|
||||
unsigned, unsigned long);
|
||||
|
||||
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *path;
|
||||
unsigned long trace_ip = _RET_IP_;
|
||||
int ret = 0;
|
||||
int i, ret = 0;
|
||||
|
||||
if (trans->in_traverse_all)
|
||||
return -BCH_ERR_transaction_restart_in_traverse_all;
|
||||
@ -979,12 +976,11 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
|
||||
trans->in_traverse_all = true;
|
||||
retry_all:
|
||||
trans->restarted = 0;
|
||||
trans->traverse_all_idx = U8_MAX;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
path->should_be_locked = false;
|
||||
|
||||
btree_trans_verify_sorted(trans);
|
||||
btree_trans_sort_paths(trans);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
cond_resched();
|
||||
@ -1001,34 +997,35 @@ retry_all:
|
||||
}
|
||||
|
||||
/* Now, redo traversals in correct order: */
|
||||
trans->traverse_all_idx = 0;
|
||||
while (trans->traverse_all_idx < trans->nr_sorted) {
|
||||
path = trans->paths + trans->sorted[trans->traverse_all_idx];
|
||||
i = 0;
|
||||
while (i < trans->nr_sorted) {
|
||||
path = trans->paths + trans->sorted[i];
|
||||
|
||||
/*
|
||||
* Traversing a path can cause another path to be added at about
|
||||
* the same position:
|
||||
*/
|
||||
if (path->uptodate) {
|
||||
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
|
||||
__btree_path_get(path, false);
|
||||
ret = bch2_btree_path_traverse_one(trans, path, 0, _THIS_IP_);
|
||||
__btree_path_put(path, false);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
|
||||
ret == -ENOMEM)
|
||||
goto retry_all;
|
||||
if (ret)
|
||||
goto err;
|
||||
BUG_ON(path->uptodate);
|
||||
} else {
|
||||
trans->traverse_all_idx++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
|
||||
* and relock(), relock() won't relock since path->should_be_locked
|
||||
* isn't set yet, which is all fine
|
||||
* We used to assert that all paths had been traversed here
|
||||
* (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
|
||||
* path->Should_be_locked is not set yet, we we might have unlocked and
|
||||
* then failed to relock a path - that's fine.
|
||||
*/
|
||||
trans_for_each_path(trans, path)
|
||||
BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
|
||||
err:
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
|
||||
@ -1115,10 +1112,10 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
|
||||
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
|
||||
* stashed in the iterator and returned from bch2_trans_exit().
|
||||
*/
|
||||
static int btree_path_traverse_one(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
unsigned long trace_ip)
|
||||
int bch2_btree_path_traverse_one(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
unsigned long trace_ip)
|
||||
{
|
||||
unsigned depth_want = path->level;
|
||||
int ret = -((int) trans->restarted);
|
||||
@ -1177,31 +1174,14 @@ static int btree_path_traverse_one(struct btree_trans *trans,
|
||||
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
out:
|
||||
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
|
||||
panic("ret %s (%i) trans->restarted %s (%i)\n",
|
||||
bch2_err_str(ret), ret,
|
||||
bch2_err_str(trans->restarted), trans->restarted);
|
||||
bch2_btree_path_verify(trans, path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned flags)
|
||||
{
|
||||
if (0 && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
|
||||
unsigned restart_probability_bits = 4 << min(trans->restart_count, 32U);
|
||||
u64 mask = ~(~0ULL << restart_probability_bits);
|
||||
|
||||
if ((prandom_u32() & mask) == mask) {
|
||||
trace_and_count(trans->c, trans_restart_injected, trans, _RET_IP_);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
|
||||
}
|
||||
}
|
||||
|
||||
if (path->uptodate < BTREE_ITER_NEED_RELOCK)
|
||||
return 0;
|
||||
|
||||
return bch2_trans_cond_resched(trans) ?:
|
||||
btree_path_traverse_one(trans, path, flags, _RET_IP_);
|
||||
}
|
||||
|
||||
static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
|
||||
struct btree_path *src)
|
||||
{
|
||||
@ -1237,10 +1217,6 @@ struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
|
||||
__btree_path_put(path, intent);
|
||||
path = btree_path_clone(trans, path, intent);
|
||||
path->preserve = false;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
path->ip_allocated = ip;
|
||||
#endif
|
||||
btree_trans_verify_sorted(trans);
|
||||
return path;
|
||||
}
|
||||
|
||||
@ -1251,14 +1227,13 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
|
||||
{
|
||||
unsigned level = path->level;
|
||||
|
||||
EBUG_ON(trans->restarted);
|
||||
bch2_trans_verify_not_in_restart(trans);
|
||||
EBUG_ON(!path->ref);
|
||||
|
||||
path = bch2_btree_path_make_mut(trans, path, intent, ip);
|
||||
|
||||
path->pos = new_pos;
|
||||
|
||||
bch2_btree_path_check_sort_fast(trans, path, cmp);
|
||||
path->pos = new_pos;
|
||||
trans->paths_sorted = false;
|
||||
|
||||
if (unlikely(path->cached)) {
|
||||
btree_node_unlock(trans, path, 0);
|
||||
@ -1381,6 +1356,21 @@ static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *p
|
||||
__bch2_path_free(trans, path);
|
||||
}
|
||||
|
||||
void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
|
||||
{
|
||||
panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
|
||||
trans->restart_count, restart_count,
|
||||
(void *) trans->last_restarted_ip);
|
||||
}
|
||||
|
||||
void bch2_trans_in_restart_error(struct btree_trans *trans)
|
||||
{
|
||||
panic("in transaction restart: %s, last restarted by %pS\n",
|
||||
bch2_err_str(trans->restarted),
|
||||
(void *) trans->last_restarted_ip);
|
||||
}
|
||||
|
||||
noinline __cold
|
||||
void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
@ -1421,6 +1411,7 @@ void bch2_dump_trans_updates(struct btree_trans *trans)
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
noinline __cold
|
||||
void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
|
||||
{
|
||||
prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
|
||||
@ -1432,39 +1423,59 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
|
||||
bch2_bpos_to_text(out, path->pos);
|
||||
|
||||
prt_printf(out, " locks %u", path->nodes_locked);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
#ifdef TRACK_PATH_ALLOCATED
|
||||
prt_printf(out, " %pS", (void *) path->ip_allocated);
|
||||
#endif
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
|
||||
noinline __cold
|
||||
void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
|
||||
bool nosort)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned idx;
|
||||
|
||||
if (!nosort)
|
||||
btree_trans_sort_paths(trans);
|
||||
|
||||
trans_for_each_path_inorder(trans, path, idx)
|
||||
bch2_btree_path_to_text(out, path);
|
||||
}
|
||||
|
||||
noinline __cold
|
||||
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
|
||||
void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
|
||||
{
|
||||
__bch2_trans_paths_to_text(out, trans, false);
|
||||
}
|
||||
|
||||
noinline __cold
|
||||
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_trans_paths_to_text(&buf, trans);
|
||||
__bch2_trans_paths_to_text(&buf, trans, nosort);
|
||||
bch2_trans_updates_to_text(&buf, trans);
|
||||
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
noinline
|
||||
noinline __cold
|
||||
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
|
||||
{
|
||||
__bch2_dump_trans_paths_updates(trans, false);
|
||||
}
|
||||
|
||||
noinline __cold
|
||||
static void bch2_trans_update_max_paths(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_transaction_stats *s = btree_trans_stats(trans);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
if (!s)
|
||||
return;
|
||||
|
||||
bch2_trans_paths_to_text(&buf, trans);
|
||||
|
||||
if (!buf.allocation_failure) {
|
||||
@ -1478,6 +1489,8 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
|
||||
}
|
||||
|
||||
printbuf_exit(&buf);
|
||||
|
||||
trans->nr_max_paths = hweight64(trans->paths_allocated);
|
||||
}
|
||||
|
||||
static noinline void btree_path_overflow(struct btree_trans *trans)
|
||||
@ -1497,19 +1510,24 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
|
||||
btree_path_overflow(trans);
|
||||
|
||||
idx = __ffs64(~trans->paths_allocated);
|
||||
trans->paths_allocated |= 1ULL << idx;
|
||||
|
||||
/*
|
||||
* Do this before marking the new path as allocated, since it won't be
|
||||
* initialized yet:
|
||||
*/
|
||||
if (unlikely(idx > trans->nr_max_paths))
|
||||
bch2_trans_update_max_paths(trans);
|
||||
|
||||
path = &trans->paths[idx];
|
||||
trans->paths_allocated |= 1ULL << idx;
|
||||
|
||||
path = &trans->paths[idx];
|
||||
path->idx = idx;
|
||||
path->ref = 0;
|
||||
path->intent_ref = 0;
|
||||
path->nodes_locked = 0;
|
||||
|
||||
btree_path_list_add(trans, pos, path);
|
||||
trans->paths_sorted = false;
|
||||
return path;
|
||||
}
|
||||
|
||||
@ -1523,10 +1541,11 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
|
||||
bool intent = flags & BTREE_ITER_INTENT;
|
||||
int i;
|
||||
|
||||
EBUG_ON(trans->restarted);
|
||||
btree_trans_verify_sorted(trans);
|
||||
bch2_trans_verify_not_in_restart(trans);
|
||||
bch2_trans_verify_locks(trans);
|
||||
|
||||
btree_trans_sort_paths(trans);
|
||||
|
||||
trans_for_each_path_inorder(trans, path, i) {
|
||||
if (__btree_path_cmp(path,
|
||||
btree_id,
|
||||
@ -1559,10 +1578,10 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
|
||||
path->nodes_locked = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(path->l); i++)
|
||||
path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
#ifdef TRACK_PATH_ALLOCATED
|
||||
path->ip_allocated = ip;
|
||||
#endif
|
||||
btree_trans_verify_sorted(trans);
|
||||
trans->paths_sorted = false;
|
||||
}
|
||||
|
||||
if (!(flags & BTREE_ITER_NOPRESERVE))
|
||||
@ -1613,7 +1632,8 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *
|
||||
EBUG_ON(ck &&
|
||||
(path->btree_id != ck->key.btree_id ||
|
||||
!bkey_eq(path->pos, ck->key.pos)));
|
||||
EBUG_ON(!ck || !ck->valid);
|
||||
if (!ck || !ck->valid)
|
||||
return bkey_s_c_null;
|
||||
|
||||
*u = ck->k->k;
|
||||
k = bkey_i_to_s_c(ck->k);
|
||||
@ -1697,7 +1717,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
|
||||
struct btree *b = NULL;
|
||||
int ret;
|
||||
|
||||
BUG_ON(trans->restarted);
|
||||
bch2_trans_verify_not_in_restart(trans);
|
||||
EBUG_ON(iter->path->cached);
|
||||
bch2_btree_iter_verify(iter);
|
||||
|
||||
@ -1798,19 +1818,18 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
static noinline
|
||||
struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
struct bkey_i *ret = NULL;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if (i->btree_id < btree_id)
|
||||
trans_for_each_update(iter->trans, i) {
|
||||
if (i->btree_id < iter->btree_id)
|
||||
continue;
|
||||
if (i->btree_id > btree_id)
|
||||
if (i->btree_id > iter->btree_id)
|
||||
break;
|
||||
if (bpos_lt(i->k->k.p, pos))
|
||||
if (bpos_lt(i->k->k.p, iter->path->pos))
|
||||
continue;
|
||||
if (i->key_cache_already_flushed)
|
||||
continue;
|
||||
@ -1821,30 +1840,44 @@ static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
|
||||
{
|
||||
return iter->flags & BTREE_ITER_WITH_UPDATES
|
||||
? __bch2_btree_trans_peek_updates(iter)
|
||||
: NULL;
|
||||
}
|
||||
|
||||
struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos start_pos,
|
||||
struct bpos end_pos)
|
||||
{
|
||||
struct bkey_i *k;
|
||||
|
||||
if (bpos_lt(start_pos, iter->journal_pos))
|
||||
if (bpos_lt(iter->path->pos, iter->journal_pos))
|
||||
iter->journal_idx = 0;
|
||||
|
||||
k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
|
||||
iter->path->level,
|
||||
start_pos, end_pos,
|
||||
iter->path->pos,
|
||||
end_pos,
|
||||
&iter->journal_idx);
|
||||
|
||||
iter->journal_pos = k ? k->k.p : end_pos;
|
||||
return k;
|
||||
}
|
||||
|
||||
struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos pos)
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
return bch2_btree_journal_peek(trans, iter, pos, pos);
|
||||
struct bkey_i *k = bch2_btree_journal_peek(trans, iter, iter->path->pos);
|
||||
|
||||
if (k) {
|
||||
iter->k = k->k;
|
||||
return bkey_i_to_s_c(k);
|
||||
} else {
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
}
|
||||
|
||||
static noinline
|
||||
@ -1853,7 +1886,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_i *next_journal =
|
||||
bch2_btree_journal_peek(trans, iter, iter->path->pos,
|
||||
bch2_btree_journal_peek(trans, iter,
|
||||
k.k ? k.k->p : path_l(iter->path)->b->key.k.p);
|
||||
|
||||
if (next_journal) {
|
||||
@ -1869,42 +1902,46 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||
* bkey_s_c_null:
|
||||
*/
|
||||
static noinline
|
||||
struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
|
||||
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey u;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if ((iter->flags & BTREE_ITER_KEY_CACHE_FILL) &&
|
||||
bpos_eq(iter->pos, pos))
|
||||
return bkey_s_c_null;
|
||||
|
||||
if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
|
||||
return bkey_s_c_null;
|
||||
|
||||
if (!iter->key_cache_path)
|
||||
iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
|
||||
iter->flags & BTREE_ITER_INTENT, 0,
|
||||
iter->flags|BTREE_ITER_CACHED,
|
||||
iter->flags|BTREE_ITER_CACHED|
|
||||
BTREE_ITER_CACHED_NOFILL,
|
||||
_THIS_IP_);
|
||||
|
||||
iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
|
||||
iter->flags & BTREE_ITER_INTENT,
|
||||
btree_iter_ip_allocated(iter));
|
||||
|
||||
ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
|
||||
ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
|
||||
iter->flags|BTREE_ITER_CACHED) ?:
|
||||
bch2_btree_path_relock(trans, iter->path, _THIS_IP_);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
btree_path_set_should_be_locked(iter->key_cache_path);
|
||||
|
||||
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
|
||||
{
|
||||
struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos);
|
||||
int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_);
|
||||
|
||||
return err ? bkey_s_c_err(err) : ret;
|
||||
k = bch2_btree_path_peek_slot(iter->key_cache_path, &u);
|
||||
if (k.k && !bkey_err(k)) {
|
||||
iter->k = u;
|
||||
k.k = &iter->k;
|
||||
}
|
||||
return k;
|
||||
}
|
||||
|
||||
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
|
||||
@ -1959,9 +1996,8 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
||||
k = btree_trans_peek_journal(trans, iter, k);
|
||||
|
||||
next_update = iter->flags & BTREE_ITER_WITH_UPDATES
|
||||
? btree_trans_peek_updates(trans, iter->btree_id, search_key)
|
||||
: NULL;
|
||||
next_update = btree_trans_peek_updates(iter);
|
||||
|
||||
if (next_update &&
|
||||
bpos_le(next_update->k.p,
|
||||
k.k ? k.k->p : l->b->key.k.p)) {
|
||||
@ -2114,8 +2150,8 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
|
||||
btree_path_set_should_be_locked(iter->path);
|
||||
out_no_locked:
|
||||
if (iter->update_path) {
|
||||
if (iter->update_path->uptodate &&
|
||||
(ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)))
|
||||
ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_);
|
||||
if (unlikely(ret))
|
||||
k = bkey_s_c_err(ret);
|
||||
else
|
||||
btree_path_set_should_be_locked(iter->update_path);
|
||||
@ -2293,8 +2329,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
k = btree_path_level_prev(trans, iter->path,
|
||||
&iter->path->l[0], &iter->k);
|
||||
|
||||
bch2_btree_path_check_sort(trans, iter->path, 0);
|
||||
|
||||
if (likely(k.k)) {
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
|
||||
if (k.k->p.snapshot == iter->snapshot)
|
||||
@ -2419,9 +2453,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
||||
struct bkey_i *next_update;
|
||||
|
||||
if ((iter->flags & BTREE_ITER_WITH_UPDATES) &&
|
||||
(next_update = btree_trans_peek_updates(trans,
|
||||
iter->btree_id, search_key)) &&
|
||||
if ((next_update = btree_trans_peek_updates(iter)) &&
|
||||
bpos_eq(next_update->k.p, iter->pos)) {
|
||||
iter->k = next_update->k;
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
@ -2429,15 +2461,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
}
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
|
||||
(next_update = bch2_btree_journal_peek_slot(trans,
|
||||
iter, iter->pos))) {
|
||||
iter->k = next_update->k;
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
(k = btree_trans_peek_slot_journal(trans, iter)).k)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
|
||||
(k = __btree_trans_peek_key_cache(iter, iter->pos)).k) {
|
||||
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
|
||||
if (!bkey_err(k))
|
||||
iter->k = *k.k;
|
||||
/* We're not returning a key from iter->path: */
|
||||
@ -2529,27 +2557,29 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
|
||||
|
||||
/* new transactional stuff: */
|
||||
|
||||
static inline void btree_path_verify_sorted_ref(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
EBUG_ON(path->sorted_idx >= trans->nr_sorted);
|
||||
EBUG_ON(trans->sorted[path->sorted_idx] != path->idx);
|
||||
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
|
||||
}
|
||||
|
||||
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < trans->nr_sorted; i++)
|
||||
btree_path_verify_sorted_ref(trans, trans->paths + trans->sorted[i]);
|
||||
#endif
|
||||
BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
|
||||
|
||||
trans_for_each_path(trans, path) {
|
||||
BUG_ON(path->sorted_idx >= trans->nr_sorted);
|
||||
BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
|
||||
}
|
||||
|
||||
for (i = 0; i < trans->nr_sorted; i++) {
|
||||
unsigned idx = trans->sorted[i];
|
||||
|
||||
EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
|
||||
BUG_ON(trans->paths[idx].sorted_idx != i);
|
||||
}
|
||||
}
|
||||
|
||||
static void btree_trans_verify_sorted(struct btree_trans *trans)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
struct btree_path *path, *prev = NULL;
|
||||
unsigned i;
|
||||
|
||||
@ -2558,80 +2588,54 @@ static void btree_trans_verify_sorted(struct btree_trans *trans)
|
||||
|
||||
trans_for_each_path_inorder(trans, path, i) {
|
||||
if (prev && btree_path_cmp(prev, path) > 0) {
|
||||
bch2_dump_trans_paths_updates(trans);
|
||||
__bch2_dump_trans_paths_updates(trans, true);
|
||||
panic("trans paths out of order!\n");
|
||||
}
|
||||
prev = path;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
|
||||
static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void btree_path_swap(struct btree_trans *trans,
|
||||
struct btree_path *l, struct btree_path *r)
|
||||
void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
|
||||
{
|
||||
swap(l->sorted_idx, r->sorted_idx);
|
||||
swap(trans->sorted[l->sorted_idx],
|
||||
trans->sorted[r->sorted_idx]);
|
||||
int i, l = 0, r = trans->nr_sorted, inc = 1;
|
||||
bool swapped;
|
||||
|
||||
btree_path_verify_sorted_ref(trans, l);
|
||||
btree_path_verify_sorted_ref(trans, r);
|
||||
}
|
||||
btree_trans_verify_sorted_refs(trans);
|
||||
|
||||
static inline struct btree_path *sib_btree_path(struct btree_trans *trans,
|
||||
struct btree_path *path, int sib)
|
||||
{
|
||||
unsigned idx = (unsigned) path->sorted_idx + sib;
|
||||
if (trans->paths_sorted)
|
||||
goto out;
|
||||
|
||||
EBUG_ON(sib != -1 && sib != 1);
|
||||
/*
|
||||
* Cocktail shaker sort: this is efficient because iterators will be
|
||||
* mostly sorted.
|
||||
*/
|
||||
do {
|
||||
swapped = false;
|
||||
|
||||
return idx < trans->nr_sorted
|
||||
? trans->paths + trans->sorted[idx]
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
int cmp)
|
||||
{
|
||||
struct btree_path *n;
|
||||
int cmp2;
|
||||
|
||||
EBUG_ON(!cmp);
|
||||
|
||||
while ((n = sib_btree_path(trans, path, cmp)) &&
|
||||
(cmp2 = btree_path_cmp(n, path)) &&
|
||||
cmp2 != cmp)
|
||||
btree_path_swap(trans, n, path);
|
||||
|
||||
btree_trans_verify_sorted(trans);
|
||||
}
|
||||
|
||||
inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path,
|
||||
int cmp)
|
||||
{
|
||||
struct btree_path *n;
|
||||
|
||||
if (cmp <= 0) {
|
||||
n = prev_btree_path(trans, path);
|
||||
if (n && btree_path_cmp(n, path) > 0) {
|
||||
do {
|
||||
btree_path_swap(trans, n, path);
|
||||
n = prev_btree_path(trans, path);
|
||||
} while (n && btree_path_cmp(n, path) > 0);
|
||||
|
||||
goto out;
|
||||
for (i = inc > 0 ? l : r - 2;
|
||||
i + 1 < r && i >= l;
|
||||
i += inc) {
|
||||
if (btree_path_cmp(trans->paths + trans->sorted[i],
|
||||
trans->paths + trans->sorted[i + 1]) > 0) {
|
||||
swap(trans->sorted[i], trans->sorted[i + 1]);
|
||||
trans->paths[trans->sorted[i]].sorted_idx = i;
|
||||
trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cmp >= 0) {
|
||||
n = next_btree_path(trans, path);
|
||||
if (n && btree_path_cmp(path, n) > 0) {
|
||||
do {
|
||||
btree_path_swap(trans, path, n);
|
||||
n = next_btree_path(trans, path);
|
||||
} while (n && btree_path_cmp(path, n) > 0);
|
||||
}
|
||||
}
|
||||
if (inc > 0)
|
||||
--r;
|
||||
else
|
||||
l++;
|
||||
inc = -inc;
|
||||
} while (swapped);
|
||||
|
||||
trans->paths_sorted = true;
|
||||
out:
|
||||
btree_trans_verify_sorted(trans);
|
||||
}
|
||||
@ -2642,15 +2646,18 @@ static inline void btree_path_list_remove(struct btree_trans *trans,
|
||||
unsigned i;
|
||||
|
||||
EBUG_ON(path->sorted_idx >= trans->nr_sorted);
|
||||
|
||||
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
trans->nr_sorted--;
|
||||
memmove_u64s_down_small(trans->sorted + path->sorted_idx,
|
||||
trans->sorted + path->sorted_idx + 1,
|
||||
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
|
||||
#else
|
||||
array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
|
||||
|
||||
#endif
|
||||
for (i = path->sorted_idx; i < trans->nr_sorted; i++)
|
||||
trans->paths[trans->sorted[i]].sorted_idx = i;
|
||||
|
||||
path->sorted_idx = U8_MAX;
|
||||
|
||||
btree_trans_verify_sorted_refs(trans);
|
||||
}
|
||||
|
||||
static inline void btree_path_list_add(struct btree_trans *trans,
|
||||
@ -2659,16 +2666,17 @@ static inline void btree_path_list_add(struct btree_trans *trans,
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
btree_trans_verify_sorted_refs(trans);
|
||||
|
||||
path->sorted_idx = pos ? pos->sorted_idx + 1 : 0;
|
||||
|
||||
if (unlikely(trans->in_traverse_all) &&
|
||||
trans->traverse_all_idx != U8_MAX &&
|
||||
trans->traverse_all_idx >= path->sorted_idx)
|
||||
trans->traverse_all_idx++;
|
||||
path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;
|
||||
|
||||
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
|
||||
trans->sorted + path->sorted_idx,
|
||||
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
|
||||
trans->nr_sorted++;
|
||||
trans->sorted[path->sorted_idx] = path->idx;
|
||||
#else
|
||||
array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
|
||||
#endif
|
||||
|
||||
for (i = path->sorted_idx; i < trans->nr_sorted; i++)
|
||||
trans->paths[trans->sorted[i]].sorted_idx = i;
|
||||
@ -2812,14 +2820,6 @@ u32 bch2_trans_begin(struct btree_trans *trans)
|
||||
trans->restart_count++;
|
||||
trans->mem_top = 0;
|
||||
|
||||
if (trans->fs_usage_deltas) {
|
||||
trans->fs_usage_deltas->used = 0;
|
||||
memset((void *) trans->fs_usage_deltas +
|
||||
offsetof(struct replicas_delta_list, memset_start), 0,
|
||||
(void *) &trans->fs_usage_deltas->memset_end -
|
||||
(void *) &trans->fs_usage_deltas->memset_start);
|
||||
}
|
||||
|
||||
trans_for_each_path(trans, path) {
|
||||
path->should_be_locked = false;
|
||||
|
||||
@ -2850,25 +2850,19 @@ u32 bch2_trans_begin(struct btree_trans *trans)
|
||||
bch2_trans_relock(trans);
|
||||
}
|
||||
|
||||
if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ)))
|
||||
if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
|
||||
bch2_trans_reset_srcu_lock(trans);
|
||||
|
||||
trans->last_restarted_ip = _RET_IP_;
|
||||
if (trans->restarted)
|
||||
if (trans->restarted) {
|
||||
bch2_btree_path_traverse_all(trans);
|
||||
trans->notrace_relock_fail = false;
|
||||
}
|
||||
|
||||
trans->last_begin_time = local_clock();
|
||||
return trans->restart_count;
|
||||
}
|
||||
|
||||
void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
|
||||
{
|
||||
if (trans_was_restarted(trans, restart_count))
|
||||
panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
|
||||
trans->restart_count, restart_count,
|
||||
(void *) trans->last_restarted_ip);
|
||||
}
|
||||
|
||||
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
|
||||
{
|
||||
size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX;
|
||||
@ -2908,7 +2902,6 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
|
||||
__acquires(&c->btree_trans_barrier)
|
||||
{
|
||||
struct btree_transaction_stats *s;
|
||||
struct btree_trans *pos;
|
||||
|
||||
BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
|
||||
|
||||
@ -2944,16 +2937,20 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
|
||||
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
trans->srcu_lock_time = jiffies;
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
||||
if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
|
||||
list_add_tail(&trans->list, &pos->list);
|
||||
goto list_add_done;
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
|
||||
struct btree_trans *pos;
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
||||
if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
|
||||
list_add_tail(&trans->list, &pos->list);
|
||||
goto list_add_done;
|
||||
}
|
||||
}
|
||||
}
|
||||
list_add_tail(&trans->list, &c->btree_trans_list);
|
||||
list_add_tail(&trans->list, &c->btree_trans_list);
|
||||
list_add_done:
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_btree_paths_leaked(struct btree_trans *trans)
|
||||
@ -2998,9 +2995,11 @@ void bch2_trans_exit(struct btree_trans *trans)
|
||||
|
||||
check_btree_paths_leaked(trans);
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_del(&trans->list);
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_del(&trans->list);
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
||||
|
||||
@ -3098,7 +3097,9 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
|
||||
|
||||
b = READ_ONCE(trans->locking);
|
||||
if (b) {
|
||||
prt_str(out, " want");
|
||||
prt_printf(out, " blocked for %lluus on",
|
||||
div_u64(local_clock() - trans->locking_wait.start_time,
|
||||
1000));
|
||||
prt_newline(out);
|
||||
prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]);
|
||||
bch2_btree_bkey_cached_common_to_text(out, b);
|
||||
@ -3112,8 +3113,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
|
||||
|
||||
for (s = c->btree_transaction_stats;
|
||||
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
|
||||
s++)
|
||||
s++) {
|
||||
kfree(s->max_paths_text);
|
||||
bch2_time_stats_exit(&s->lock_hold_times);
|
||||
}
|
||||
|
||||
if (c->btree_trans_barrier_initialized)
|
||||
cleanup_srcu_struct(&c->btree_trans_barrier);
|
||||
@ -3123,11 +3126,16 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
|
||||
|
||||
int bch2_fs_btree_iter_init(struct bch_fs *c)
|
||||
{
|
||||
unsigned i, nr = BTREE_ITER_MAX;
|
||||
struct btree_transaction_stats *s;
|
||||
unsigned nr = BTREE_ITER_MAX;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++)
|
||||
mutex_init(&c->btree_transaction_stats[i].lock);
|
||||
for (s = c->btree_transaction_stats;
|
||||
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
|
||||
s++) {
|
||||
bch2_time_stats_init(&s->lock_hold_times);
|
||||
mutex_init(&s->lock);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&c->btree_trans_list);
|
||||
mutex_init(&c->btree_trans_lock);
|
||||
|
@ -54,6 +54,16 @@ static inline struct btree *btree_node_parent(struct btree_path *path,
|
||||
|
||||
/* Iterate over paths within a transaction: */
|
||||
|
||||
void __bch2_btree_trans_sort_paths(struct btree_trans *);
|
||||
|
||||
static inline void btree_trans_sort_paths(struct btree_trans *trans)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
|
||||
trans->paths_sorted)
|
||||
return;
|
||||
__bch2_btree_trans_sort_paths(trans);
|
||||
}
|
||||
|
||||
static inline struct btree_path *
|
||||
__trans_next_path(struct btree_trans *trans, unsigned idx)
|
||||
{
|
||||
@ -72,8 +82,6 @@ __trans_next_path(struct btree_trans *trans, unsigned idx)
|
||||
return &trans->paths[idx];
|
||||
}
|
||||
|
||||
void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
|
||||
|
||||
#define trans_for_each_path_from(_trans, _path, _start) \
|
||||
for (_path = __trans_next_path((_trans), _start); \
|
||||
(_path); \
|
||||
@ -95,9 +103,10 @@ static inline struct btree_path *next_btree_path(struct btree_trans *trans, stru
|
||||
|
||||
static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
EBUG_ON(path->sorted_idx >= trans->nr_sorted);
|
||||
return path->sorted_idx
|
||||
? trans->paths + trans->sorted[path->sorted_idx - 1]
|
||||
unsigned idx = path ? path->sorted_idx : trans->nr_sorted;
|
||||
|
||||
return idx
|
||||
? trans->paths + trans->sorted[idx - 1]
|
||||
: NULL;
|
||||
}
|
||||
|
||||
@ -106,6 +115,11 @@ static inline struct btree_path *prev_btree_path(struct btree_trans *trans, stru
|
||||
((_path) = (_trans)->paths + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\
|
||||
_i++)
|
||||
|
||||
#define trans_for_each_path_inorder_reverse(_trans, _path, _i) \
|
||||
for (_i = trans->nr_sorted - 1; \
|
||||
((_path) = (_trans)->paths + trans->sorted[_i]), (_i) >= 0;\
|
||||
--_i)
|
||||
|
||||
static inline bool __path_has_node(const struct btree_path *path,
|
||||
const struct btree *b)
|
||||
{
|
||||
@ -161,6 +175,18 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
|
||||
: path;
|
||||
}
|
||||
|
||||
int __must_check bch2_btree_path_traverse_one(struct btree_trans *, struct btree_path *,
|
||||
unsigned, unsigned long);
|
||||
|
||||
static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned flags)
|
||||
{
|
||||
if (path->uptodate < BTREE_ITER_NEED_RELOCK)
|
||||
return 0;
|
||||
|
||||
return bch2_btree_path_traverse_one(trans, path, flags, _RET_IP_);
|
||||
}
|
||||
|
||||
int __must_check bch2_btree_path_traverse(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos,
|
||||
@ -193,6 +219,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
|
||||
void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
|
||||
|
||||
int bch2_trans_relock(struct btree_trans *);
|
||||
int bch2_trans_relock_notrace(struct btree_trans *);
|
||||
void bch2_trans_unlock(struct btree_trans *);
|
||||
bool bch2_trans_locked(struct btree_trans *);
|
||||
|
||||
@ -201,7 +228,22 @@ static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_co
|
||||
return restart_count != trans->restart_count;
|
||||
}
|
||||
|
||||
void bch2_trans_verify_not_restarted(struct btree_trans *, u32);
|
||||
void bch2_trans_restart_error(struct btree_trans *, u32);
|
||||
|
||||
static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
|
||||
u32 restart_count)
|
||||
{
|
||||
if (trans_was_restarted(trans, restart_count))
|
||||
bch2_trans_restart_error(trans, restart_count);
|
||||
}
|
||||
|
||||
void bch2_trans_in_restart_error(struct btree_trans *);
|
||||
|
||||
static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
|
||||
{
|
||||
if (trans->restarted)
|
||||
bch2_trans_in_restart_error(trans);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "journal_reclaim.h"
|
||||
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static inline bool btree_uses_pcpu_readers(enum btree_id id)
|
||||
@ -56,13 +57,12 @@ static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
|
||||
if (!six_trylock_intent(&ck->c.lock))
|
||||
return false;
|
||||
|
||||
if (!six_trylock_write(&ck->c.lock)) {
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
six_unlock_write(&ck->c.lock);
|
||||
if (!six_trylock_write(&ck->c.lock)) {
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
return false;
|
||||
}
|
||||
@ -197,6 +197,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
struct btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck = NULL;
bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
int ret;

if (!pcpu_readers) {
#ifdef __KERNEL__
@ -244,7 +245,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (ck) {
int ret;

ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
if (unlikely(ret)) {
bkey_cached_move_to_freelist(bc, ck);
return ERR_PTR(ret);
@ -264,22 +265,33 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
return ck;
}

/* GFP_NOFS because we're holding btree locks: */
ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
if (likely(ck)) {
INIT_LIST_HEAD(&ck->list);
__six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
if (pcpu_readers)
six_lock_pcpu_alloc(&ck->c.lock);
ck = kmem_cache_zalloc(bch2_key_cache, GFP_NOWAIT|__GFP_NOWARN);
if (likely(ck))
goto init;

ck->c.cached = true;
BUG_ON(!six_trylock_intent(&ck->c.lock));
BUG_ON(!six_trylock_write(&ck->c.lock));
*was_new = true;
return ck;
bch2_trans_unlock(trans);

ck = kmem_cache_zalloc(bch2_key_cache, GFP_KERNEL);

ret = bch2_trans_relock(trans);
if (ret) {
kmem_cache_free(bch2_key_cache, ck);
return ERR_PTR(ret);
}

return NULL;
if (!ck)
return NULL;
init:
INIT_LIST_HEAD(&ck->list);
__six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
if (pcpu_readers)
six_lock_pcpu_alloc(&ck->c.lock);

ck->c.cached = true;
BUG_ON(!six_trylock_intent(&ck->c.lock));
BUG_ON(!six_trylock_write(&ck->c.lock));
*was_new = true;
return ck;
}

static struct bkey_cached *
@ -369,24 +381,23 @@ static int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
struct bkey_cached *ck)
{
struct btree_path *path;
struct btree_iter iter;
struct bkey_s_c k;
unsigned new_u64s = 0;
struct bkey_i *new_k = NULL;
struct bkey u;
int ret;

path = bch2_path_get(trans, ck->key.btree_id,
ck->key.pos, 0, 0, 0, _THIS_IP_);
ret = bch2_btree_path_traverse(trans, path, 0);
bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
BTREE_ITER_KEY_CACHE_FILL|
BTREE_ITER_CACHED_NOFILL);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto err;

k = bch2_btree_path_peek_slot(path, &u);

if (!bch2_btree_node_relock(trans, ck_path, 0)) {
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err;
}

@ -405,12 +416,30 @@ static int btree_key_cache_fill(struct btree_trans *trans,

if (new_u64s > ck->u64s) {
new_u64s = roundup_pow_of_two(new_u64s);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
if (!new_k) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_ids[ck->key.btree_id], new_u64s);
ret = -ENOMEM;
goto err;
bch2_trans_unlock(trans);

new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
if (!new_k) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_ids[ck->key.btree_id], new_u64s);
ret = -ENOMEM;
goto err;
}

if (!bch2_btree_node_relock(trans, ck_path, 0)) {
kfree(new_k);
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err;
}

ret = bch2_trans_relock(trans);
if (ret) {
kfree(new_k);
goto err;
}
}
}

@ -431,9 +460,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);

/* We're not likely to need this iterator again: */
path->preserve = false;
set_btree_iter_dontneed(&iter);
err:
bch2_path_put(trans, path, 0);
bch2_trans_iter_exit(trans, &iter);
return ret;
}

@ -449,7 +478,7 @@ bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree

path->l[1].b = NULL;

if (bch2_btree_node_relock(trans, path, 0)) {
if (bch2_btree_node_relock_notrace(trans, path, 0)) {
ck = (void *) path->l[0].b;
goto fill;
}
@ -487,7 +516,9 @@ retry:
path->l[0].lock_seq = ck->c.lock.state.seq;
path->l[0].b = (void *) ck;
fill:
if (!ck->valid) {
path->uptodate = BTREE_ITER_UPTODATE;

if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
/*
* Using the underscore version because we haven't set
* path->uptodate yet:
@ -502,17 +533,23 @@ fill:
ret = btree_key_cache_fill(trans, path, ck);
if (ret)
goto err;

ret = bch2_btree_path_relock(trans, path, _THIS_IP_);
if (ret)
goto err;

path->uptodate = BTREE_ITER_UPTODATE;
}

if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);

path->uptodate = BTREE_ITER_UPTODATE;
BUG_ON(!ck->valid);
BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
BUG_ON(path->uptodate);

return ret;
err:
path->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(ret);
@ -531,7 +568,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path

path->l[1].b = NULL;

if (bch2_btree_node_relock(trans, path, 0)) {
if (bch2_btree_node_relock_notrace(trans, path, 0)) {
ck = (void *) path->l[0].b;
goto fill;
}
@ -696,6 +733,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
six_unlock_read(&ck->c.lock);
goto unlock;
}

if (ck->seq != seq) {
bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal,
bch2_btree_key_cache_journal_flush);
six_unlock_read(&ck->c.lock);
goto unlock;
}
six_unlock_read(&ck->c.lock);

ret = commit_do(&trans, NULL, NULL, 0,
@ -725,6 +769,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
}

bool bch2_btree_insert_key_cached(struct btree_trans *trans,
unsigned flags,
struct btree_path *path,
struct bkey_i *insert)
{
@ -734,7 +779,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,

BUG_ON(insert->u64s > ck->u64s);

if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
int difference;

BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);
@ -757,8 +802,9 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
kick_reclaim = true;
}

bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
&ck->journal, bch2_btree_key_cache_journal_flush);
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
&ck->journal, bch2_btree_key_cache_journal_flush);
ck->seq = trans->journal_res.seq;

if (kick_reclaim)
journal_reclaim_kick(&c->journal);
@ -978,12 +1024,16 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
INIT_LIST_HEAD(&c->freed_nonpcpu);
}

static void bch2_btree_key_cache_shrinker_to_text(struct printbuf *out, struct shrinker *shrink)
static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
{
struct btree_key_cache *bc =
container_of(shrink, struct btree_key_cache, shrink);
char *cbuf;
size_t buflen = seq_buf_get_buf(s, &cbuf);
struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);

bch2_btree_key_cache_to_text(out, bc);
bch2_btree_key_cache_to_text(&out, bc);
seq_buf_commit(s, out.pos);
}

int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)

@ -29,7 +29,7 @@ bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
unsigned);

bool bch2_btree_insert_key_cached(struct btree_trans *,
bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
struct btree_path *, struct bkey_i *);
int bch2_btree_key_cache_flush(struct btree_trans *,
enum btree_id, struct bpos);

@ -99,6 +99,12 @@ static void lock_graph_up(struct lock_graph *g)
closure_put(&g->g[--g->nr].trans->ref);
}

static noinline void lock_graph_pop_all(struct lock_graph *g)
{
while (g->nr)
lock_graph_up(g);
}

static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
{
closure_get(&trans->ref);
@ -274,7 +280,25 @@ next:
b = &READ_ONCE(path->l[top->level].b)->c;

if (IS_ERR_OR_NULL(b)) {
BUG_ON(!lock_graph_remove_non_waiters(&g));
/*
* If we get here, it means we raced with the
* other thread updating its btree_path
* structures - which means it can't be blocked
* waiting on a lock:
*/
if (!lock_graph_remove_non_waiters(&g)) {
/*
* If lock_graph_remove_non_waiters()
* didn't do anything, it must be
* because we're being called by debugfs
* checking for lock cycles, which
* invokes us on btree_transactions that
* aren't actually waiting on anything.
* Just bail out:
*/
lock_graph_pop_all(&g);
}

goto next;
}

@ -335,7 +359,8 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *p
* locked:
*/
six_lock_readers_add(&b->lock, -readers);
ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, lock_may_not_fail);
ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
lock_may_not_fail, _RET_IP_);
six_lock_readers_add(&b->lock, readers);

if (ret)
@ -407,7 +432,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
return true;
}
fail:
if (trace)
if (trace && !trans->notrace_relock_fail)
trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
return false;
}
@ -504,6 +529,17 @@ bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
return btree_path_get_locks(trans, path, false);
}

int __bch2_btree_path_relock(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
}

return 0;
}

__flatten
bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
@ -615,6 +651,21 @@ int bch2_trans_relock(struct btree_trans *trans)
return 0;
}

int bch2_trans_relock_notrace(struct btree_trans *trans)
{
struct btree_path *path;

if (unlikely(trans->restarted))
return -((int) trans->restarted);

trans_for_each_path(trans, path)
if (path->should_be_locked &&
!bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
}
return 0;
}

void bch2_trans_unlock(struct btree_trans *trans)
{
struct btree_path *path;

@ -191,7 +191,8 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
static inline int __btree_node_lock_nopath(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type,
bool lock_may_not_fail)
bool lock_may_not_fail,
unsigned long ip)
{
int ret;

@ -199,8 +200,8 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans,
trans->lock_must_abort = false;
trans->locking = b;

ret = six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
bch2_six_check_for_deadlock, trans);
ret = six_lock_type_ip_waiter(&b->lock, type, &trans->locking_wait,
bch2_six_check_for_deadlock, trans, ip);
WRITE_ONCE(trans->locking, NULL);
WRITE_ONCE(trans->locking_wait.start_time, 0);
return ret;
@ -209,16 +210,17 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans,
static inline int __must_check
btree_node_lock_nopath(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type)
enum six_lock_type type,
unsigned long ip)
{
return __btree_node_lock_nopath(trans, b, type, false);
return __btree_node_lock_nopath(trans, b, type, false, ip);
}

static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type)
{
int ret = __btree_node_lock_nopath(trans, b, type, true);
int ret = __btree_node_lock_nopath(trans, b, type, true, _THIS_IP_);

BUG_ON(ret);
}
@ -258,7 +260,7 @@ static inline int btree_node_lock(struct btree_trans *trans,

if (likely(six_trylock_type(&b->lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
!(ret = btree_node_lock_nopath(trans, b, type))) {
!(ret = btree_node_lock_nopath(trans, b, type, btree_path_ip_allocated(path)))) {
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
path->l[b->level].lock_taken_time = local_clock();
#endif
@ -312,6 +314,17 @@ bch2_btree_node_lock_write(struct btree_trans *trans,

bool bch2_btree_path_relock_norestart(struct btree_trans *,
struct btree_path *, unsigned long);
int __bch2_btree_path_relock(struct btree_trans *,
struct btree_path *, unsigned long);

static inline int bch2_btree_path_relock(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
return btree_node_locked(path, path->level)
? 0
: __bch2_btree_path_relock(trans, path, trace_ip);
}

bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned, bool trace);

static inline bool bch2_btree_node_relock(struct btree_trans *trans,
@ -338,17 +351,6 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
__bch2_btree_node_relock(trans, path, level, false));
}

static inline int bch2_btree_path_relock(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
}

return 0;
}

/* upgrade */

bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,

@ -10,6 +10,7 @@
#include "buckets_types.h"
#include "darray.h"
#include "journal_types.h"
#include "replicas_types.h"

struct open_bucket;
struct btree_update;
@ -217,6 +218,8 @@ struct btree_node_iter {
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12)
#define BTREE_ITER_NOPRESERVE (1 << 13)
#define BTREE_ITER_CACHED_NOFILL (1 << 14)
#define BTREE_ITER_KEY_CACHE_FILL (1 << 15)

enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -224,6 +227,10 @@ enum btree_path_uptodate {
BTREE_ITER_NEED_TRAVERSE = 2,
};

#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
#define TRACK_PATH_ALLOCATED
#endif

struct btree_path {
u8 idx;
u8 sorted_idx;
@ -254,7 +261,7 @@ struct btree_path {
u64 lock_taken_time;
#endif
} l[BTREE_MAX_DEPTH];
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
@ -264,6 +271,15 @@ static inline struct btree_path_level *path_l(struct btree_path *path)
return path->l + path->level;
}

static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
{
#ifdef TRACK_PATH_ALLOCATED
return path->ip_allocated;
#else
return _THIS_IP_;
#endif
}

/*
* @pos - iterator's current position
* @level - current btree depth
@ -297,7 +313,7 @@ struct btree_iter {
/* BTREE_ITER_WITH_JOURNAL: */
size_t journal_idx;
struct bpos journal_pos;
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
#endif
};
@ -344,6 +360,7 @@ struct bkey_cached {

struct journal_preres res;
struct journal_entry_pin journal;
u64 seq;

struct bkey_i *k;
};
@ -412,12 +429,14 @@ struct btree_trans {
u8 fn_idx;
u8 nr_sorted;
u8 nr_updates;
u8 traverse_all_idx;
bool used_mempool:1;
bool in_traverse_all:1;
bool paths_sorted:1;
bool memory_allocation_failure:1;
bool is_initial_gc:1;
bool journal_transaction_names:1;
bool journal_replay_not_finished:1;
bool is_initial_gc:1;
bool notrace_relock_fail:1;
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_restarted_ip;
@ -437,7 +456,7 @@ struct btree_trans {
unsigned mem_bytes;
void *mem;

u8 sorted[BTREE_ITER_MAX];
u8 sorted[BTREE_ITER_MAX + 8];
struct btree_path *paths;
struct btree_insert_entry *updates;

@ -450,7 +469,6 @@ struct btree_trans {
struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned flags;
unsigned journal_u64s;
unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;

@ -80,7 +80,7 @@ int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,

void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *);
int __bch2_trans_commit(struct btree_trans *, unsigned);

int bch2_trans_log_msg(struct btree_trans *, const char *, ...);
int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
@ -101,9 +101,8 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
{
trans->disk_res = disk_res;
trans->journal_seq = journal_seq;
trans->flags = flags;

return __bch2_trans_commit(trans);
return __bch2_trans_commit(trans, flags);
}

#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
@ -154,6 +153,14 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
trans->nr_updates = 0;
trans->hooks = NULL;
trans->extra_journal_entries.nr = 0;

if (trans->fs_usage_deltas) {
trans->fs_usage_deltas->used = 0;
memset((void *) trans->fs_usage_deltas +
offsetof(struct replicas_delta_list, memset_start), 0,
(void *) &trans->fs_usage_deltas->memset_end -
(void *) &trans->fs_usage_deltas->memset_start);
}
}

#endif /* _BCACHEFS_BTREE_UPDATE_H */

@ -2032,7 +2032,7 @@ void async_btree_node_rewrite_work(struct work_struct *work)

bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(&trans, a));
percpu_ref_put(&c->writes);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
}

@ -2040,12 +2040,12 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{
struct async_btree_rewrite *a;

if (!percpu_ref_tryget_live(&c->writes))
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite))
return;

a = kmalloc(sizeof(*a), GFP_NOFS);
if (!a) {
percpu_ref_put(&c->writes);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
return;
}

@ -2102,7 +2102,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,

btree_path_set_level_up(trans, iter2.path);

bch2_btree_path_check_sort(trans, iter2.path, 0);
trans->paths_sorted = false;

ret = bch2_btree_iter_traverse(&iter2) ?:
bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);

@ -24,12 +24,28 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>

/*
* bch2_btree_path_peek_slot() for a cached iterator might return a key in a
* different snapshot:
*/
struct bkey_s_c bch2_btree_path_peek_slot_exact(struct btree_path *path, struct bkey *u)
{
struct bkey_s_c k = bch2_btree_path_peek_slot(path, u);

if (k.k && bpos_eq(path->pos, k.k->p))
return k;

bkey_init(u);
u->p = path->pos;
return (struct bkey_s_c) { u, NULL };
}

static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct bch_fs *c = trans->c;
struct bkey u;
struct bkey_s_c k = bch2_btree_path_peek_slot(i->path, &u);
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u);

if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
@ -314,17 +330,15 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
}

static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
unsigned flags)
{
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
trans->journal_u64s,
flags|
(trans->flags & JOURNAL_WATERMARK_MASK));
trans->journal_u64s, flags);
}

#define JSET_ENTRY_LOG_U64s 4

static void journal_transaction_name(struct btree_trans *trans)
static noinline void journal_transaction_name(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
@ -349,9 +363,8 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
return 0;
}

static int btree_key_can_insert_cached(struct btree_trans *trans,
struct btree_path *path,
unsigned u64s)
static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
struct btree_path *path, unsigned u64s)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
@ -363,7 +376,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans,

if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bch2_btree_key_cache_must_wait(c) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
!(flags & BTREE_INSERT_JOURNAL_RECLAIM))
return -BCH_ERR_btree_insert_need_journal_reclaim;

/*
@ -573,7 +586,7 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
}

static inline int
bch2_trans_commit_write_locked(struct btree_trans *trans,
bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry **stopped_at,
unsigned long trace_ip)
{
@ -613,7 +626,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
u64s += i->k->k.u64s;
ret = !i->cached
? btree_key_can_insert(trans, insert_l(i)->b, u64s)
: btree_key_can_insert_cached(trans, i->path, u64s);
: btree_key_can_insert_cached(trans, flags, i->path, u64s);
if (ret) {
*stopped_at = i;
return ret;
@ -627,13 +640,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
* Don't get journal reservation until after we know insert will
* succeed:
*/
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
ret = bch2_trans_journal_res_get(trans,
(flags & JOURNAL_WATERMARK_MASK)|
JOURNAL_RES_GET_NONBLOCK);
if (ret)
return ret;

journal_transaction_name(trans);
if (unlikely(trans->journal_transaction_names))
journal_transaction_name(trans);
} else {
trans->journal_res.seq = c->journal.replay_journal_seq;
}
@ -644,7 +659,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
*/

if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
!(flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (bch2_journal_seq_verify)
trans_for_each_update(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
@ -679,7 +694,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
trans->journal_res.u64s -= trans->extra_journal_entries.nr;
}

if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
trans_for_each_update(trans, i) {
struct journal *j = &c->journal;
struct jset_entry *entry;
@ -687,14 +702,19 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (i->key_cache_already_flushed)
continue;

if (i->flags & BTREE_UPDATE_NOJOURNAL)
continue;

verify_update_old_key(trans, i);

entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,
i->old_k.u64s);
bkey_reassemble(&entry->start[0],
(struct bkey_s_c) { &i->old_k, i->old_v });
if (trans->journal_transaction_names) {
entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,
i->old_k.u64s);
bkey_reassemble(&entry->start[0],
(struct bkey_s_c) { &i->old_k, i->old_v });
}

entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_btree_keys,
@ -713,7 +733,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (!i->cached)
btree_insert_key_leaf(trans, i);
else if (!i->key_cache_already_flushed)
bch2_btree_insert_key_cached(trans, i->path, i->k);
bch2_btree_insert_key_cached(trans, flags, i->path, i->k);
else {
bch2_btree_key_cache_drop(trans, i->path);
btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
@ -762,12 +782,12 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans
}

#ifdef CONFIG_BCACHEFS_DEBUG
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry *i,
struct printbuf *err)
{
struct bch_fs *c = trans->c;
int rw = (trans->flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;

printbuf_reset(err);
prt_printf(err, "invalid bkey on insert from %s -> %ps",
@ -793,7 +813,7 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
static inline int do_bch2_trans_commit(struct btree_trans *trans,
static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry **stopped_at,
unsigned long trace_ip)
{
@ -804,11 +824,11 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,

#ifdef CONFIG_BCACHEFS_DEBUG
trans_for_each_update(trans, i) {
int rw = (trans->flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;

if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
i->bkey_type, rw, &buf)))
return bch2_trans_commit_bkey_invalid(trans, i, &buf);
return bch2_trans_commit_bkey_invalid(trans, flags, i, &buf);
btree_insert_entry_checks(trans, i);
}
#endif
@ -824,7 +844,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (!same_leaf_as_next(trans, i)) {
if (u64s_delta <= 0) {
ret = bch2_foreground_maybe_merge(trans, i->path,
i->level, trans->flags);
i->level, flags);
if (unlikely(ret))
return ret;
}
@ -835,8 +855,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,

ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
JOURNAL_RES_GET_NONBLOCK|
(trans->flags & JOURNAL_WATERMARK_MASK));
(flags & JOURNAL_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s, trace_ip);
@ -847,7 +866,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (unlikely(ret))
return ret;

ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip);

if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
@ -886,7 +905,7 @@ static int journal_reclaim_wait_done(struct bch_fs *c)
}

static noinline
int bch2_trans_commit_error(struct btree_trans *trans,
int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry *i,
int ret, unsigned long trace_ip)
{
@ -894,7 +913,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,

switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
ret = bch2_btree_split_leaf(trans, i->path, flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
break;
@ -912,8 +931,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
case -BCH_ERR_journal_res_get_blocked:
bch2_trans_unlock(trans);

if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(trans->flags & JOURNAL_WATERMARK_reserved)) {
if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(flags & JOURNAL_WATERMARK_reserved)) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
@ -948,20 +967,20 @@ int bch2_trans_commit_error(struct btree_trans *trans,
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);

bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
!(trans->flags & BTREE_INSERT_NOWAIT) &&
(trans->flags & BTREE_INSERT_NOFAIL), c,
!(flags & BTREE_INSERT_NOWAIT) &&
(flags & BTREE_INSERT_NOFAIL), c,
"%s: incorrectly got %s\n", __func__, bch2_err_str(ret));

return ret;
}

static noinline int
bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
{
struct bch_fs *c = trans->c;
int ret;

if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) ||
if (likely(!(flags & BTREE_INSERT_LAZY_RW)) ||
test_bit(BCH_FS_STARTED, &c->flags))
return -BCH_ERR_erofs_trans_commit;

@ -972,7 +991,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
if (ret)
return ret;

percpu_ref_get(&c->writes);
bch2_write_ref_get(c, BCH_WRITE_REF_trans);
return 0;
}

@ -997,7 +1016,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
return ret;
}

int __bch2_trans_commit(struct btree_trans *trans)
int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
@ -1008,7 +1027,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
!trans->extra_journal_entries.nr)
goto out_reset;

if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
if (flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock);

ret = bch2_trans_commit_run_triggers(trans);
@ -1020,9 +1039,9 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto out_reset;
}

if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
unlikely(!percpu_ref_tryget_live(&c->writes))) {
ret = bch2_trans_commit_get_rw_cold(trans);
if (!(flags & BTREE_INSERT_NOCHECK_RW) &&
unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
ret = bch2_trans_commit_get_rw_cold(trans, flags);
if (ret)
goto out_reset;
}
@ -1034,8 +1053,10 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans->journal_u64s = trans->extra_journal_entries.nr;
trans->journal_preres_u64s = 0;

/* For journalling transaction name: */
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);

if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

trans_for_each_update(trans, i) {
EBUG_ON(!i->path->should_be_locked);
@ -1052,27 +1073,32 @@ int __bch2_trans_commit(struct btree_trans *trans)
/* we're going to journal the key being updated: */
u64s = jset_u64s(i->k->k.u64s);
if (i->cached &&
likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
trans->journal_preres_u64s += u64s;

if (i->flags & BTREE_UPDATE_NOJOURNAL)
continue;

trans->journal_u64s += u64s;

/* and we're also going to log the overwrite: */
trans->journal_u64s += jset_u64s(i->old_k.u64s);
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(i->old_k.u64s);
}

if (trans->extra_journal_res) {
ret = bch2_disk_reservation_add(c, trans->disk_res,
trans->extra_journal_res,
(trans->flags & BTREE_INSERT_NOFAIL)
(flags & BTREE_INSERT_NOFAIL)
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
goto err;
}
retry:
EBUG_ON(trans->restarted);
bch2_trans_verify_not_in_restart(trans);
memset(&trans->journal_res, 0, sizeof(trans->journal_res));

ret = do_bch2_trans_commit(trans, &i, _RET_IP_);
ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_);

/* make sure we didn't drop or screw up locks: */
bch2_trans_verify_locks(trans);
@ -1084,22 +1110,14 @@ retry:
out:
bch2_journal_preres_put(&c->journal, &trans->journal_preres);

if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&c->writes);
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
bch2_trans_reset_updates(trans);

if (trans->fs_usage_deltas) {
trans->fs_usage_deltas->used = 0;
memset((void *) trans->fs_usage_deltas +
offsetof(struct replicas_delta_list, memset_start), 0,
(void *) &trans->fs_usage_deltas->memset_end -
(void *) &trans->fs_usage_deltas->memset_start);
}

return ret;
err:
ret = bch2_trans_commit_error(trans, i, ret, _RET_IP_);
ret = bch2_trans_commit_error(trans, flags, i, ret, _RET_IP_);
if (ret)
goto out;

@ -1152,12 +1170,63 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
return __check_pos_snapshot_overwritten(trans, id, pos);
}

static noinline int extent_front_merge(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
struct bkey_i **insert,
enum btree_update_flags flags)
{
struct bch_fs *c = trans->c;
struct bkey_i *update;
int ret;

update = bch2_bkey_make_mut(trans, k);
ret = PTR_ERR_OR_ZERO(update);
if (ret)
return ret;

if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
return 0;

ret = check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p) ?:
check_pos_snapshot_overwritten(trans, iter->btree_id, (*insert)->k.p);
if (ret < 0)
return ret;
if (ret)
return 0;

ret = bch2_btree_delete_at(trans, iter, flags);
if (ret)
return ret;

*insert = update;
return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
int ret;

ret = check_pos_snapshot_overwritten(trans, iter->btree_id, insert->k.p) ?:
check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p);
if (ret < 0)
return ret;
if (ret)
return 0;

bch2_bkey_merge(c, bkey_i_to_s(insert), k);
return 0;
}

int bch2_trans_update_extent(struct btree_trans *trans,
struct btree_iter *orig_iter,
struct bkey_i *insert,
enum btree_update_flags flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter, update_iter;
struct bpos start = bkey_start_pos(&insert->k);
struct bkey_i *update;
@ -1175,46 +1244,15 @@ int bch2_trans_update_extent(struct btree_trans *trans,
if (!k.k)
goto out;

if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
/*
* We can't merge extents if they belong to interior snapshot
* tree nodes, and there's a snapshot in which one extent is
* visible and the other is not - i.e. if visibility is
* different.
*
* Instead of checking if visibilitiy of the two extents is
* different, for now we just check if either has been
* overwritten:
*/
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
if (ret < 0)
goto err;
if (ret)
goto nomerge1;

ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
if (ret < 0)
goto err;
if (ret)
goto nomerge1;

update = bch2_bkey_make_mut(trans, k);
if ((ret = PTR_ERR_OR_ZERO(update)))
goto err;

if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(insert))) {
ret = bch2_btree_delete_at(trans, &iter, flags);
if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
ret = extent_front_merge(trans, &iter, k, &insert, flags);
if (ret)
goto err;

insert = update;
goto next;
}
}
nomerge1:
ret = 0;
if (bkey_eq(k.k->p, start))

goto next;
}

while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
bool front_split = bkey_lt(bkey_start_pos(k.k), start);
@ -1323,22 +1361,10 @@ next:
}

if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
if (ret < 0)
goto err;
ret = extent_back_merge(trans, &iter, insert, k);
if (ret)
goto nomerge2;

ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
if (ret < 0)
goto err;
if (ret)
goto nomerge2;

bch2_bkey_merge(c, bkey_i_to_s(insert), k);
}
nomerge2:
ret = 0;
out:
if (!bkey_deleted(&insert->k)) {
/*
@ -1476,7 +1502,7 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);

i->old_v = bch2_btree_path_peek_slot(path, &i->old_k).v;
i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

if (unlikely(trans->journal_replay_not_finished)) {
@ -1499,7 +1525,9 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
* the key cache - but the key has to exist in the btree for that to
* work:
*/
if (unlikely(path->cached && bkey_deleted(&i->old_k)))
if (path->cached &&
bkey_deleted(&i->old_k) &&
!(flags & BTREE_UPDATE_NO_KEY_CACHE_COHERENCY))
return flush_new_cached_update(trans, path, i, flags, ip);

return 0;
@ -1671,18 +1699,10 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
*/
delete.k.p = iter.pos;

if (iter.flags & BTREE_ITER_IS_EXTENTS) {
unsigned max_sectors =
KEY_SIZE_MAX & (~0 << trans->c->block_bits);

/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete);

ret = bch2_extent_trim_atomic(trans, &iter, &delete);
if (ret)
goto err;
}
if (iter.flags & BTREE_ITER_IS_EXTENTS)
bch2_key_resize(&delete.k,
bpos_min(end, k.k->p).offset -
iter.pos.offset);

ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,

@ -137,23 +137,28 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
{
struct bch_fs_usage_online *ret;
unsigned seq, i, u64s;
unsigned seq, i, v, u64s = fs_usage_u64s(c) + 1;
retry:
ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (unlikely(!ret))
return NULL;

percpu_down_read(&c->mark_lock);

ret = kmalloc(sizeof(struct bch_fs_usage_online) +
sizeof(u64) * c->replicas.nr, GFP_NOFS);
if (unlikely(!ret)) {
v = fs_usage_u64s(c) + 1;
if (unlikely(u64s != v)) {
u64s = v;
percpu_up_read(&c->mark_lock);
return NULL;
kfree(ret);
goto retry;
}

ret->online_reserved = percpu_u64_get(c->online_reserved);

u64s = fs_usage_u64s(c);
do {
seq = read_seqcount_begin(&c->usage_lock);
memcpy(&ret->u, c->usage_base, u64s * sizeof(u64));
unsafe_memcpy(&ret->u, c->usage_base, u64s * sizeof(u64),
"embedded variable length struct");
for (i = 0; i < ARRAY_SIZE(c->usage); i++)
acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
} while (read_seqcount_retry(&c->usage_lock, seq));
@ -1203,17 +1208,23 @@ not_found:
" missing range %llu-%llu",
(bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
*idx, next_idx)) {
struct bkey_i_error new;
struct bkey_i_error *new;

bkey_init(&new.k);
new.k.type = KEY_TYPE_error;
new.k.p = bkey_start_pos(p.k);
new.k.p.offset += *idx - start;
bch2_key_resize(&new.k, next_idx - *idx);
ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new.k_i);
new = bch2_trans_kmalloc(trans, sizeof(*new));
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto err;

bkey_init(&new->k);
new->k.type = KEY_TYPE_error;
new->k.p = bkey_start_pos(p.k);
new->k.p.offset += *idx - start;
bch2_key_resize(&new->k, next_idx - *idx);
ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new->k_i);
}

*idx = next_idx;
err:
fsck_err:
printbuf_exit(&buf);
return ret;
@ -1258,36 +1269,6 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
return ret;
}

static noinline __cold
void fs_usage_apply_warn(struct btree_trans *trans,
unsigned disk_res_sectors,
s64 should_not_have_added)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct printbuf buf = PRINTBUF;

prt_printf(&buf,
bch2_fmt(c, "disk usage increased %lli more than %u sectors reserved)"),
should_not_have_added, disk_res_sectors);

trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };

prt_str(&buf, "new ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
prt_newline(&buf);

prt_str(&buf, "old ");
bch2_bkey_val_to_text(&buf, c, old);
prt_newline(&buf);
}

__WARN();
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}

int bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct replicas_delta_list *deltas)
{
@ -1352,7 +1333,9 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
percpu_up_read(&c->mark_lock);

if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
fs_usage_apply_warn(trans, disk_res_sectors, should_not_have_added);
bch2_trans_inconsistent(trans,
"disk usage increased %lli more than %u sectors reserved)",
should_not_have_added, disk_res_sectors);
return 0;
need_mark:
/* revert changes: */

@ -2,28 +2,24 @@

#include "bcachefs.h"
#include "buckets_waiting_for_journal.h"
#include <linux/hash.h>
#include <linux/random.h>
#include <linux/siphash.h>

static inline struct bucket_hashed *
bucket_hash(struct buckets_waiting_for_journal_table *t,
unsigned hash_seed_idx, u64 dev_bucket)
{
unsigned h = siphash_1u64(dev_bucket, &t->hash_seeds[hash_seed_idx]);

EBUG_ON(!is_power_of_2(t->size));

return t->d + (h & (t->size - 1));
return t->d + hash_64(dev_bucket ^ t->hash_seeds[hash_seed_idx], t->bits);
}

static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t size)
static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t bits)
{
unsigned i;

t->size = size;
t->bits = bits;
for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++)
get_random_bytes(&t->hash_seeds[i], sizeof(t->hash_seeds[i]));
memset(t->d, 0, sizeof(t->d[0]) * size);
memset(t->d, 0, sizeof(t->d[0]) << t->bits);
}

bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
@ -97,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
.dev_bucket = (u64) dev << 56 | bucket,
.journal_seq = journal_seq,
};
size_t i, new_size, nr_elements = 1, nr_rehashes = 0;
size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0;
int ret = 0;

mutex_lock(&b->lock);
@ -106,12 +102,13 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
goto out;

t = b->t;
for (i = 0; i < t->size; i++)
size = 1UL << t->bits;
for (i = 0; i < size; i++)
nr_elements += t->d[i].journal_seq > flushed_seq;

new_size = nr_elements < t->size / 3 ? t->size : t->size * 2;
new_bits = t->bits + (nr_elements * 3 > size);

n = kvmalloc(sizeof(*n) + sizeof(n->d[0]) * new_size, GFP_KERNEL);
n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
if (!n) {
ret = -ENOMEM;
goto out;
@ -119,12 +116,12 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,

retry_rehash:
nr_rehashes++;
bucket_table_init(n, new_size);
bucket_table_init(n, new_bits);

tmp = new;
BUG_ON(!bucket_table_insert(n, &tmp, flushed_seq));

for (i = 0; i < t->size; i++) {
for (i = 0; i < 1UL << t->bits; i++) {
if (t->d[i].journal_seq <= flushed_seq)
continue;

@ -137,7 +134,7 @@ retry_rehash:
kvfree(t);

pr_debug("took %zu rehashes, table at %zu/%zu elements",
nr_rehashes, nr_elements, b->t->size);
nr_rehashes, nr_elements, 1UL << b->t->bits);
out:
mutex_unlock(&b->lock);

@ -151,7 +148,7 @@ void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c)
kvfree(b->t);
}

#define INITIAL_TABLE_SIZE 8
#define INITIAL_TABLE_BITS 3

int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c)
{
@ -159,10 +156,11 @@ int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c)

mutex_init(&b->lock);

b->t = kvmalloc(sizeof(*b->t) + sizeof(b->t->d[0]) * INITIAL_TABLE_SIZE, GFP_KERNEL);
b->t = kvmalloc(sizeof(*b->t) +
(sizeof(b->t->d[0]) << INITIAL_TABLE_BITS), GFP_KERNEL);
if (!b->t)
return -ENOMEM;

bucket_table_init(b->t, INITIAL_TABLE_SIZE);
bucket_table_init(b->t, INITIAL_TABLE_BITS);
return 0;
}

@ -10,8 +10,8 @@ struct bucket_hashed {
};

struct buckets_waiting_for_journal_table {
size_t size;
siphash_key_t hash_seeds[3];
unsigned bits;
u64 hash_seeds[3];
struct bucket_hashed d[];
};

@ -182,7 +182,17 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,

/* Add new ptrs: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
const struct bch_extent_ptr *existing_ptr =
bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev);

if (existing_ptr && existing_ptr->cached) {
/*
* We're replacing a cached pointer with a non
* cached pointer:
*/
bch2_bkey_drop_device_noerror(bkey_i_to_s(insert),
existing_ptr->dev);
} else if (existing_ptr) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
@ -253,8 +263,8 @@ nomatch:
&m->ctxt->stats->sectors_raced);
}

this_cpu_add(c->counters[BCH_COUNTER_move_extent_race], new->k.size);
trace_move_extent_race(&new->k);
this_cpu_add(c->counters[BCH_COUNTER_move_extent_fail], new->k.size);
trace_move_extent_fail(&new->k);

bch2_btree_iter_advance(&iter);
goto next;
@ -388,17 +398,21 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}

int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
int bch2_data_update_init(struct btree_trans *trans,
struct moving_context *ctxt,
struct data_update *m,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
struct data_update_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
unsigned int ptrs_locked = 0;
int ret;

bch2_bkey_buf_init(&m->k);
@ -424,11 +438,14 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,

i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
bool locked;

if (((1U << i) & m->data_opts.rewrite_ptrs) &&
p.ptr.cached)
BUG();

if (!((1U << i) & m->data_opts.rewrite_ptrs))
if (!((1U << i) & m->data_opts.rewrite_ptrs) &&
!p.ptr.cached)
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);

if (((1U << i) & m->data_opts.rewrite_ptrs) &&
@ -448,10 +465,24 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
m->op.incompressible = true;

i++;
if (ctxt) {
move_ctxt_wait_event(ctxt, trans,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
!atomic_read(&ctxt->read_sectors));

bch2_bucket_nocow_lock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0);
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0);
} else {
if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) {
ret = -BCH_ERR_nocow_lock_blocked;
goto err;
}
}
ptrs_locked |= (1U << i);
i++;
}

if (reserve_sectors) {
@ -473,9 +504,13 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
return -BCH_ERR_unwritten_extent_update;
return 0;
err:
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
bch2_bucket_nocow_unlock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0);
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if ((1U << i) & ptrs_locked)
bch2_bucket_nocow_unlock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0);
i++;
}

bch2_bkey_buf_exit(&m->k, c);
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);

@ -33,7 +33,8 @@ void bch2_data_update_read_done(struct data_update *,

void bch2_data_update_exit(struct data_update *);
void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
int bch2_data_update_init(struct bch_fs *, struct data_update *,
int bch2_data_update_init(struct btree_trans *, struct moving_context *,
struct data_update *,
struct write_point_specifier,
struct bch_io_opts, struct data_update_opts,
enum btree_id, struct bkey_s_c);

@ -25,7 +25,6 @@

#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/pretty-printers.h>
#include <linux/random.h>
#include <linux/seq_file.h>

@ -40,7 +39,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct bset *sorted, *inmemory = &b->data->keys;
struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
struct bio *bio;
bool failed = false;
bool failed = false, saw_error = false;

if (!bch2_dev_get_ioref(ca, READ))
return false;
@ -61,7 +60,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
memcpy(n_ondisk, n_sorted, btree_bytes(c));

v->written = 0;
if (bch2_btree_node_read_done(c, ca, v, false))
if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
return false;

n_sorted = c->verify_data->data;
@ -501,6 +500,7 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};

#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@ -520,7 +520,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,

ret = flush_buf(i);
if (ret)
return ret;
break;

bch2_btree_trans_to_text(&i->buf, trans);

@ -550,6 +550,7 @@ static const struct file_operations btree_transactions_ops = {
.release = bch2_dump_release,
.read = bch2_btree_transactions_read,
};
#endif /* CONFIG_BCACHEFS_DEBUG_TRANSACTIONS */

static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
@ -710,7 +711,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,

ret = flush_buf(i);
if (ret)
return ret;
break;

bch2_check_for_deadlock(trans, &i->buf);

@ -756,8 +757,10 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
c->btree_debug, &cached_btree_nodes_ops);

#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
c->btree_debug, &btree_transactions_ops);
#endif

debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);

@ -84,7 +84,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
};

int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
unsigned len;

@ -6,7 +6,7 @@

extern const struct bch_hash_desc bch2_dirent_hash_desc;

int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_dirent ((struct bkey_ops) { \

@ -104,7 +104,7 @@ struct ec_bio {
|
||||
/* Stripes btree keys: */
|
||||
|
||||
int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
|
||||
|
||||
@ -130,7 +130,7 @@ int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
return bch2_bkey_ptrs_invalid(c, k, rw, err);
|
||||
return bch2_bkey_ptrs_invalid(c, k, flags, err);
|
||||
}
|
||||
|
||||
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
@ -673,9 +673,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
if (stripe_idx_to_delete(c) >= 0 &&
|
||||
!percpu_ref_is_dying(&c->writes))
|
||||
schedule_work(&c->ec_stripe_delete_work);
|
||||
if (stripe_idx_to_delete(c) >= 0)
|
||||
bch2_do_stripe_deletes(c);
|
||||
}
|
||||
|
||||
/* stripe deletion */
|
||||
@ -708,6 +707,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
if (ec_stripe_delete(c, idx))
|
||||
break;
|
||||
}
|
||||
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
|
||||
}
|
||||
|
||||
void bch2_do_stripe_deletes(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
|
||||
!schedule_work(&c->ec_stripe_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
|
||||
}
|
||||
|
||||
/* stripe creation: */
|
||||
@ -965,7 +973,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
BUG_ON(!s->allocated);
|
||||
|
||||
if (!percpu_ref_tryget_live(&c->writes))
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_create))
|
||||
goto err;
|
||||
|
||||
ec_generate_ec(&s->new_stripe);
|
||||
@ -1003,7 +1011,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
err_put_writes:
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
|
||||
err:
|
||||
bch2_disk_reservation_put(c, &s->res);
|
||||
|
||||
|
@ -4,9 +4,10 @@
|
||||
|
||||
#include "ec_types.h"
|
||||
#include "buckets_types.h"
|
||||
#include "extents_types.h"
|
||||
|
||||
int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int rw, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
|
||||
@ -206,6 +207,8 @@ void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
|
||||
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
|
||||
void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
|
||||
|
||||
void bch2_do_stripe_deletes(struct bch_fs *);
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
void bch2_ec_flush_new_stripes(struct bch_fs *);
|
||||
|
@ -118,6 +118,7 @@
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_clean) \
|
||||
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
|
||||
x(BCH_ERR_invalid, invalid_bkey) \
|
||||
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
|
||||
|
||||
enum bch_errcode {
|
||||
BCH_ERR_START = 2048,
|
||||
|
@ -27,8 +27,11 @@ bool bch2_inconsistent_error(struct bch_fs *c)
|
||||
|
||||
void bch2_topology_error(struct bch_fs *c)
|
||||
{
|
||||
if (!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags))
|
||||
return;
|
||||
|
||||
set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
|
||||
if (test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||
bch2_inconsistent_error(c);
|
||||
}
|
||||
|
||||
|
@ -73,8 +73,8 @@ do { \
|
||||
#define bch2_trans_inconsistent(trans, ...) \
|
||||
({ \
|
||||
bch_err(trans->c, __VA_ARGS__); \
|
||||
bch2_inconsistent_error(trans->c); \
|
||||
bch2_dump_trans_updates(trans); \
|
||||
bch2_inconsistent_error(trans->c); \
|
||||
})
|
||||
|
||||
#define bch2_trans_inconsistent_on(cond, trans, ...) \
|
||||
|
@ -166,7 +166,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
/* KEY_TYPE_btree_ptr: */

int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
prt_printf(err, "value too big (%zu > %u)",

@ -174,7 +174,7 @@ int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}

return bch2_bkey_ptrs_invalid(c, k, rw, err);
return bch2_bkey_ptrs_invalid(c, k, flags, err);
}

void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,

@ -184,7 +184,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
}

int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);

@ -207,7 +207,7 @@ int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}

return bch2_bkey_ptrs_invalid(c, k, rw, err);
return bch2_bkey_ptrs_invalid(c, k, flags, err);
}

void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,

@ -389,7 +389,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
/* KEY_TYPE_reservation: */

int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);

@ -715,7 +715,7 @@ static inline void __extent_entry_insert(struct bkey_i *k,
memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
dst, (u64 *) end - (u64 *) dst);
k->k.u64s += extent_entry_u64s(new);
memcpy(dst, new, extent_entry_bytes(new));
memcpy_u64s_small(dst, new, extent_entry_u64s(new));
}

void bch2_extent_ptr_decoded_append(struct bkey_i *k,

@ -1086,7 +1086,7 @@ static int extent_ptr_invalid(const struct bch_fs *c,
}

int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;

@ -371,11 +371,11 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
/* KEY_TYPE_btree_ptr: */

int bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);

int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
int, struct bkey_s);

@ -414,7 +414,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
/* KEY_TYPE_reservation: */

int bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c,
int, struct printbuf *);
unsigned, struct printbuf *);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);

@ -659,7 +659,7 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c,
int, struct printbuf *);
unsigned, struct printbuf *);

void bch2_ptr_swab(struct bkey_s);
@ -812,7 +812,7 @@ static void bch2_set_page_dirty(struct bch_fs *c,
i_sectors_acct(c, inode, &res->quota, dirty_sectors);

if (!PageDirty(page))
__set_page_dirty_nobuffers(page);
filemap_dirty_folio(inode->v.i_mapping, page_folio(page));
}

vm_fault_t bch2_page_fault(struct vm_fault *vmf)

@ -2715,7 +2715,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* redirty the full page:
*/
page_mkclean(page);
__set_page_dirty_nobuffers(page);
filemap_dirty_folio(mapping, page_folio(page));
unlock:
unlock_page(page);
put_page(page);

@ -3280,7 +3280,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
long ret;

if (!percpu_ref_tryget_live(&c->writes))
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
return -EROFS;

inode_lock(&inode->v);

@ -3304,7 +3304,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
err:
bch2_pagecache_block_put(inode);
inode_unlock(&inode->v);
percpu_ref_put(&c->writes);
bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);

return bch2_err_class(ret);
}

@ -3448,9 +3448,9 @@ err:
/* fseek: */

static int page_data_offset(struct page *page, unsigned offset)
static int folio_data_offset(struct folio *folio, unsigned offset)
{
struct bch_page_state *s = bch2_page_state(page);
struct bch_page_state *s = bch2_page_state(&folio->page);
unsigned i;

if (s)

@ -3481,8 +3481,7 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
struct folio *folio = fbatch.folios[i];

folio_lock(folio);

offset = page_data_offset(&folio->page,
offset = folio_data_offset(folio,
folio->index == start_index
? start_offset & (PAGE_SIZE - 1)
: 0);

@ -3494,7 +3493,6 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
folio_batch_release(&fbatch);
return ret;
}

folio_unlock(folio);
}
folio_batch_release(&fbatch);

@ -667,10 +667,10 @@ int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
qid = inode->ei_qid;

if (attr->ia_valid & ATTR_UID)
qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
qid.q[QTYP_USR] = from_kuid(mnt_userns, attr->ia_uid);

if (attr->ia_valid & ATTR_GID)
qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
qid.q[QTYP_GRP] = from_kgid(mnt_userns, attr->ia_gid);

ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);

@ -779,18 +779,19 @@ static int bch2_setattr(struct user_namespace *mnt_userns,
}

static int bch2_tmpfile(struct user_namespace *mnt_userns,
struct inode *vdir, struct dentry *dentry, umode_t mode)
struct inode *vdir, struct file *file, umode_t mode)
{
struct bch_inode_info *inode =
__bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
__bch2_create(mnt_userns, to_bch_ei(vdir),
file->f_path.dentry, mode, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);

if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode));

d_mark_tmpfile(dentry, &inode->v);
d_instantiate(dentry, &inode->v);
return 0;
d_mark_tmpfile(file, &inode->v);
d_instantiate(file->f_path.dentry, &inode->v);
return finish_open_simple(file, 0);
}

static int bch2_fill_extent(struct bch_fs *c,
@ -18,7 +18,6 @@ struct bch_inode_info {
struct mutex ei_update_lock;
u64 ei_quota_reserved;
unsigned long ei_last_dirtied;

two_state_lock_t ei_pagecache_lock;

struct mutex ei_quota_lock;

@ -817,7 +817,7 @@ static int hash_check_key(struct btree_trans *trans,
goto bad_hash;

for_each_btree_key_norestart(trans, iter, desc.btree_id,
POS(hash_k.k->p.inode, hash),
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
BTREE_ITER_SLOTS, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;

@ -433,7 +433,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
}

int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);

@ -453,7 +453,7 @@ int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k,
}

int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);

@ -473,7 +473,7 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
}

int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);

@ -536,7 +536,7 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
}

int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
if (k.k->p.inode) {
prt_printf(err, "nonzero k.p.inode");

@ -663,19 +663,8 @@ again:
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) &&
bkey_lt(k.k->p, POS(0, max))) {
while (pos < iter->pos.offset) {
if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
goto found_slot;

pos++;
}

if (k.k->p.snapshot == snapshot &&
!bkey_is_inode(k.k) &&
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
bch2_btree_iter_advance(iter);
continue;
}
if (pos < iter->pos.offset)
goto found_slot;

/*
* We don't need to iterate over keys in every snapshot once

@ -685,12 +674,8 @@ again:
bch2_btree_iter_set_pos(iter, POS(0, pos));
}

while (!ret && pos < max) {
if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
goto found_slot;

pos++;
}
if (!ret && pos < max)
goto found_slot;

if (!ret && start == min)
ret = -BCH_ERR_ENOSPC_inode_create;

@ -713,11 +698,6 @@ found_slot:
return ret;
}

/* We may have raced while the iterator wasn't pointing at pos: */
if (bkey_is_inode(k.k) ||
bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
goto again;

*hint = k.k->p.offset;
inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k);

@ -734,11 +714,11 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
int ret = 0;

/*
* We're never going to be deleting extents, no need to use an extent
* iterator:
* We're never going to be deleting partial extents, no need to use an
* extent iterator:
*/
bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
BTREE_ITER_INTENT);
BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);

while (1) {
bch2_trans_begin(trans);

@ -760,14 +740,6 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
bkey_init(&delete.k);
delete.k.p = iter.pos;

if (iter.flags & BTREE_ITER_IS_EXTENTS) {
bch2_key_resize(&delete.k, k.k->p.offset - iter.pos.offset);

ret = bch2_extent_trim_atomic(trans, &iter, &delete);
if (ret)
goto err;
}

ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);

@ -823,8 +795,8 @@ retry:
if (!bkey_is_inode(k.k)) {
bch2_fs_inconsistent(trans.c,
"inode %llu not found when deleting",
inum.inum);
"inode %llu:%u not found when deleting",
inum.inum, snapshot);
ret = -EIO;
goto err;
}

@ -7,9 +7,9 @@
extern const char * const bch2_inode_opts[];

int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_inode ((struct bkey_ops) { \

@ -41,7 +41,7 @@ static inline bool bkey_is_inode(const struct bkey *k)
}

int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
int, struct printbuf *);
unsigned, struct printbuf *);
void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \
191
libbcachefs/io.c

@ -34,6 +34,7 @@
#include "super-io.h"

#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/random.h>
#include <linux/sched/mm.h>

@ -46,6 +47,8 @@ const char *bch2_blk_status_to_str(blk_status_t status)
return blk_status_to_str(status);
}

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
const struct bch_devs_mask *devs;

@ -134,6 +137,15 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
__bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
}

#else

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
return false;
}

#endif

/* Allocate, free from mempool: */

void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)

@ -242,6 +254,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
struct btree_iter iter;
struct bkey_i *k;
struct bkey_i_inode_v3 *inode;
unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
int ret;

bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,

@ -264,15 +277,24 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
inode = bkey_i_to_inode_v3(k);

if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > le64_to_cpu(inode->v.bi_size))
new_i_size > le64_to_cpu(inode->v.bi_size)) {
inode->v.bi_size = cpu_to_le64(new_i_size);
inode_update_flags = 0;
}

le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
if (i_sectors_delta) {
le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
inode_update_flags = 0;
}

inode->k.p.snapshot = iter.snapshot;
if (inode->k.p.snapshot != iter.snapshot) {
inode->k.p.snapshot = iter.snapshot;
inode_update_flags = 0;
}

ret = bch2_trans_update(trans, &iter, &inode->k_i,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
inode_update_flags);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;

@ -513,8 +535,6 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
continue;

BUG_ON(bkey_ge(iter->pos, end_pos));

bkey_init(&delete.k);
delete.k.p = iter->pos;

@ -527,8 +547,6 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_disk_reservation_put(c, &disk_res);
}

BUG_ON(bkey_gt(iter->pos, end_pos));

return ret ?: ret2;
}

@ -665,6 +683,12 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
bio_sectors(&n->bio));

bio_set_dev(&n->bio, ca->disk_sb.bdev);

if (IS_ENABLED(CONFIG_BCACHEFS_NO_IO) && type != BCH_DATA_btree) {
bio_endio(&n->bio);
continue;
}

submit_bio(&n->bio);
} else {
n->bio.bi_status = BLK_STS_REMOVED;
||||
@ -681,11 +705,12 @@ static void bch2_write_done(struct closure *cl)
|
||||
struct bch_fs *c = op->c;
|
||||
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_write);
|
||||
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
|
||||
|
||||
EBUG_ON(cl->parent);
|
||||
closure_debug_destroy(cl);
|
||||
if (op->end_io)
|
||||
op->end_io(op);
|
||||
@ -780,6 +805,30 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static inline void __wp_update_state(struct write_point *wp, enum write_point_state state)
|
||||
{
|
||||
if (state != wp->state) {
|
||||
u64 now = ktime_get_ns();
|
||||
|
||||
if (wp->last_state_change &&
|
||||
time_after64(now, wp->last_state_change))
|
||||
wp->time[wp->state] += now - wp->last_state_change;
|
||||
wp->state = state;
|
||||
wp->last_state_change = now;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void wp_update_state(struct write_point *wp, bool running)
|
||||
{
|
||||
enum write_point_state state;
|
||||
|
||||
state = running ? WRITE_POINT_running :
|
||||
!list_empty(&wp->writes) ? WRITE_POINT_waiting_io
|
||||
: WRITE_POINT_stopped;
|
||||
|
||||
__wp_update_state(wp, state);
|
||||
}
|
||||
|
||||
static void bch2_write_index(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
@ -787,6 +836,16 @@ static void bch2_write_index(struct closure *cl)
|
||||
struct workqueue_struct *wq = index_update_wq(op);
|
||||
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* We're not using wp->writes_lock here, so this is racey: that's ok,
|
||||
* because this is just for diagnostic purposes, and we're running out
|
||||
* of interrupt context here so if we were to take the log we'd have to
|
||||
* switch to spin_lock_irq()/irqsave(), which is not free:
|
||||
*/
|
||||
if (wp->state == WRITE_POINT_waiting_io)
|
||||
__wp_update_state(wp, WRITE_POINT_waiting_work);
|
||||
|
||||
op->btree_update_ready = true;
|
||||
queue_work(wq, &wp->index_update_work);
|
||||
}
|
||||
@ -799,16 +858,21 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
|
||||
|
||||
while (1) {
|
||||
spin_lock(&wp->writes_lock);
|
||||
op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
|
||||
if (op && !op->btree_update_ready)
|
||||
op = NULL;
|
||||
if (op)
|
||||
list_del(&op->wp_list);
|
||||
list_for_each_entry(op, &wp->writes, wp_list)
|
||||
if (op->btree_update_ready) {
|
||||
list_del(&op->wp_list);
|
||||
goto unlock;
|
||||
}
|
||||
op = NULL;
|
||||
unlock:
|
||||
wp_update_state(wp, op != NULL);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
|
||||
if (!op)
|
||||
break;
|
||||
|
||||
op->flags |= BCH_WRITE_IN_WORKER;
|
||||
|
||||
__bch2_write_index(op);
|
||||
|
||||
if (!(op->flags & BCH_WRITE_DONE))
|
||||
@ -850,12 +914,10 @@ static void bch2_write_endio(struct bio *bio)
|
||||
if (wbio->put_bio)
|
||||
bio_put(bio);
|
||||
|
||||
if (parent) {
|
||||
if (parent)
|
||||
bio_endio(&parent->bio);
|
||||
return;
|
||||
}
|
||||
|
||||
closure_put(cl);
|
||||
else
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void init_append_extent(struct bch_write_op *op,
|
||||
@ -863,7 +925,6 @@ static void init_append_extent(struct bch_write_op *op,
|
||||
struct bversion version,
|
||||
struct bch_extent_crc_unpacked crc)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct bkey_i_extent *e;
|
||||
|
||||
op->pos.offset += crc.uncompressed_size;
|
||||
@ -878,7 +939,7 @@ static void init_append_extent(struct bch_write_op *op,
|
||||
crc.nonce)
|
||||
bch2_extent_crc_append(&e->k_i, crc);
|
||||
|
||||
bch2_alloc_sectors_append_ptrs_inlined(c, wp, &e->k_i, crc.compressed_size,
|
||||
bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
|
||||
op->flags & BCH_WRITE_CACHED);
|
||||
|
||||
bch2_keylist_push(&op->insert_keys);
|
||||
@ -1360,8 +1421,6 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
|
||||
bkey_start_pos(&orig->k), orig->k.p,
|
||||
BTREE_ITER_INTENT, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL, ({
|
||||
BUG_ON(bkey_ge(bkey_start_pos(k.k), orig->k.p));
|
||||
|
||||
bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size);
|
||||
}));
|
||||
|
||||
@ -1641,10 +1700,11 @@ again:
|
||||
goto err;
|
||||
}
|
||||
|
||||
EBUG_ON(!wp);
|
||||
|
||||
bch2_open_bucket_get(c, wp, &op->open_buckets);
|
||||
ret = bch2_write_extent(op, wp, &bio);
|
||||
|
||||
if (ret >= 0)
|
||||
bch2_open_bucket_get(c, wp, &op->open_buckets);
|
||||
bch2_alloc_sectors_done_inlined(c, wp);
|
||||
err:
|
||||
if (ret <= 0) {
|
||||
@ -1652,6 +1712,8 @@ err:
|
||||
spin_lock(&wp->writes_lock);
|
||||
op->wp = wp;
|
||||
list_add_tail(&op->wp_list, &wp->writes);
|
||||
if (wp->state == WRITE_POINT_stopped)
|
||||
__wp_update_state(wp, WRITE_POINT_waiting_io);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
}
|
||||
|
||||
@ -1683,7 +1745,9 @@ err:
|
||||
* synchronously here if we weren't able to submit all of the IO at
|
||||
* once, as that signals backpressure to the caller.
|
||||
*/
|
||||
if ((op->flags & BCH_WRITE_SYNC) || !(op->flags & BCH_WRITE_DONE)) {
|
||||
if ((op->flags & BCH_WRITE_SYNC) ||
|
||||
(!(op->flags & BCH_WRITE_DONE) &&
|
||||
!(op->flags & BCH_WRITE_IN_WORKER))) {
|
||||
closure_sync(&op->cl);
|
||||
__bch2_write_index(op);
|
||||
|
||||
@ -1705,6 +1769,9 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
|
||||
unsigned sectors;
|
||||
int ret;
|
||||
|
||||
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
|
||||
bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
|
||||
|
||||
ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
|
||||
@ -1732,9 +1799,6 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
|
||||
set_bkey_val_bytes(&id->k, data_len);
|
||||
bch2_keylist_push(&op->insert_keys);
|
||||
|
||||
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
|
||||
__bch2_write_index(op);
|
||||
err:
|
||||
bch2_write_done(&op->cl);
|
||||
@ -1782,7 +1846,7 @@ void bch2_write(struct closure *cl)
|
||||
}
|
||||
|
||||
if (c->opts.nochanges ||
|
||||
!percpu_ref_tryget_live(&c->writes)) {
|
||||
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
|
||||
op->error = -BCH_ERR_erofs_no_writes;
|
||||
goto err;
|
||||
}
|
||||
@ -1861,10 +1925,12 @@ static void promote_free(struct bch_fs *c, struct promote_op *op)
|
||||
{
|
||||
int ret;
|
||||
|
||||
bch2_data_update_exit(&op->write);
|
||||
|
||||
ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
|
||||
bch_promote_params);
|
||||
BUG_ON(ret);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
|
||||
kfree_rcu(op, rcu);
|
||||
}
|
||||
|
||||
@ -1876,8 +1942,6 @@ static void promote_done(struct bch_write_op *wop)
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
|
||||
op->start_time);
|
||||
|
||||
bch2_data_update_exit(&op->write);
|
||||
promote_free(c, op);
|
||||
}
|
||||
|
||||
@ -1898,7 +1962,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
bch2_data_update_read_done(&op->write, rbio->pick.crc);
|
||||
}
|
||||
|
||||
static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
static struct promote_op *__promote_alloc(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
struct bkey_s_c k,
|
||||
struct bpos pos,
|
||||
@ -1907,12 +1971,13 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
unsigned sectors,
|
||||
struct bch_read_bio **rbio)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct promote_op *op = NULL;
|
||||
struct bio *bio;
|
||||
unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
|
||||
int ret;
|
||||
|
||||
if (!percpu_ref_tryget_live(&c->writes))
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
|
||||
return NULL;
|
||||
|
||||
op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
|
||||
@ -1950,7 +2015,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
bio = &op->write.op.wbio.bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
|
||||
|
||||
ret = bch2_data_update_init(c, &op->write,
|
||||
ret = bch2_data_update_init(trans, NULL, &op->write,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
opts,
|
||||
(struct data_update_opts) {
|
||||
@ -1959,6 +2024,13 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
|
||||
},
|
||||
btree_id, k);
|
||||
if (ret == -BCH_ERR_nocow_lock_blocked) {
|
||||
ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
|
||||
bch_promote_params);
|
||||
BUG_ON(ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
BUG_ON(ret);
|
||||
op->write.op.end_io = promote_done;
|
||||
|
||||
@ -1969,21 +2041,22 @@ err:
|
||||
kfree(*rbio);
|
||||
*rbio = NULL;
|
||||
kfree(op);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
noinline
|
||||
static struct promote_op *promote_alloc(struct bch_fs *c,
|
||||
struct bvec_iter iter,
|
||||
struct bkey_s_c k,
|
||||
struct extent_ptr_decoded *pick,
|
||||
struct bch_io_opts opts,
|
||||
unsigned flags,
|
||||
struct bch_read_bio **rbio,
|
||||
bool *bounce,
|
||||
bool *read_full)
|
||||
static struct promote_op *promote_alloc(struct btree_trans *trans,
|
||||
struct bvec_iter iter,
|
||||
struct bkey_s_c k,
|
||||
struct extent_ptr_decoded *pick,
|
||||
struct bch_io_opts opts,
|
||||
unsigned flags,
|
||||
struct bch_read_bio **rbio,
|
||||
bool *bounce,
|
||||
bool *read_full)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
|
||||
/* data might have to be decompressed in the write path: */
|
||||
unsigned sectors = promote_full
|
||||
@ -1997,7 +2070,7 @@ static struct promote_op *promote_alloc(struct bch_fs *c,
|
||||
if (!should_promote(c, k, pos, opts, flags))
|
||||
return NULL;
|
||||
|
||||
promote = __promote_alloc(c,
|
||||
promote = __promote_alloc(trans,
|
||||
k.k->type == KEY_TYPE_reflink_v
|
||||
? BTREE_ID_reflink
|
||||
: BTREE_ID_extents,
|
||||
@ -2283,7 +2356,8 @@ static void __bch2_read_endio(struct work_struct *work)
|
||||
}
|
||||
|
||||
csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
|
||||
if (bch2_crc_cmp(csum, rbio->pick.crc.csum))
|
||||
if (bch2_crc_cmp(csum, rbio->pick.crc.csum) &&
|
||||
!IS_ENABLED(CONFIG_BCACHEFS_NO_IO))
|
||||
goto csum_err;
|
||||
|
||||
/*
|
||||
@ -2604,7 +2678,7 @@ retry_pick:
|
||||
}
|
||||
|
||||
if (orig->opts.promote_target)
|
||||
promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
|
||||
promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
|
||||
&rbio, &bounce, &read_full);
|
||||
|
||||
if (!read_full) {
|
||||
@ -2734,10 +2808,21 @@ get_bio:
|
||||
bio_sectors(&rbio->bio));
|
||||
bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
|
||||
|
||||
if (likely(!(flags & BCH_READ_IN_RETRY)))
|
||||
submit_bio(&rbio->bio);
|
||||
else
|
||||
submit_bio_wait(&rbio->bio);
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_NO_IO)) {
|
||||
if (likely(!(flags & BCH_READ_IN_RETRY)))
|
||||
bio_endio(&rbio->bio);
|
||||
} else {
|
||||
if (likely(!(flags & BCH_READ_IN_RETRY)))
|
||||
submit_bio(&rbio->bio);
|
||||
else
|
||||
submit_bio_wait(&rbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* We just submitted IO which may block, we expect relock fail
|
||||
* events and shouldn't count them:
|
||||
*/
|
||||
trans->notrace_relock_fail = true;
|
||||
} else {
|
||||
/* Attempting reconstruct read: */
|
||||
if (bch2_ec_read_extent(c, rbio)) {
|
||||
|
@ -15,7 +15,11 @@
void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_latency_acct(struct bch_dev *, u64, int);
#else
static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
#endif

void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);

@ -25,23 +29,41 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
const char *bch2_blk_status_to_str(blk_status_t);

enum bch_write_flags {
BCH_WRITE_ALLOC_NOWAIT = (1 << 0),
BCH_WRITE_CACHED = (1 << 1),
BCH_WRITE_DATA_ENCODED = (1 << 2),
BCH_WRITE_PAGES_STABLE = (1 << 3),
BCH_WRITE_PAGES_OWNED = (1 << 4),
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 5),
BCH_WRITE_WROTE_DATA_INLINE = (1 << 6),
BCH_WRITE_CHECK_ENOSPC = (1 << 7),
BCH_WRITE_SYNC = (1 << 8),
BCH_WRITE_MOVE = (1 << 9),

/* Internal: */
BCH_WRITE_DONE = (1 << 10),
BCH_WRITE_IO_ERROR = (1 << 11),
BCH_WRITE_CONVERT_UNWRITTEN = (1 << 12),
__BCH_WRITE_ALLOC_NOWAIT,
__BCH_WRITE_CACHED,
__BCH_WRITE_DATA_ENCODED,
__BCH_WRITE_PAGES_STABLE,
__BCH_WRITE_PAGES_OWNED,
__BCH_WRITE_ONLY_SPECIFIED_DEVS,
__BCH_WRITE_WROTE_DATA_INLINE,
__BCH_WRITE_FROM_INTERNAL,
__BCH_WRITE_CHECK_ENOSPC,
__BCH_WRITE_SYNC,
__BCH_WRITE_MOVE,
__BCH_WRITE_IN_WORKER,
__BCH_WRITE_DONE,
__BCH_WRITE_IO_ERROR,
__BCH_WRITE_CONVERT_UNWRITTEN,
};

#define BCH_WRITE_ALLOC_NOWAIT (1U << __BCH_WRITE_ALLOC_NOWAIT)
#define BCH_WRITE_CACHED (1U << __BCH_WRITE_CACHED)
#define BCH_WRITE_DATA_ENCODED (1U << __BCH_WRITE_DATA_ENCODED)
#define BCH_WRITE_PAGES_STABLE (1U << __BCH_WRITE_PAGES_STABLE)
#define BCH_WRITE_PAGES_OWNED (1U << __BCH_WRITE_PAGES_OWNED)
#define BCH_WRITE_ONLY_SPECIFIED_DEVS (1U << __BCH_WRITE_ONLY_SPECIFIED_DEVS)
#define BCH_WRITE_WROTE_DATA_INLINE (1U << __BCH_WRITE_WROTE_DATA_INLINE)
#define BCH_WRITE_FROM_INTERNAL (1U << __BCH_WRITE_FROM_INTERNAL)
#define BCH_WRITE_CHECK_ENOSPC (1U << __BCH_WRITE_CHECK_ENOSPC)
#define BCH_WRITE_SYNC (1U << __BCH_WRITE_SYNC)
#define BCH_WRITE_MOVE (1U << __BCH_WRITE_MOVE)

/* Internal: */
#define BCH_WRITE_IN_WORKER (1U << __BCH_WRITE_IN_WORKER)
#define BCH_WRITE_DONE (1U << __BCH_WRITE_DONE)
#define BCH_WRITE_IO_ERROR (1U << __BCH_WRITE_IO_ERROR)
#define BCH_WRITE_CONVERT_UNWRITTEN (1U << __BCH_WRITE_CONVERT_UNWRITTEN)

static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->alloc_reserve == RESERVE_movinggc
@ -225,7 +225,7 @@ static int journal_entry_open(struct journal *j)
if (!fifo_free(&j->pin))
return JOURNAL_ERR_journal_pin_full;

if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) - 1)
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;

BUG_ON(!j->cur_entry_sectors);

@ -110,7 +110,6 @@
*/

#include <linux/hash.h>
#include <linux/prefetch.h>

#include "journal_types.h"

@ -305,26 +304,15 @@ static inline int journal_res_get_fast(struct journal *j,
{
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
unsigned u64s, offset;

do {
old.v = new.v = v;

/*
* Round up the end of the journal reservation to the next
* cacheline boundary:
*/
u64s = res->u64s;
offset = sizeof(struct jset) / sizeof(u64) +
new.cur_entry_offset + u64s;
u64s += ((offset - 1) & ((SMP_CACHE_BYTES / sizeof(u64)) - 1)) + 1;

/*
* Check if there is still room in the current journal
* entry:
*/
if (new.cur_entry_offset + u64s > j->cur_entry_u64s)
if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
return 0;

EBUG_ON(!journal_state_count(new, new.idx));

@ -332,7 +320,7 @@ static inline int journal_res_get_fast(struct journal *j,
if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
return 0;

new.cur_entry_offset += u64s;
new.cur_entry_offset += res->u64s;
journal_state_inc(&new);

/*

@ -349,15 +337,8 @@ static inline int journal_res_get_fast(struct journal *j,
res->ref = true;
res->idx = old.idx;
res->u64s = u64s;
res->offset = old.cur_entry_offset;
res->seq = le64_to_cpu(j->buf[old.idx].data->seq);

offset = res->offset;
while (offset < res->offset + res->u64s) {
prefetchw(vstruct_idx(j->buf[res->idx].data, offset));
offset += SMP_CACHE_BYTES / sizeof(u64);
}
return 1;
}
@ -154,7 +154,7 @@ replace:
i->nr_ptrs = 0;
i->csum_good = entry_ptr.csum_good;
i->ignore = false;
memcpy(&i->j, j, bytes);
unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
i->ptrs[i->nr_ptrs++] = entry_ptr;

if (dup) {

@ -341,7 +341,7 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
int ret = journal_validate_key(c, jset, entry,
entry->level,
entry->btree_id,
k, version, big_endian, write);
k, version, big_endian, write|BKEY_INVALID_FROM_JOURNAL);
if (ret == FSCK_DELETED_KEY)
continue;

@ -662,7 +662,8 @@ static int journal_entry_overwrite_validate(struct bch_fs *c,
struct jset_entry *entry,
unsigned version, int big_endian, int write)
{
return journal_entry_btree_keys_validate(c, jset, entry, version, big_endian, write);
return journal_entry_btree_keys_validate(c, jset, entry,
version, big_endian, READ);
}

static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,

@ -1498,6 +1499,8 @@ static void journal_write_done(struct closure *cl)
bch2_do_discards(c);
closure_wake_up(&c->freelist_wait);

bch2_reset_alloc_cursors(c);
}
} else if (!j->err_seq || seq < j->err_seq)
j->err_seq = seq;
@ -347,13 +347,13 @@ void bch2_journal_pin_put(struct journal *j, u64 seq)
}
}

static inline void __journal_pin_drop(struct journal *j,
static inline bool __journal_pin_drop(struct journal *j,
struct journal_entry_pin *pin)
{
struct journal_entry_pin_list *pin_list;

if (!journal_pin_active(pin))
return;
return false;

if (j->flush_in_progress == pin)
j->flush_in_progress_dropped = true;

@ -363,19 +363,19 @@ static inline void __journal_pin_drop(struct journal *j,
list_del_init(&pin->list);

/*
* Unpinning a journal entry may make journal_next_bucket() succeed if
* Unpinning a journal entry make make journal_next_bucket() succeed, if
* writing a new last_seq will now make another bucket available:
*/
if (atomic_dec_and_test(&pin_list->count) &&
pin_list == &fifo_peek_front(&j->pin))
bch2_journal_reclaim_fast(j);
return atomic_dec_and_test(&pin_list->count) &&
pin_list == &fifo_peek_front(&j->pin);
}

void bch2_journal_pin_drop(struct journal *j,
struct journal_entry_pin *pin)
{
spin_lock(&j->lock);
__journal_pin_drop(j, pin);
if (__journal_pin_drop(j, pin))
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);
}

@ -384,6 +384,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
journal_pin_flush_fn flush_fn)
{
struct journal_entry_pin_list *pin_list;
bool reclaim;

spin_lock(&j->lock);

@ -400,7 +401,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin_list = journal_seq_pin(j, seq);

__journal_pin_drop(j, pin);
reclaim = __journal_pin_drop(j, pin);

atomic_inc(&pin_list->count);
pin->seq = seq;

@ -412,6 +413,9 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
list_add(&pin->list, &pin_list->list);
else
list_add(&pin->list, &pin_list->flushed);

if (reclaim)
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);

/*

@ -703,7 +707,7 @@ static int bch2_journal_reclaim_thread(void *arg)
j->next_reclaim = now + delay;

while (1) {
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
if (kthread_should_stop())
break;
if (j->reclaim_kicked)

@ -714,9 +718,9 @@ static int bch2_journal_reclaim_thread(void *arg)
spin_unlock(&j->lock);

if (journal_empty)
freezable_schedule();
schedule();
else if (time_after(j->next_reclaim, jiffies))
freezable_schedule_timeout(j->next_reclaim - jiffies);
schedule_timeout(j->next_reclaim - jiffies);
else
break;
}
@ -182,29 +182,32 @@ typedef DARRAY(u64) darray_u64;
/* Embedded in struct bch_fs */
struct journal {
/* Fastpath stuff up front: */

unsigned long flags;
struct {

union journal_res_state reservations;
enum journal_watermark watermark;

union journal_preres_state prereserved;

} __aligned(SMP_CACHE_BYTES);

unsigned long flags;

/* Max size of current journal entry */
unsigned cur_entry_u64s;
unsigned cur_entry_sectors;

/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;

/*
* 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
* insufficient devices:
*/
enum journal_errors cur_entry_error;

union journal_preres_state prereserved;

/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;

unsigned buf_size_want;

/*
* We may queue up some things to be journalled (log messages) before
* the journal has actually started - stash them here:

@ -298,15 +301,15 @@ struct journal {
u64 nr_flush_writes;
u64 nr_noflush_writes;

struct time_stats *flush_write_time;
struct time_stats *noflush_write_time;
struct time_stats *blocked_time;
struct time_stats *flush_seq_time;
struct bch2_time_stats *flush_write_time;
struct bch2_time_stats *noflush_write_time;
struct bch2_time_stats *blocked_time;
struct bch2_time_stats *flush_seq_time;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map res_map;
#endif
};
} __aligned(SMP_CACHE_BYTES);

/*
* Embedded in struct bch_dev. First three fields refer to the array of journal
@ -10,7 +10,7 @@
/* KEY_TYPE_lru is obsolete: */
int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
const struct bch_lru *lru = bkey_s_c_to_lru(k).v;

@ -20,6 +20,12 @@ int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}

if (!lru_pos_time(k.k->p)) {
prt_printf(err, "lru entry at time=0");
return -BCH_ERR_invalid_bkey;
}

return 0;
}

@ -31,6 +37,15 @@ void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "idx %llu", le64_to_cpu(lru->idx));
}

void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
{
prt_printf(out, "%llu:%llu -> %llu:%llu",
lru_pos_id(lru),
lru_pos_time(lru),
u64_to_bucket(lru.offset).inode,
u64_to_bucket(lru.offset).offset);
}

static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
u64 dev_bucket, u64 time, unsigned key_type)
{

@ -22,9 +22,11 @@ static inline u64 lru_pos_time(struct bpos pos)
return pos.inode & ~(~0ULL << LRU_TIME_BITS);
}

int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

void bch2_lru_pos_to_text(struct printbuf *, struct bpos);

#define bch2_bkey_ops_lru ((struct bkey_ops) { \
.key_invalid = bch2_lru_invalid, \
.val_to_text = bch2_lru_to_text, \
@ -61,7 +61,7 @@ static void move_free(struct moving_io *io)
bch2_data_update_exit(&io->write);
wake_up(&ctxt->wait);
percpu_ref_put(&c->writes);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
kfree(io);
}

@ -74,6 +74,7 @@ static void move_write_done(struct bch_write_op *op)
ctxt->write_error = true;

atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
atomic_dec(&io->write.ctxt->write_ios);
move_free(io);
closure_put(&ctxt->cl);
}

@ -87,11 +88,12 @@ static void move_write(struct moving_io *io)
closure_get(&io->write.ctxt->cl);
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
atomic_inc(&io->write.ctxt->write_ios);

bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
}

static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
struct moving_io *io =
list_first_entry_or_null(&ctxt->reads, struct moving_io, list);

@ -105,35 +107,27 @@ static void move_read_endio(struct bio *bio)
struct moving_context *ctxt = io->write.ctxt;

atomic_sub(io->read_sectors, &ctxt->read_sectors);
atomic_dec(&ctxt->read_ios);
io->read_completed = true;

wake_up(&ctxt->wait);
closure_put(&ctxt->cl);
}

static void do_pending_writes(struct moving_context *ctxt, struct btree_trans *trans)
void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
struct btree_trans *trans)
{
struct moving_io *io;

if (trans)
bch2_trans_unlock(trans);

while ((io = next_pending_write(ctxt))) {
while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
list_del(&io->list);
move_write(io);
}
}

#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
do { \
do_pending_writes(_ctxt, _trans); \
\
if (_cond) \
break; \
__wait_event((_ctxt)->wait, \
next_pending_write(_ctxt) || (_cond)); \
} while (1)

static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
struct btree_trans *trans)
{

@ -148,7 +142,11 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);

EBUG_ON(atomic_read(&ctxt->write_sectors));
EBUG_ON(atomic_read(&ctxt->write_ios));
EBUG_ON(atomic_read(&ctxt->read_sectors));
EBUG_ON(atomic_read(&ctxt->read_ios));

if (ctxt->stats) {
progress_list_del(ctxt->c, ctxt->stats);

@ -257,7 +255,7 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0;
}

if (!percpu_ref_tryget_live(&c->writes))
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
return -BCH_ERR_erofs_no_writes;

/*

@ -299,8 +297,8 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;

ret = bch2_data_update_init(c, &io->write, ctxt->wp, io_opts,
data_opts, btree_id, k);
ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, btree_id, k);
if (ret && ret != -BCH_ERR_unwritten_extent_update)
goto err_free_pages;

@ -323,6 +321,7 @@ static int bch2_move_extent(struct btree_trans *trans,
trace_move_extent_read(k.k);

atomic_add(io->read_sectors, &ctxt->read_sectors);
atomic_inc(&ctxt->read_ios);
list_add_tail(&io->list, &ctxt->reads);

/*

@ -341,7 +340,7 @@ err_free_pages:
err_free:
kfree(io);
err:
percpu_ref_put(&c->writes);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret;
}

@ -412,13 +411,15 @@ static int move_ratelimit(struct btree_trans *trans,
}
} while (delay);

/*
* XXX: these limits really ought to be per device, SSDs and hard drives
* will want different limits
*/
move_ctxt_wait_event(ctxt, trans,
atomic_read(&ctxt->write_sectors) <
c->opts.move_bytes_in_flight >> 9);

move_ctxt_wait_event(ctxt, trans,
atomic_read(&ctxt->read_sectors) <
c->opts.move_bytes_in_flight >> 9);
atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);

return 0;
}

@ -24,10 +24,26 @@ struct moving_context {
/* in flight sectors: */
atomic_t read_sectors;
atomic_t write_sectors;
atomic_t read_ios;
atomic_t write_ios;

wait_queue_head_t wait;
};

#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
do { \
bool cond_finished = false; \
bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \
\
if (_cond) \
break; \
__wait_event((_ctxt)->wait, \
bch2_moving_ctxt_next_pending_write(_ctxt) || \
(cond_finished = (_cond))); \
if (cond_finished) \
break; \
} while (1)

typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
struct bch_io_opts *, struct data_update_opts *);

@ -35,6 +51,9 @@ void bch2_moving_ctxt_exit(struct moving_context *);
void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
struct bch_ratelimit *, struct bch_move_stats *,
struct write_point_specifier, bool);
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
struct btree_trans *);

int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
@ -18,6 +18,8 @@ bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos
return false;
}

#define sign(v) (v < 0 ? -1 : v > 0 ? 1 : 0)

void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
{
u64 dev_bucket = bucket_to_u64(bucket);

@ -27,6 +29,8 @@ void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos buc
for (i = 0; i < ARRAY_SIZE(l->b); i++)
if (l->b[i] == dev_bucket) {
BUG_ON(sign(atomic_read(&l->l[i])) != lock_val);

if (!atomic_sub_return(lock_val, &l->l[i]))
closure_wake_up(&l->wait);
return;

@ -35,8 +39,8 @@ void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos buc
BUG();
}

static bool bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
u64 dev_bucket, int flags)
bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
u64 dev_bucket, int flags)
{
int v, lock_val = flags ? 1 : -1;
unsigned i;

@ -69,11 +73,11 @@ void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
struct nocow_lock_bucket *l,
u64 dev_bucket, int flags)
{
if (!bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
u64 start_time = local_clock();

__closure_wait_event(&l->wait, bch2_bucket_nocow_trylock(l, dev_bucket, flags));
__closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
}
}

@ -20,6 +20,7 @@ static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lo
bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int);
void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
struct nocow_lock_bucket *, u64, int);

@ -32,6 +33,15 @@ static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
__bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
}

static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
struct bpos bucket, int flags)
{
u64 dev_bucket = bucket_to_u64(bucket);
struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);

return __bch2_bucket_nocow_trylock(l, dev_bucket, flags);
}

void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);

int bch2_fs_nocow_locking_init(struct bch_fs *);
@ -9,8 +9,6 @@
#include "super-io.h"
#include "util.h"

#include <linux/pretty-printers.h>

#define x(t, n) [n] = #t,

const char * const bch2_metadata_versions[] = {

@ -284,7 +282,7 @@ int bch2_opt_parse(struct bch_fs *c,
if (ret < 0) {
if (err)
prt_printf(err, "%s: must be a number",
opt->attr.name);
opt->attr.name);
return ret;
}
break;

@ -293,7 +291,7 @@ int bch2_opt_parse(struct bch_fs *c,
if (ret < 0) {
if (err)
prt_printf(err, "%s: invalid selection",
opt->attr.name);
opt->attr.name);
return ret;
}

@ -307,7 +305,7 @@ int bch2_opt_parse(struct bch_fs *c,
if (ret < 0) {
if (err)
prt_printf(err, "%s: parse error",
opt->attr.name);
opt->attr.name);
return ret;
}
}
@ -294,7 +294,12 @@ enum opt_type {
OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(1024, U32_MAX), \
BCH2_NO_SB_OPT, 1U << 20, \
NULL, "Amount of IO in flight to keep in flight by the move path")\
NULL, "Maximum Amount of IO to keep in flight by the move path")\
x(move_ios_in_flight, u32, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(1, 1024), \
BCH2_NO_SB_OPT, 32, \
NULL, "Maximum number of IOs to keep in flight by the move path")\
x(fsck, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
@ -336,6 +341,11 @@ enum opt_type {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Only read the journal, skip the rest of recovery")\
x(journal_transaction_names, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
@ -4,16 +4,17 @@
|
||||
#include <linux/err.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/printbuf.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string_helpers.h>
|
||||
|
||||
#include "printbuf.h"
|
||||
|
||||
static inline unsigned printbuf_linelen(struct printbuf *buf)
|
||||
{
|
||||
return buf->pos - buf->last_newline;
|
||||
}
|
||||
|
||||
int printbuf_make_room(struct printbuf *out, unsigned extra)
|
||||
int bch2_printbuf_make_room(struct printbuf *out, unsigned extra)
|
||||
{
|
||||
unsigned new_size;
|
||||
char *buf;
|
||||
@ -44,13 +45,46 @@ int printbuf_make_room(struct printbuf *out, unsigned extra)
|
||||
out->size = new_size;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_make_room);
|
||||
|
||||
void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
|
||||
{
|
||||
int len;
|
||||
|
||||
do {
|
||||
va_list args2;
|
||||
|
||||
va_copy(args2, args);
|
||||
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
|
||||
} while (len + 1 >= printbuf_remaining(out) &&
|
||||
!bch2_printbuf_make_room(out, len + 1));
|
||||
|
||||
len = min_t(size_t, len,
|
||||
printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
|
||||
out->pos += len;
|
||||
}
|
||||
|
||||
void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int len;
|
||||
|
||||
do {
|
||||
va_start(args, fmt);
|
||||
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args);
|
||||
va_end(args);
|
||||
} while (len + 1 >= printbuf_remaining(out) &&
|
||||
!bch2_printbuf_make_room(out, len + 1));
|
||||
|
||||
len = min_t(size_t, len,
|
||||
printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
|
||||
out->pos += len;
|
||||
}
|
||||
|
||||
/**
|
||||
* printbuf_str - returns printbuf's buf as a C string, guaranteed to be null
|
||||
* terminated
|
||||
*/
|
||||
const char *printbuf_str(const struct printbuf *buf)
|
||||
const char *bch2_printbuf_str(const struct printbuf *buf)
|
||||
{
|
||||
/*
|
||||
* If we've written to a printbuf then it's guaranteed to be a null
|
||||
@ -61,33 +95,29 @@ const char *printbuf_str(const struct printbuf *buf)
|
||||
? buf->buf
|
||||
: "";
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_str);
|
||||
|
||||
/**
|
||||
* printbuf_exit - exit a printbuf, freeing memory it owns and poisoning it
|
||||
* against accidental use.
|
||||
*/
|
||||
void printbuf_exit(struct printbuf *buf)
|
||||
void bch2_printbuf_exit(struct printbuf *buf)
|
||||
{
|
||||
if (buf->heap_allocated) {
|
||||
kfree(buf->buf);
|
||||
buf->buf = ERR_PTR(-EINTR); /* poison value */
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_exit);
|
||||
|
||||
void printbuf_tabstops_reset(struct printbuf *buf)
|
||||
void bch2_printbuf_tabstops_reset(struct printbuf *buf)
|
||||
{
|
||||
buf->nr_tabstops = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_tabstops_reset);
|
||||
|
||||
void printbuf_tabstop_pop(struct printbuf *buf)
|
||||
void bch2_printbuf_tabstop_pop(struct printbuf *buf)
|
||||
{
|
||||
if (buf->nr_tabstops)
|
||||
--buf->nr_tabstops;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_tabstop_pop);
|
||||
|
||||
/*
|
||||
* printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop
|
||||
@ -99,7 +129,7 @@ EXPORT_SYMBOL(printbuf_tabstop_pop);
|
||||
* PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
|
||||
* of line.
|
||||
*/
|
||||
int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
|
||||
int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
|
||||
{
|
||||
unsigned prev_tabstop = buf->nr_tabstops
|
||||
? buf->_tabstops[buf->nr_tabstops - 1]
|
||||
@ -112,7 +142,6 @@ int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
|
||||
buf->has_indent_or_tabstops = true;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_tabstop_push);
|
||||
|
||||
/**
|
||||
* printbuf_indent_add - add to the current indent level
|
||||
@ -123,7 +152,7 @@ EXPORT_SYMBOL(printbuf_tabstop_push);
|
||||
* Subsequent lines, and the current line if the output position is at the start
|
||||
* of the current line, will be indented by @spaces more spaces.
|
||||
*/
|
||||
void printbuf_indent_add(struct printbuf *buf, unsigned spaces)
|
||||
void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
|
||||
{
|
||||
if (WARN_ON_ONCE(buf->indent + spaces < buf->indent))
|
||||
spaces = 0;
|
||||
@ -133,7 +162,6 @@ void printbuf_indent_add(struct printbuf *buf, unsigned spaces)
|
||||
|
||||
buf->has_indent_or_tabstops = true;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_indent_add);
|
||||
|
||||
/**
|
||||
* printbuf_indent_sub - subtract from the current indent level
|
||||
@ -144,7 +172,7 @@ EXPORT_SYMBOL(printbuf_indent_add);
|
||||
* Subsequent lines, and the current line if the output position is at the start
|
||||
* of the current line, will be indented by @spaces less spaces.
|
||||
*/
|
||||
void printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
|
||||
void bch2_printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
|
||||
{
|
||||
if (WARN_ON_ONCE(spaces > buf->indent))
|
||||
spaces = buf->indent;
|
||||
@ -158,13 +186,12 @@ void printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
|
||||
if (!buf->indent && !buf->nr_tabstops)
|
||||
buf->has_indent_or_tabstops = false;
|
||||
}
|
||||
EXPORT_SYMBOL(printbuf_indent_sub);
|
||||
|
||||
void prt_newline(struct printbuf *buf)
|
||||
void bch2_prt_newline(struct printbuf *buf)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
printbuf_make_room(buf, 1 + buf->indent);
|
||||
bch2_printbuf_make_room(buf, 1 + buf->indent);
|
||||
|
||||
__prt_char(buf, '\n');
|
||||
|
||||
@ -178,7 +205,6 @@ void prt_newline(struct printbuf *buf)
|
||||
buf->last_field = buf->pos;
|
||||
buf->cur_tabstop = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(prt_newline);
|
||||
|
||||
/*
|
||||
* Returns spaces from start of line, if set, or 0 if unset:
|
||||
@ -207,14 +233,13 @@ static void __prt_tab(struct printbuf *out)
|
||||
*
|
||||
* Advance output to the next tabstop by printing spaces.
|
||||
*/
|
||||
void prt_tab(struct printbuf *out)
|
||||
void bch2_prt_tab(struct printbuf *out)
|
||||
{
|
||||
if (WARN_ON(!cur_tabstop(out)))
|
||||
return;
|
||||
|
||||
__prt_tab(out);
|
||||
}
|
||||
EXPORT_SYMBOL(prt_tab);
|
||||
|
||||
static void __prt_tab_rjust(struct printbuf *buf)
|
||||
{
|
||||
@ -222,7 +247,7 @@ static void __prt_tab_rjust(struct printbuf *buf)
|
||||
int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
|
||||
|
||||
if (pad > 0) {
|
||||
printbuf_make_room(buf, pad);
|
||||
bch2_printbuf_make_room(buf, pad);
|
||||
|
||||
if (buf->last_field + pad < buf->size)
|
||||
memmove(buf->buf + buf->last_field + pad,
|
||||
@ -250,14 +275,13 @@ static void __prt_tab_rjust(struct printbuf *buf)
|
||||
* Advance output to the next tabstop by inserting spaces immediately after the
|
||||
* previous tabstop, right justifying previously outputted text.
|
||||
*/
|
||||
void prt_tab_rjust(struct printbuf *buf)
|
||||
void bch2_prt_tab_rjust(struct printbuf *buf)
|
||||
{
|
||||
if (WARN_ON(!cur_tabstop(buf)))
|
||||
return;
|
||||
|
||||
__prt_tab_rjust(buf);
|
||||
}
|
||||
EXPORT_SYMBOL(prt_tab_rjust);
|
||||
|
||||
/**
|
||||
* prt_bytes_indented - Print an array of chars, handling embedded control characters
|
||||
@ -271,7 +295,7 @@ EXPORT_SYMBOL(prt_tab_rjust);
|
||||
* \t: prt_tab advance to next tabstop
|
||||
* \r: prt_tab_rjust advance to next tabstop, with right justification
|
||||
*/
|
||||
void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
|
||||
void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
|
||||
{
|
||||
const char *unprinted_start = str;
|
||||
const char *end = str + count;
|
||||
@ -286,7 +310,7 @@ void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
|
||||
case '\n':
|
||||
prt_bytes(out, unprinted_start, str - unprinted_start);
|
||||
unprinted_start = str + 1;
|
||||
prt_newline(out);
|
||||
bch2_prt_newline(out);
|
||||
break;
|
||||
case '\t':
|
||||
if (likely(cur_tabstop(out))) {
|
||||
@ -309,34 +333,31 @@ void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
|
||||
|
||||
prt_bytes(out, unprinted_start, str - unprinted_start);
|
||||
}
|
||||
EXPORT_SYMBOL(prt_bytes_indented);
|
||||
|
||||
/**
|
||||
* prt_human_readable_u64 - Print out a u64 in human readable units
|
||||
*
|
||||
* Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
|
||||
*/
|
||||
void prt_human_readable_u64(struct printbuf *buf, u64 v)
|
||||
void bch2_prt_human_readable_u64(struct printbuf *buf, u64 v)
|
||||
{
|
||||
printbuf_make_room(buf, 10);
|
||||
bch2_printbuf_make_room(buf, 10);
|
||||
buf->pos += string_get_size(v, 1, !buf->si_units,
|
||||
buf->buf + buf->pos,
|
||||
printbuf_remaining_size(buf));
|
||||
}
|
||||
EXPORT_SYMBOL(prt_human_readable_u64);
|
||||
|
||||
/**
|
||||
* prt_human_readable_s64 - Print out a s64 in human readable units
|
||||
*
|
||||
* Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
|
||||
*/
|
||||
void prt_human_readable_s64(struct printbuf *buf, s64 v)
|
||||
void bch2_prt_human_readable_s64(struct printbuf *buf, s64 v)
|
||||
{
|
||||
if (v < 0)
|
||||
prt_char(buf, '-');
|
||||
prt_human_readable_u64(buf, abs(v));
|
||||
bch2_prt_human_readable_u64(buf, abs(v));
|
||||
}
|
||||
EXPORT_SYMBOL(prt_human_readable_s64);
|
||||
|
||||
/**
|
||||
* prt_units_u64 - Print out a u64 according to printbuf unit options
|
||||
@ -344,14 +365,13 @@ EXPORT_SYMBOL(prt_human_readable_s64);
|
||||
* Units are either raw (default), or human reabable units (controlled via
|
||||
* @buf->human_readable_units)
|
||||
*/
|
||||
void prt_units_u64(struct printbuf *out, u64 v)
|
||||
void bch2_prt_units_u64(struct printbuf *out, u64 v)
|
||||
{
|
||||
if (out->human_readable_units)
|
||||
prt_human_readable_u64(out, v);
|
||||
bch2_prt_human_readable_u64(out, v);
|
||||
else
|
||||
prt_printf(out, "%llu", v);
|
||||
bch2_prt_printf(out, "%llu", v);
|
||||
}
|
||||
EXPORT_SYMBOL(prt_units_u64);
|
||||
|
||||
/**
|
||||
* prt_units_s64 - Print out a s64 according to printbuf unit options
|
||||
@ -359,10 +379,37 @@ EXPORT_SYMBOL(prt_units_u64);
|
||||
* Units are either raw (default), or human reabable units (controlled via
|
||||
* @buf->human_readable_units)
|
||||
*/
|
||||
void prt_units_s64(struct printbuf *out, s64 v)
|
||||
void bch2_prt_units_s64(struct printbuf *out, s64 v)
|
||||
{
|
||||
if (v < 0)
|
||||
prt_char(out, '-');
|
||||
prt_units_u64(out, abs(v));
|
||||
bch2_prt_units_u64(out, abs(v));
|
||||
}
|
||||
|
||||
void bch2_prt_string_option(struct printbuf *out,
|
||||
const char * const list[],
|
||||
size_t selected)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; list[i]; i++)
|
||||
bch2_prt_printf(out, i == selected ? "[%s] " : "%s ", list[i]);
|
||||
}
|
||||
|
||||
void bch2_prt_bitflags(struct printbuf *out,
|
||||
const char * const list[], u64 flags)
|
||||
{
|
||||
unsigned bit, nr = 0;
|
||||
bool first = true;
|
||||
|
||||
while (list[nr])
|
||||
nr++;
|
||||
|
||||
while (flags && (bit = __ffs(flags)) < nr) {
|
||||
if (!first)
|
||||
bch2_prt_printf(out, ",");
|
||||
first = false;
|
||||
bch2_prt_printf(out, "%s", list[bit]);
|
||||
flags ^= 1 << bit;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(prt_units_s64);
|
@ -1,8 +1,8 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
/* Copyright (C) 2022 Kent Overstreet */
|
||||
|
||||
#ifndef _LINUX_PRINTBUF_H
|
||||
#define _LINUX_PRINTBUF_H
|
||||
#ifndef _BCACHEFS_PRINTBUF_H
|
||||
#define _BCACHEFS_PRINTBUF_H
|
||||
|
||||
/*
|
||||
* Printbufs: Simple strings for printing to, with optional heap allocation
|
||||
@ -100,26 +100,30 @@ struct printbuf {
|
||||
u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
|
||||
};
|
||||
|
||||
int printbuf_make_room(struct printbuf *, unsigned);
|
||||
const char *printbuf_str(const struct printbuf *);
|
||||
void printbuf_exit(struct printbuf *);
|
||||
int bch2_printbuf_make_room(struct printbuf *, unsigned);
|
||||
__printf(2, 3) void bch2_prt_printf(struct printbuf *out, const char *fmt, ...);
|
||||
__printf(2, 0) void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list);
|
||||
const char *bch2_printbuf_str(const struct printbuf *);
|
||||
void bch2_printbuf_exit(struct printbuf *);
|
||||
|
||||
void printbuf_tabstops_reset(struct printbuf *);
|
||||
void printbuf_tabstop_pop(struct printbuf *);
|
||||
int printbuf_tabstop_push(struct printbuf *, unsigned);
|
||||
void bch2_printbuf_tabstops_reset(struct printbuf *);
|
||||
void bch2_printbuf_tabstop_pop(struct printbuf *);
|
||||
int bch2_printbuf_tabstop_push(struct printbuf *, unsigned);
|
||||
|
||||
void printbuf_indent_add(struct printbuf *, unsigned);
|
||||
void printbuf_indent_sub(struct printbuf *, unsigned);
|
||||
void bch2_printbuf_indent_add(struct printbuf *, unsigned);
|
||||
void bch2_printbuf_indent_sub(struct printbuf *, unsigned);
|
||||
|
||||
void prt_newline(struct printbuf *);
|
||||
void prt_tab(struct printbuf *);
|
||||
void prt_tab_rjust(struct printbuf *);
|
||||
void bch2_prt_newline(struct printbuf *);
|
||||
void bch2_prt_tab(struct printbuf *);
|
||||
void bch2_prt_tab_rjust(struct printbuf *);
|
||||
|
||||
void prt_bytes_indented(struct printbuf *, const char *, unsigned);
|
||||
void prt_human_readable_u64(struct printbuf *, u64);
|
||||
void prt_human_readable_s64(struct printbuf *, s64);
|
||||
void prt_units_u64(struct printbuf *, u64);
|
||||
void prt_units_s64(struct printbuf *, s64);
|
||||
void bch2_prt_bytes_indented(struct printbuf *, const char *, unsigned);
|
||||
void bch2_prt_human_readable_u64(struct printbuf *, u64);
|
||||
void bch2_prt_human_readable_s64(struct printbuf *, s64);
|
||||
void bch2_prt_units_u64(struct printbuf *, u64);
|
||||
void bch2_prt_units_s64(struct printbuf *, s64);
|
||||
void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
|
||||
void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
|
||||
|
||||
/* Initializer for a heap allocated printbuf: */
|
||||
#define PRINTBUF ((struct printbuf) { .heap_allocated = true })
|
||||
@ -163,7 +167,7 @@ static inline bool printbuf_overflowed(struct printbuf *out)
|
||||
|
||||
static inline void printbuf_nul_terminate(struct printbuf *out)
|
||||
{
|
||||
printbuf_make_room(out, 1);
|
||||
bch2_printbuf_make_room(out, 1);
|
||||
|
||||
if (out->pos < out->size)
|
||||
out->buf[out->pos] = 0;
|
||||
@ -171,7 +175,7 @@ static inline void printbuf_nul_terminate(struct printbuf *out)
|
||||
out->buf[out->size - 1] = 0;
|
||||
}
|
||||
|
||||
/* Doesn't call printbuf_make_room(), doesn't nul terminate: */
|
||||
/* Doesn't call bch2_printbuf_make_room(), doesn't nul terminate: */
|
||||
static inline void __prt_char_reserved(struct printbuf *out, char c)
|
||||
{
|
||||
if (printbuf_remaining(out))
|
||||
@ -182,7 +186,7 @@ static inline void __prt_char_reserved(struct printbuf *out, char c)
|
||||
/* Doesn't nul terminate: */
|
||||
static inline void __prt_char(struct printbuf *out, char c)
|
||||
{
|
||||
printbuf_make_room(out, 1);
|
||||
bch2_printbuf_make_room(out, 1);
|
||||
__prt_char_reserved(out, c);
|
||||
}
|
||||
|
||||
@ -203,7 +207,7 @@ static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n
|
||||
|
||||
static inline void prt_chars(struct printbuf *out, char c, unsigned n)
|
||||
{
|
||||
printbuf_make_room(out, n);
|
||||
bch2_printbuf_make_room(out, n);
|
||||
__prt_chars_reserved(out, c, n);
|
||||
printbuf_nul_terminate(out);
|
||||
}
|
||||
@ -212,7 +216,7 @@ static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
|
||||
{
|
||||
unsigned i, can_print;
|
||||
|
||||
printbuf_make_room(out, n);
|
||||
bch2_printbuf_make_room(out, n);
|
||||
|
||||
can_print = min(n, printbuf_remaining(out));
|
||||
|
||||
@ -230,12 +234,12 @@ static inline void prt_str(struct printbuf *out, const char *str)
|
||||
|
||||
static inline void prt_str_indented(struct printbuf *out, const char *str)
|
||||
{
|
||||
prt_bytes_indented(out, str, strlen(str));
|
||||
bch2_prt_bytes_indented(out, str, strlen(str));
|
||||
}
|
||||
|
||||
static inline void prt_hex_byte(struct printbuf *out, u8 byte)
|
||||
{
|
||||
printbuf_make_room(out, 2);
|
||||
bch2_printbuf_make_room(out, 2);
|
||||
__prt_char_reserved(out, hex_asc_hi(byte));
|
||||
__prt_char_reserved(out, hex_asc_lo(byte));
|
||||
printbuf_nul_terminate(out);
|
||||
@ -243,7 +247,7 @@ static inline void prt_hex_byte(struct printbuf *out, u8 byte)
|
||||
|
||||
static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte)
|
||||
{
|
||||
printbuf_make_room(out, 2);
|
||||
bch2_printbuf_make_room(out, 2);
|
||||
__prt_char_reserved(out, hex_asc_upper_hi(byte));
|
||||
__prt_char_reserved(out, hex_asc_upper_lo(byte));
|
||||
printbuf_nul_terminate(out);
|
||||
@ -277,30 +281,4 @@ static inline void printbuf_atomic_dec(struct printbuf *buf)
|
||||
buf->atomic--;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is used for the %pf(%p) sprintf format extension, where we pass a pretty
|
||||
* printer and arguments to the pretty-printer to sprintf
|
||||
*
|
||||
* Instead of passing a pretty-printer function to sprintf directly, we pass it
|
||||
* a pointer to a struct call_pp, so that sprintf can check that the magic
|
||||
* number is present, which in turn ensures that the CALL_PP() macro has been
|
||||
* used in order to typecheck the arguments to the pretty printer function
|
||||
*
|
||||
* Example usage:
|
||||
* sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev));
|
||||
*/
|
||||
struct call_pp {
|
||||
unsigned long magic;
|
||||
void *fn;
|
||||
};
|
||||
|
||||
#define PP_TYPECHECK(fn, ...) \
|
||||
({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); })
|
||||
|
||||
#define CALL_PP_MAGIC (unsigned long) 0xce0b92d22f6b6be4
|
||||
|
||||
#define CALL_PP(fn, ...) \
|
||||
(PP_TYPECHECK(fn, ##__VA_ARGS__), \
|
||||
&((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__
|
||||
|
||||
#endif /* _LINUX_PRINTBUF_H */
|
||||
#endif /* _BCACHEFS_PRINTBUF_H */
|
@ -59,7 +59,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = {
};

int bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
if (k.k->p.inode >= QTYP_NR) {
prt_printf(err, "invalid quota type (%llu >= %u)",

@ -7,7 +7,7 @@

extern const struct bch_sb_field_ops bch_sb_field_ops_quota;

int bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_quota ((struct bkey_ops) { \
@ -969,7 +969,7 @@ static int read_btree_roots(struct bch_fs *c)
|
||||
? FSCK_CAN_IGNORE : 0,
|
||||
"error reading btree root %s",
|
||||
bch2_btree_ids[i]);
|
||||
if (i == BTREE_ID_alloc)
|
||||
if (btree_id_is_alloc(i))
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
}
|
||||
}
|
||||
@ -1217,6 +1217,9 @@ use_clean:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->opts.reconstruct_alloc)
|
||||
bch2_fs_log_msg(c, "dropping alloc info");
|
||||
|
||||
/*
|
||||
* Skip past versions that might have possibly been used (as nonces),
|
||||
* but hadn't had their pointers written:
|
||||
@ -1250,6 +1253,20 @@ use_clean:
|
||||
|
||||
bch2_stripes_heap_start(c);
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch_verbose(c, "reading snapshots table");
|
||||
err = "error reading snapshots table";
|
||||
ret = bch2_fs_snapshots_start(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "reading snapshots done");
|
||||
|
||||
if (c->opts.fsck) {
|
||||
bool metadata_only = c->opts.norecovery;
|
||||
|
||||
@ -1262,20 +1279,6 @@ use_clean:
|
||||
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch_verbose(c, "reading snapshots table");
|
||||
err = "error reading snapshots table";
|
||||
ret = bch2_fs_snapshots_start(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "reading snapshots done");
|
||||
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
|
||||
bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr);
|
||||
@ -1343,20 +1346,6 @@ use_clean:
|
||||
if (c->opts.norecovery)
|
||||
goto out;
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch_verbose(c, "reading snapshots table");
|
||||
err = "error reading snapshots table";
|
||||
ret = bch2_fs_snapshots_start(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "reading snapshots done");
|
||||
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
|
||||
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
|
||||
@ -1632,6 +1621,6 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
|
||||
return 0;
|
||||
err:
|
||||
pr_err("Error initializing new filesystem: %s (%i)", err, ret);
|
||||
pr_err("Error initializing new filesystem: %s (%s)", err, bch2_err_str(ret));
|
||||
return ret;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k)
|
||||
/* reflink pointers */
|
||||
|
||||
int bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
|
||||
|
||||
@ -78,7 +78,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
|
||||
/* indirect extents */
|
||||
|
||||
int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
|
||||
|
||||
@ -88,7 +88,7 @@ int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
return bch2_bkey_ptrs_invalid(c, k, rw, err);
|
||||
return bch2_bkey_ptrs_invalid(c, k, flags, err);
|
||||
}
|
||||
|
||||
void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
@ -131,7 +131,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans,
|
||||
/* indirect inline data */
|
||||
|
||||
int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_val_bytes(k.k) < sizeof(struct bch_indirect_inline_data)) {
|
||||
prt_printf(err, "incorrect value size (%zu < %zu)",
|
||||
@ -282,7 +282,7 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
u32 dst_snapshot, src_snapshot;
|
||||
int ret = 0, ret2 = 0;
|
||||
|
||||
if (!percpu_ref_tryget_live(&c->writes))
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
|
||||
return -BCH_ERR_erofs_no_writes;
|
||||
|
||||
bch2_check_set_feature(c, BCH_FEATURE_reflink);
|
||||
@ -416,7 +416,7 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
bch2_bkey_buf_exit(&new_src, c);
|
||||
bch2_bkey_buf_exit(&new_dst, c);
|
||||
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
|
||||
|
||||
return dst_done ?: ret ?: ret2;
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
#define _BCACHEFS_REFLINK_H
|
||||
|
||||
int bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
|
||||
@ -17,7 +17,7 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
|
||||
})
|
||||
|
||||
int bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct bkey_s_c);
|
||||
int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
|
||||
@ -32,7 +32,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
|
||||
})
|
||||
|
||||
int bch2_indirect_inline_data_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_indirect_inline_data_to_text(struct printbuf *,
|
||||
struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
|
||||
|
@ -299,6 +299,13 @@ static int replicas_table_update(struct bch_fs *c,
|
||||
|
||||
memset(new_usage, 0, sizeof(new_usage));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_usage); i++)
|
||||
if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
|
||||
sizeof(u64), GFP_KERNEL)))
|
||||
goto err;
|
||||
|
||||
memset(new_usage, 0, sizeof(new_usage));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_usage); i++)
|
||||
if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
|
||||
sizeof(u64), GFP_KERNEL)))
|
||||
|
@ -27,22 +27,6 @@ bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
|
||||
int bch2_mark_replicas(struct bch_fs *,
|
||||
struct bch_replicas_entry *);
|
||||
|
||||
struct replicas_delta {
|
||||
s64 delta;
|
||||
struct bch_replicas_entry r;
|
||||
} __packed;
|
||||
|
||||
struct replicas_delta_list {
|
||||
unsigned size;
|
||||
unsigned used;
|
||||
|
||||
struct {} memset_start;
|
||||
u64 nr_inodes;
|
||||
u64 persistent_reserved[BCH_REPLICAS_MAX];
|
||||
struct {} memset_end;
|
||||
struct replicas_delta d[0];
|
||||
};
|
||||
|
||||
static inline struct replicas_delta *
|
||||
replicas_delta_next(struct replicas_delta *d)
|
||||
{
|
||||
|
@ -8,4 +8,20 @@ struct bch_replicas_cpu {
|
||||
struct bch_replicas_entry *entries;
|
||||
};
|
||||
|
||||
struct replicas_delta {
|
||||
s64 delta;
|
||||
struct bch_replicas_entry r;
|
||||
} __packed;
|
||||
|
||||
struct replicas_delta_list {
|
||||
unsigned size;
|
||||
unsigned used;
|
||||
|
||||
struct {} memset_start;
|
||||
u64 nr_inodes;
|
||||
u64 persistent_reserved[BCH_REPLICAS_MAX];
|
||||
struct {} memset_end;
|
||||
struct replicas_delta d[0];
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_REPLICAS_TYPES_H */
|
||||
|
@ -25,7 +25,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
}
|
||||
|
||||
int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_snapshot s;
|
||||
u32 i, id;
|
||||
@ -706,16 +706,14 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
|
||||
|
||||
bch2_delete_dead_snapshots(c);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
|
||||
}
|
||||
|
||||
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
|
||||
{
|
||||
if (!percpu_ref_tryget_live(&c->writes))
|
||||
return;
|
||||
|
||||
if (!queue_work(system_long_wq, &c->snapshot_delete_work))
|
||||
percpu_ref_put(&c->writes);
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
|
||||
!queue_work(system_long_wq, &c->snapshot_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
|
||||
}
|
||||
|
||||
static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
|
||||
@ -735,7 +733,7 @@ static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
|
||||
/* Subvolumes: */
|
||||
|
||||
int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
|
||||
bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
|
||||
@ -900,7 +898,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
|
||||
darray_exit(&s);
|
||||
}
|
||||
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
|
||||
}
|
||||
|
||||
struct subvolume_unlink_hook {
|
||||
@ -923,11 +921,11 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (unlikely(!percpu_ref_tryget_live(&c->writes)))
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
|
||||
return -EROFS;
|
||||
|
||||
if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int rw, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
|
||||
#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \
|
||||
.key_invalid = bch2_snapshot_invalid, \
|
||||
@ -106,7 +106,7 @@ void bch2_fs_snapshots_exit(struct bch_fs *);
|
||||
int bch2_fs_snapshots_start(struct bch_fs *);
|
||||
|
||||
int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
int rw, struct printbuf *);
|
||||
unsigned, struct printbuf *);
|
||||
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
|
||||
|
@ -20,7 +20,6 @@
#include "counters.h"

#include <linux/backing-dev.h>
#include <linux/pretty-printers.h>
#include <linux/sort.h>

#include <trace/events/bcachefs.h>
@ -1261,7 +1260,8 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,

u->entry.type = BCH_JSET_ENTRY_data_usage;
u->v = cpu_to_le64(c->usage_base->replicas[i]);
memcpy(&u->r, e, replicas_entry_bytes(e));
unsafe_memcpy(&u->r, e, replicas_entry_bytes(e),
"embedded variable length struct");
}

for_each_member_device(ca, c, dev) {
@ -55,7 +55,6 @@
|
||||
#include <linux/idr.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/pretty-printers.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <crypto/hash.h>
|
||||
@ -110,7 +109,7 @@ static struct kset *bcachefs_kset;
|
||||
static LIST_HEAD(bch_fs_list);
|
||||
static DEFINE_MUTEX(bch_fs_list_lock);
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
|
||||
DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait);
|
||||
|
||||
static void bch2_dev_free(struct bch_dev *);
|
||||
static int bch2_dev_alloc(struct bch_fs *, unsigned);
|
||||
@ -238,13 +237,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
bch2_dev_allocator_remove(c, ca);
|
||||
}
|
||||
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
static void bch2_writes_disabled(struct percpu_ref *writes)
|
||||
{
|
||||
struct bch_fs *c = container_of(writes, struct bch_fs, writes);
|
||||
|
||||
set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
wake_up(&bch_read_only_wait);
|
||||
wake_up(&bch2_read_only_wait);
|
||||
}
|
||||
#endif
|
||||
|
||||
void bch2_fs_read_only(struct bch_fs *c)
|
||||
{
|
||||
@ -259,9 +260,13 @@ void bch2_fs_read_only(struct bch_fs *c)
|
||||
* Block new foreground-end write operations from starting - any new
|
||||
* writes will return -EROFS:
|
||||
*/
|
||||
set_bit(BCH_FS_GOING_RO, &c->flags);
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_kill(&c->writes);
|
||||
|
||||
cancel_work_sync(&c->ec_stripe_delete_work);
|
||||
#else
|
||||
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
|
||||
bch2_write_ref_put(c, i);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we're not doing an emergency shutdown, we want to wait on
|
||||
@ -274,16 +279,17 @@ void bch2_fs_read_only(struct bch_fs *c)
|
||||
* we do need to wait on them before returning and signalling
|
||||
* that going RO is complete:
|
||||
*/
|
||||
wait_event(bch_read_only_wait,
|
||||
wait_event(bch2_read_only_wait,
|
||||
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
|
||||
test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
|
||||
|
||||
__bch2_fs_read_only(c);
|
||||
|
||||
wait_event(bch_read_only_wait,
|
||||
wait_event(bch2_read_only_wait,
|
||||
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
|
||||
|
||||
clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
clear_bit(BCH_FS_GOING_RO, &c->flags);
|
||||
|
||||
if (!bch2_journal_error(&c->journal) &&
|
||||
!test_bit(BCH_FS_ERROR, &c->flags) &&
|
||||
@ -320,7 +326,7 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
|
||||
bch2_journal_halt(&c->journal);
|
||||
bch2_fs_read_only_async(c);
|
||||
|
||||
wake_up(&bch_read_only_wait);
|
||||
wake_up(&bch2_read_only_wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -392,20 +398,26 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
return ret;
|
||||
}
|
||||
|
||||
schedule_work(&c->ec_stripe_delete_work);
|
||||
|
||||
bch2_do_discards(c);
|
||||
bch2_do_invalidates(c);
|
||||
|
||||
if (!early) {
|
||||
ret = bch2_fs_read_write_late(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_reinit(&c->writes);
|
||||
#else
|
||||
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
|
||||
BUG_ON(atomic_long_read(&c->writes[i]));
|
||||
atomic_long_inc(&c->writes[i]);
|
||||
}
|
||||
#endif
|
||||
set_bit(BCH_FS_RW, &c->flags);
|
||||
set_bit(BCH_FS_WAS_RW, &c->flags);
|
||||
|
||||
bch2_do_discards(c);
|
||||
bch2_do_invalidates(c);
|
||||
bch2_do_stripe_deletes(c);
|
||||
return 0;
|
||||
err:
|
||||
__bch2_fs_read_only(c);
|
||||
@ -454,19 +466,21 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
bch2_journal_keys_free(&c->journal_keys);
|
||||
bch2_journal_entries_free(c);
|
||||
percpu_free_rwsem(&c->mark_lock);
|
||||
free_percpu(c->online_reserved);
|
||||
|
||||
if (c->btree_paths_bufs)
|
||||
for_each_possible_cpu(cpu)
|
||||
kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path);
|
||||
|
||||
free_percpu(c->online_reserved);
|
||||
free_percpu(c->btree_paths_bufs);
|
||||
free_percpu(c->pcpu);
|
||||
mempool_exit(&c->large_bkey_pool);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
bioset_exit(&c->btree_bio);
|
||||
mempool_exit(&c->fill_iter);
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_exit(&c->writes);
|
||||
#endif
|
||||
kfree(rcu_dereference_protected(c->disk_groups, 1));
|
||||
kfree(c->journal_seq_blacklist_table);
|
||||
kfree(c->unused_inode_hints);
|
||||
@ -695,6 +709,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
|
||||
seqcount_init(&c->usage_lock);
|
||||
|
||||
sema_init(&c->io_in_flight, 128);
|
||||
|
||||
c->copy_gc_enabled = 1;
|
||||
c->rebalance.enabled = 1;
|
||||
c->promote_whole_extents = true;
|
||||
@ -743,9 +759,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
|
||||
bch2_opts_apply(&c->opts, opts);
|
||||
|
||||
/* key cache currently disabled for inodes, because of snapshots: */
|
||||
c->opts.inodes_use_key_cache = 0;
|
||||
|
||||
c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
|
||||
if (c->opts.inodes_use_key_cache)
|
||||
c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;
|
||||
@ -766,23 +779,25 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
|
||||
|
||||
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
|
||||
WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512)) ||
|
||||
!(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
|
||||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
|
||||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io",
|
||||
WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_init(&c->writes, bch2_writes_disabled,
|
||||
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
|
||||
#endif
|
||||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
|
||||
bioset_init(&c->btree_bio, 1,
|
||||
max(offsetof(struct btree_read_bio, bio),
|
||||
offsetof(struct btree_write_bio, wbio.bio)),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
|
||||
!(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) ||
|
||||
!(c->online_reserved = alloc_percpu(u64)) ||
|
||||
!(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) ||
|
||||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
btree_bytes(c)) ||
|
||||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
|
||||
@ -850,9 +865,12 @@ static void print_mount_opts(struct bch_fs *c)
|
||||
struct printbuf p = PRINTBUF;
|
||||
bool first = true;
|
||||
|
||||
prt_printf(&p, "mounted version=%s", bch2_metadata_versions[c->sb.version]);
|
||||
|
||||
if (c->opts.read_only) {
|
||||
prt_printf(&p, "ro");
|
||||
prt_str(&p, " opts=");
|
||||
first = false;
|
||||
prt_printf(&p, "ro");
|
||||
}
|
||||
|
||||
for (i = 0; i < bch2_opts_nr; i++) {
|
||||
@ -865,16 +883,12 @@ static void print_mount_opts(struct bch_fs *c)
|
||||
if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
|
||||
continue;
|
||||
|
||||
if (!first)
|
||||
prt_printf(&p, ",");
|
||||
prt_str(&p, first ? " opts=" : ",");
|
||||
first = false;
|
||||
bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
|
||||
}
|
||||
|
||||
if (!p.pos)
|
||||
prt_printf(&p, "(null)");
|
||||
|
||||
bch_info(c, "mounted version=%s opts=%s", bch2_metadata_versions[c->sb.version], p.buf);
|
||||
bch_info(c, "%s", p.buf);
|
||||
printbuf_exit(&p);
|
||||
}
|
||||
|
||||
@ -1955,5 +1969,8 @@ err:
|
||||
BCH_DEBUG_PARAMS()
|
||||
#undef BCH_DEBUG_PARAM
|
||||
|
||||
unsigned bch2_metadata_version = bcachefs_metadata_version_current;
|
||||
module_param_named(version, bch2_metadata_version, uint, 0400);
|
||||
|
||||
module_exit(bcachefs_exit);
|
||||
module_init(bcachefs_init);
|
||||
|
@ -251,7 +251,8 @@ int bch2_fs_read_write_early(struct bch_fs *);
*/
static inline void bch2_fs_lazy_rw(struct bch_fs *c)
{
if (percpu_ref_is_zero(&c->writes))
if (!test_bit(BCH_FS_RW, &c->flags) &&
!test_bit(BCH_FS_WAS_RW, &c->flags))
bch2_fs_read_write_early(c);
}
@ -35,7 +35,6 @@
|
||||
#include "tests.h"
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/pretty-printers.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
@ -195,8 +194,32 @@ read_attribute(btree_cache);
|
||||
read_attribute(btree_key_cache);
|
||||
read_attribute(stripes_heap);
|
||||
read_attribute(open_buckets);
|
||||
read_attribute(write_points);
|
||||
read_attribute(nocow_lock_table);
|
||||
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
read_attribute(write_refs);
|
||||
|
||||
const char * const bch2_write_refs[] = {
|
||||
#define x(n) #n,
|
||||
BCH_WRITE_REFS()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
bch2_printbuf_tabstop_push(out, 24);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++) {
|
||||
prt_str(out, bch2_write_refs[i]);
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%li", atomic_long_read(&c->writes[i]));
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
read_attribute(internal_uuid);
|
||||
|
||||
read_attribute(has_data);
|
||||
@ -432,6 +455,9 @@ SHOW(bch2_fs)
|
||||
if (attr == &sysfs_open_buckets)
|
||||
bch2_open_buckets_to_text(out, c);
|
||||
|
||||
if (attr == &sysfs_write_points)
|
||||
bch2_write_points_to_text(out, c);
|
||||
|
||||
if (attr == &sysfs_compression_stats)
|
||||
bch2_compression_stats_to_text(out, c);
|
||||
|
||||
@ -450,6 +476,11 @@ SHOW(bch2_fs)
|
||||
if (attr == &sysfs_nocow_lock_table)
|
||||
bch2_nocow_locks_to_text(out, &c->nocow_locks);
|
||||
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
if (attr == &sysfs_write_refs)
|
||||
bch2_write_refs_to_text(out, c);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -632,7 +663,11 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_new_stripes,
|
||||
&sysfs_stripes_heap,
|
||||
&sysfs_open_buckets,
|
||||
&sysfs_write_points,
|
||||
&sysfs_nocow_lock_table,
|
||||
#ifdef BCH_WRITE_REF_DEBUG
|
||||
&sysfs_write_refs,
|
||||
#endif
|
||||
&sysfs_io_timers_read,
|
||||
&sysfs_io_timers_write,
|
||||
|
||||
@ -684,7 +719,7 @@ STORE(bch2_fs_opts_dir)
|
||||
* We don't need to take c->writes for correctness, but it eliminates an
|
||||
* unsightly error message in the dmesg log when we're RO:
|
||||
*/
|
||||
if (unlikely(!percpu_ref_tryget_live(&c->writes)))
|
||||
if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
|
||||
return -EROFS;
|
||||
|
||||
tmp = kstrdup(buf, GFP_KERNEL);
|
||||
@ -714,7 +749,7 @@ STORE(bch2_fs_opts_dir)
|
||||
|
||||
ret = size;
|
||||
err:
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
|
||||
return ret;
|
||||
}
|
||||
SYSFS_OPS(bch2_fs_opts_dir);
|
||||
|
@ -573,7 +573,7 @@ static u64 test_rand(void)
{
u64 v;

prandom_bytes(&v, sizeof(v));
get_random_bytes(&v, sizeof(v));
return v;
}
@ -240,12 +240,12 @@ bool bch2_is_zero(const void *_p, size_t n)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void bch2_quantiles_update(struct quantiles *q, u64 v)
|
||||
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
|
||||
{
|
||||
unsigned i = 0;
|
||||
|
||||
while (i < ARRAY_SIZE(q->entries)) {
|
||||
struct quantile_entry *e = q->entries + i;
|
||||
struct bch2_quantile_entry *e = q->entries + i;
|
||||
|
||||
if (unlikely(!e->step)) {
|
||||
e->m = v;
|
||||
@ -292,7 +292,6 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
|
||||
if (!*p)
|
||||
break;
|
||||
lines = p + 1;
|
||||
prefix = KERN_CONT;
|
||||
}
|
||||
console_unlock();
|
||||
}
|
||||
@ -301,11 +300,9 @@ int bch2_prt_backtrace(struct printbuf *out, struct task_struct *task)
|
||||
{
|
||||
unsigned long entries[32];
|
||||
unsigned i, nr_entries;
|
||||
int ret;
|
||||
|
||||
ret = down_read_killable(&task->signal->exec_update_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!down_read_trylock(&task->signal->exec_update_lock))
|
||||
return 0;
|
||||
|
||||
nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
@ -319,7 +316,8 @@ int bch2_prt_backtrace(struct printbuf *out, struct task_struct *task)
|
||||
|
||||
/* time stats: */
|
||||
|
||||
static inline void bch2_time_stats_update_one(struct time_stats *stats,
|
||||
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
|
||||
static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
u64 duration, freq;
|
||||
@ -348,10 +346,10 @@ static inline void bch2_time_stats_update_one(struct time_stats *stats,
|
||||
}
|
||||
}
|
||||
|
||||
static noinline void bch2_time_stats_clear_buffer(struct time_stats *stats,
|
||||
struct time_stat_buffer *b)
|
||||
static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
|
||||
struct bch2_time_stat_buffer *b)
|
||||
{
|
||||
struct time_stat_buffer_entry *i;
|
||||
struct bch2_time_stat_buffer_entry *i;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&stats->lock, flags);
|
||||
@ -364,7 +362,7 @@ static noinline void bch2_time_stats_clear_buffer(struct time_stats *stats,
|
||||
b->nr = 0;
|
||||
}
|
||||
|
||||
void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
|
||||
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@ -379,17 +377,17 @@ void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
|
||||
if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
|
||||
stats->duration_stats.n > 1024)
|
||||
stats->buffer =
|
||||
alloc_percpu_gfp(struct time_stat_buffer,
|
||||
alloc_percpu_gfp(struct bch2_time_stat_buffer,
|
||||
GFP_ATOMIC);
|
||||
spin_unlock_irqrestore(&stats->lock, flags);
|
||||
} else {
|
||||
struct time_stat_buffer *b;
|
||||
struct bch2_time_stat_buffer *b;
|
||||
|
||||
preempt_disable();
|
||||
b = this_cpu_ptr(stats->buffer);
|
||||
|
||||
BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
|
||||
b->entries[b->nr++] = (struct time_stat_buffer_entry) {
|
||||
b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
|
||||
.start = start,
|
||||
.end = end
|
||||
};
|
||||
@ -399,6 +397,7 @@ void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct time_unit {
|
||||
const char *name;
|
||||
@ -426,7 +425,14 @@ static const struct time_unit *pick_time_units(u64 ns)
|
||||
return u;
|
||||
}
|
||||
|
||||
static void pr_time_units(struct printbuf *out, u64 ns)
|
||||
void bch2_pr_time_units(struct printbuf *out, u64 ns)
|
||||
{
|
||||
const struct time_unit *u = pick_time_units(ns);
|
||||
|
||||
prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
|
||||
}
|
||||
|
||||
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
|
||||
{
|
||||
const struct time_unit *u = pick_time_units(ns);
|
||||
|
||||
@ -441,11 +447,11 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64
|
||||
{
|
||||
prt_str(out, name);
|
||||
prt_tab(out);
|
||||
pr_time_units(out, ns);
|
||||
bch2_pr_time_units_aligned(out, ns);
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
|
||||
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
|
||||
{
|
||||
const struct time_unit *u;
|
||||
s64 f_mean = 0, d_mean = 0;
|
||||
@ -499,16 +505,16 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
|
||||
|
||||
prt_printf(out, "mean:");
|
||||
prt_tab(out);
|
||||
pr_time_units(out, d_mean);
|
||||
bch2_pr_time_units_aligned(out, d_mean);
|
||||
prt_tab(out);
|
||||
pr_time_units(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
|
||||
bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "stddev:");
|
||||
prt_tab(out);
|
||||
pr_time_units(out, d_stddev);
|
||||
bch2_pr_time_units_aligned(out, d_stddev);
|
||||
prt_tab(out);
|
||||
pr_time_units(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
|
||||
bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
|
||||
|
||||
printbuf_indent_sub(out, 2);
|
||||
prt_newline(out);
|
||||
@ -522,16 +528,16 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
|
||||
|
||||
prt_printf(out, "mean:");
|
||||
prt_tab(out);
|
||||
pr_time_units(out, f_mean);
|
||||
bch2_pr_time_units_aligned(out, f_mean);
|
||||
prt_tab(out);
|
||||
pr_time_units(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
|
||||
bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "stddev:");
|
||||
prt_tab(out);
|
||||
pr_time_units(out, f_stddev);
|
||||
bch2_pr_time_units_aligned(out, f_stddev);
|
||||
prt_tab(out);
|
||||
pr_time_units(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
|
||||
bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
|
||||
|
||||
printbuf_indent_sub(out, 2);
|
||||
prt_newline(out);
|
||||
@ -554,12 +560,12 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_time_stats_exit(struct time_stats *stats)
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *stats)
|
||||
{
|
||||
free_percpu(stats->buffer);
|
||||
}
|
||||
|
||||
void bch2_time_stats_init(struct time_stats *stats)
|
||||
void bch2_time_stats_init(struct bch2_time_stats *stats)
|
||||
{
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
stats->duration_stats_weighted.w = 8;
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/llist.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/printbuf.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/ratelimit.h>
|
||||
@ -215,6 +214,34 @@ do { \
|
||||
#define ANYSINT_MAX(t) \
|
||||
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
|
||||
|
||||
#include "printbuf.h"
|
||||
|
||||
#define prt_vprintf(_out, ...) bch2_prt_vprintf(_out, __VA_ARGS__)
|
||||
#define prt_printf(_out, ...) bch2_prt_printf(_out, __VA_ARGS__)
|
||||
#define printbuf_str(_buf) bch2_printbuf_str(_buf)
|
||||
#define printbuf_exit(_buf) bch2_printbuf_exit(_buf)
|
||||
|
||||
#define printbuf_tabstops_reset(_buf) bch2_printbuf_tabstops_reset(_buf)
|
||||
#define printbuf_tabstop_pop(_buf) bch2_printbuf_tabstop_pop(_buf)
|
||||
#define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n)
|
||||
|
||||
#define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n)
|
||||
#define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n)
|
||||
|
||||
#define prt_newline(_out) bch2_prt_newline(_out)
|
||||
#define prt_tab(_out) bch2_prt_tab(_out)
|
||||
#define prt_tab_rjust(_out) bch2_prt_tab_rjust(_out)
|
||||
|
||||
#define prt_bytes_indented(...) bch2_prt_bytes_indented(__VA_ARGS__)
|
||||
#define prt_u64(_out, _v) prt_printf(_out, "%llu", _v)
|
||||
#define prt_human_readable_u64(...) bch2_prt_human_readable_u64(__VA_ARGS__)
|
||||
#define prt_human_readable_s64(...) bch2_prt_human_readable_s64(__VA_ARGS__)
|
||||
#define prt_units_u64(...) bch2_prt_units_u64(__VA_ARGS__)
|
||||
#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__)
|
||||
#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__)
|
||||
#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
|
||||
|
||||
void bch2_pr_time_units(struct printbuf *, u64);
|
||||
|
||||
#ifdef __KERNEL__
|
||||
static inline void pr_time(struct printbuf *out, u64 time)
|
||||
@ -340,22 +367,22 @@ int bch2_prt_backtrace(struct printbuf *, struct task_struct *);
|
||||
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
|
||||
#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
|
||||
|
||||
struct quantiles {
|
||||
struct quantile_entry {
|
||||
struct bch2_quantiles {
|
||||
struct bch2_quantile_entry {
|
||||
u64 m;
|
||||
u64 step;
|
||||
} entries[NR_QUANTILES];
|
||||
};
|
||||
|
||||
struct time_stat_buffer {
|
||||
struct bch2_time_stat_buffer {
|
||||
unsigned nr;
|
||||
struct time_stat_buffer_entry {
|
||||
struct bch2_time_stat_buffer_entry {
|
||||
u64 start;
|
||||
u64 end;
|
||||
} entries[32];
|
||||
};
|
||||
|
||||
struct time_stats {
|
||||
struct bch2_time_stats {
|
||||
spinlock_t lock;
|
||||
/* all fields are in nanoseconds */
|
||||
u64 max_duration;
|
||||
@ -363,26 +390,30 @@ struct time_stats {
|
||||
u64 max_freq;
|
||||
u64 min_freq;
|
||||
u64 last_event;
|
||||
struct quantiles quantiles;
|
||||
struct bch2_quantiles quantiles;
|
||||
|
||||
struct mean_and_variance duration_stats;
|
||||
struct mean_and_variance_weighted duration_stats_weighted;
|
||||
struct mean_and_variance freq_stats;
|
||||
struct mean_and_variance_weighted freq_stats_weighted;
|
||||
struct time_stat_buffer __percpu *buffer;
|
||||
struct bch2_time_stat_buffer __percpu *buffer;
|
||||
};
|
||||
|
||||
void __bch2_time_stats_update(struct time_stats *stats, u64, u64);
|
||||
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
|
||||
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
|
||||
#else
|
||||
static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
|
||||
#endif
|
||||
|
||||
static inline void bch2_time_stats_update(struct time_stats *stats, u64 start)
|
||||
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
|
||||
{
|
||||
__bch2_time_stats_update(stats, start, local_clock());
|
||||
}
|
||||
|
||||
void bch2_time_stats_to_text(struct printbuf *, struct time_stats *);
|
||||
void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
|
||||
|
||||
void bch2_time_stats_exit(struct time_stats *);
|
||||
void bch2_time_stats_init(struct time_stats *);
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *);
|
||||
void bch2_time_stats_init(struct bch2_time_stats *);
|
||||
|
||||
#define ewma_add(ewma, val, weight) \
|
||||
({ \
|
||||
@ -582,6 +613,20 @@ static inline void memmove_u64s_down(void *dst, const void *src,
|
||||
__memmove_u64s_down(dst, src, u64s);
|
||||
}
|
||||
|
||||
static inline void __memmove_u64s_down_small(void *dst, const void *src,
|
||||
unsigned u64s)
|
||||
{
|
||||
memcpy_u64s_small(dst, src, u64s);
|
||||
}
|
||||
|
||||
static inline void memmove_u64s_down_small(void *dst, const void *src,
|
||||
unsigned u64s)
|
||||
{
|
||||
EBUG_ON(dst > src);
|
||||
|
||||
__memmove_u64s_down_small(dst, src, u64s);
|
||||
}
|
||||
|
||||
static inline void __memmove_u64s_up_small(void *_dst, const void *_src,
|
||||
unsigned u64s)
|
||||
{
|
||||
|
@ -70,7 +70,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
};

int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
unsigned flags, struct printbuf *err)
{
const struct xattr_handler *handler;
struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);

@ -6,7 +6,7 @@

extern const struct bch_hash_desc bch2_xattr_hash_desc;

int bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_xattr ((struct bkey_ops) { \
@ -184,7 +184,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
if (buffered_fd < 0)
return ERR_PTR(-errno);

fd = open(path, flags|O_DIRECT);
fd = open(path, flags);
if (fd < 0)
fd = dup(buffered_fd);
if (fd < 0) {
@ -192,7 +192,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
return ERR_PTR(-errno);
}

sync_fd = open(path, flags|O_DIRECT|O_SYNC);
sync_fd = open(path, flags|O_SYNC);
if (sync_fd < 0)
sync_fd = open(path, flags|O_SYNC);
if (sync_fd < 0) {
@ -42,8 +42,6 @@
#include <linux/math64.h>
#include <linux/mean_and_variance.h>
#include <linux/module.h>
#include <linux/printbuf.h>

/**
* fast_divpow2() - fast approximation for n / (1 << d)
Some files were not shown because too many files have changed in this diff