commit f3f005c76e (parent 1ef396b684)

Update bcachefs sources to 50847e296b34 bcachefs: Check subvol <-> inode pointers in check_inode()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Changed files:

.bcachefs_revision, Makefile
c_src/
include/linux/: darray.h, darray_types.h, eytzinger.h, mean_and_variance.h, mempool.h, spinlock.h, spinlock_types.h, thread_with_file.h, thread_with_file_types.h, time.h, time_stats.h, types.h
libbcachefs/: alloc_foreground.c, bcachefs.h, bcachefs_format.h, bcachefs_ioctl.h, bset.c, btree_cache.c, btree_gc.c, btree_io.c, btree_iter.c, btree_journal_iter.c, btree_journal_iter.h, btree_locking.h, btree_types.h, btree_update.c, btree_update_interior.c, btree_write_buffer_types.h, buckets.c, chardev.c, compress.c, debug.c, dirent.c, dirent.h, ec.c, errcode.h, error.c, fifo.h, fs-common.c, fs-ioctl.c, fs.c, fsck.c, inode.c, inode.h, io_read.c, io_write.c, journal.c, journal.h, journal_io.c, journal_io.h, journal_reclaim.c, journal_sb.c, journal_seq_blacklist.c, journal_types.h, migrate.c, nocow_locking.c, replicas.c, replicas.h, sb-clean.c, sb-downgrade.c, sb-errors_types.h, sb-members.h, str_hash.h, subvolume.c, subvolume.h, subvolume_types.h, super-io.h, super.c, sysfs.c, thread_with_file.c, thread_with_file.h, thread_with_file_types.h, util.c, util.h
linux/
src/wrappers/
.bcachefs_revision:
@@ -1 +1 @@
-481b5f34324809f47a58ed798d038fb17e5b7b0a
+50847e296b34efabe199e408ec4d72f10a866c39
Makefile (19 lines changed):
@@ -273,11 +273,20 @@ update-bcachefs-sources:
 	git add include/linux/kmemleak.h
 	cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/
 	git add linux/int_sqrt.c
 	git rm -f libbcachefs/mean_and_variance_test.c
-#	cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/
-#	git add linux/mean_and_variance.c
-#	cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/
-#	git add include/linux/mean_and_variance.h
+	cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/
+	git add linux/mean_and_variance.c
+	cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/
+	git add include/linux/mean_and_variance.h
+	cp $(LINUX_DIR)/lib/time_stats.c linux/
+	git add linux/time_stats.c
+	cp $(LINUX_DIR)/include/linux/time_stats.h include/linux/
+	git add include/linux/time_stats.h
+	cp $(LINUX_DIR)/include/linux/darray.h include/linux/
+	git add include/linux/darray.h
+	cp $(LINUX_DIR)/include/linux/darray_types.h include/linux/
+	git add include/linux/darray_types.h
 	cp $(LINUX_DIR)/include/linux/eytzinger.h include/linux/
 	git add include/linux/eytzinger.h
 	cp $(LINUX_DIR)/scripts/Makefile.compiler ./
 	git add Makefile.compiler
 	$(RM) libbcachefs/*.mod.c
c_src:
@@ -23,12 +23,13 @@
 #include "cmds.h"
 #include "libbcachefs.h"
 #include "crypto.h"
-#include "libbcachefs/darray.h"
 #include "libbcachefs/errcode.h"
 #include "libbcachefs/opts.h"
 #include "libbcachefs/super-io.h"
 #include "libbcachefs/util.h"
 
+#include "linux/darray.h"
+
 #define OPTS						\
 x(0,	replicas,		required_argument)	\
 x(0,	encrypted,		no_argument)		\
@@ -9,13 +9,14 @@
 
 #include "libbcachefs/bcachefs_ioctl.h"
 #include "libbcachefs/buckets.h"
-#include "libbcachefs/darray.h"
 #include "libbcachefs/opts.h"
 #include "libbcachefs/super-io.h"
 
 #include "cmds.h"
 #include "libbcachefs.h"
 
+#include "linux/darray.h"
+
 static void __dev_usage_type_to_text(struct printbuf *out,
 				     enum bch_data_type type,
 				     unsigned bucket_size,
@@ -20,7 +20,7 @@
 #include <linux/uuid.h>
 #include "libbcachefs/bcachefs.h"
 #include "libbcachefs/bbpos.h"
-#include "libbcachefs/darray.h"
+#include "linux/darray.h"
 
 #define noreturn __attribute__((noreturn))
include/linux/darray.h:
@@ -1,34 +1,26 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_DARRAY_H
-#define _BCACHEFS_DARRAY_H
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_DARRAY_H
+#define _LINUX_DARRAY_H
 
 /*
- * Dynamic arrays:
+ * Dynamic arrays
  *
  * Inspired by CCAN's darray
  */
 
+#include <linux/darray_types.h>
 #include <linux/slab.h>
 
-#define DARRAY_PREALLOCATED(_type, _nr)		\
-struct {					\
-	size_t nr, size;			\
-	_type *data;				\
-	_type preallocated[_nr];		\
-}
-
-#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
-
-typedef DARRAY(char)	darray_char;
-typedef DARRAY(char *)	darray_str;
-
-int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
+int __darray_resize_slowpath(darray_char *, size_t, size_t, gfp_t);
 
 static inline int __darray_resize(darray_char *d, size_t element_size,
 				  size_t new_size, gfp_t gfp)
 {
 	return unlikely(new_size > d->size)
-		? __bch2_darray_resize(d, element_size, new_size, gfp)
+		? __darray_resize_slowpath(d, element_size, new_size, gfp)
 		: 0;
 }
@@ -69,6 +61,28 @@ static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more,
 #define darray_first(_d)	((_d).data[0])
 #define darray_last(_d)		((_d).data[(_d).nr - 1])
 
+/* Insert/remove items into the middle of a darray: */
+
+#define array_insert_item(_array, _nr, _pos, _new_item)			\
+do {									\
+	memmove(&(_array)[(_pos) + 1],					\
+		&(_array)[(_pos)],					\
+		sizeof((_array)[0]) * ((_nr) - (_pos)));		\
+	(_nr)++;							\
+	(_array)[(_pos)] = (_new_item);					\
+} while (0)
+
+#define array_remove_items(_array, _nr, _pos, _nr_to_remove)		\
+do {									\
+	(_nr) -= (_nr_to_remove);					\
+	memmove(&(_array)[(_pos)],					\
+		&(_array)[(_pos) + (_nr_to_remove)],			\
+		sizeof((_array)[0]) * ((_nr) - (_pos)));		\
+} while (0)
+
+#define array_remove_item(_array, _nr, _pos)				\
+	array_remove_items(_array, _nr, _pos, 1)
+
 #define darray_insert_item(_d, pos, _item)				\
 ({									\
 	size_t _pos = (pos);						\
@@ -79,10 +93,15 @@ static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more,
 	_ret;								\
 })
 
-#define darray_remove_item(_d, _pos)					\
-	array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
+#define darray_remove_items(_d, _pos, _nr_to_remove)			\
+	array_remove_items((_d)->data, (_d)->nr, (_pos) - (_d)->data, _nr_to_remove)
+
+#define darray_remove_item(_d, _pos)					\
+	darray_remove_items(_d, _pos, 1)
+
+/* Iteration: */
 
 #define __darray_for_each(_d, _i)					\
 	for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++)
 
 #define darray_for_each(_d, _i)						\
@@ -106,4 +125,4 @@ do {									\
 	darray_init(_d);						\
 } while (0)
 
-#endif /* _BCACHEFS_DARRAY_H */
+#endif /* _LINUX_DARRAY_H */
include/linux/darray_types.h (new file, 22 lines):
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_DARRAY_TYpES_H
+#define _LINUX_DARRAY_TYpES_H
+
+#include <linux/types.h>
+
+#define DARRAY_PREALLOCATED(_type, _nr)		\
+struct {					\
+	size_t nr, size;			\
+	_type *data;				\
+	_type preallocated[_nr];		\
+}
+
+#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
+
+typedef DARRAY(char)	darray_char;
+typedef DARRAY(char *)	darray_str;
+
+#endif /* _LINUX_DARRAY_TYpES_H */
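The darray API thus moves from libbcachefs into include/linux, split into an interface header and a types-only header so structures can embed a darray without pulling in slab.h. As a quick illustration of the interface (a minimal sketch, not part of the commit; it assumes the darray_push()/darray_exit() helpers from the unchanged middle of the header, and the darray_for_each() flavour that declares its own cursor):

    static DARRAY(u64) nums;

    static int collect(void)
    {
        for (u64 v = 0; v < 16; v++) {
            /* grows the backing array via __darray_resize() on demand */
            int ret = darray_push(&nums, v);
            if (ret)
                return ret;
        }

        darray_for_each(nums, i)    /* i iterates as a u64 * over nums.data */
            pr_info("%llu\n", *i);

        darray_exit(&nums);         /* frees heap storage, keeps preallocated */
        return 0;
    }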
include/linux/eytzinger.h:
@@ -1,27 +1,37 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _EYTZINGER_H
-#define _EYTZINGER_H
+#ifndef _LINUX_EYTZINGER_H
+#define _LINUX_EYTZINGER_H
 
 #include <linux/bitops.h>
 #include <linux/log2.h>
 
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
 
 /*
  * Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
  *
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array.  Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
+ *
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
  */
 
 static inline unsigned eytzinger1_child(unsigned i, unsigned child)
 {
-	EBUG_ON(child > 1);
+	EYTZINGER_BUG_ON(child > 1);
 
 	return (i << 1) + child;
 }
@@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
 
 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-	EBUG_ON(i > size);
+	EYTZINGER_BUG_ON(i > size);
 
 	if (eytzinger1_right_child(i) <= size) {
 		i = eytzinger1_right_child(i);
@@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 
 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-	EBUG_ON(i > size);
+	EYTZINGER_BUG_ON(i > size);
 
 	if (eytzinger1_left_child(i) <= size) {
 		i = eytzinger1_left_child(i) + 1;
@@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
 	unsigned shift = __fls(size) - b;
 	int s;
 
-	EBUG_ON(!i || i > size);
+	EYTZINGER_BUG_ON(!i || i > size);
 
 	i  ^= 1U << b;
 	i <<= 1;
@@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
 	unsigned shift;
 	int s;
 
-	EBUG_ON(!i || i > size);
+	EYTZINGER_BUG_ON(!i || i > size);
 
 	/*
 	 * sign bit trick:
@@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
 
 static inline unsigned eytzinger0_child(unsigned i, unsigned child)
 {
-	EBUG_ON(child > 1);
+	EYTZINGER_BUG_ON(child > 1);
 
 	return (i << 1) + 1 + child;
 }
@@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
 	     (_i) != -1;				\
 	     (_i) = eytzinger0_next((_i), (_size)))
 
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
 /* return greatest node <= @search, or -1 if not found */
 static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-					 eytzinger_cmp_fn cmp, const void *search)
+					 cmp_func_t cmp, const void *search)
 {
 	unsigned i, n = 0;
 
@@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
 
 	do {
 		i = n;
-		n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+		n = eytzinger0_child(i, cmp(search, base + i * size) >= 0);
 	} while (n < nr);
 
 	if (n & 1) {
@@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
 	_i;						\
 })
 
-void eytzinger0_sort(void *, size_t, size_t,
-		     int (*cmp_func)(const void *, const void *, size_t),
-		     void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+		       cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
 
-#endif /* _EYTZINGER_H */
+#endif /* _LINUX_EYTZINGER_H */
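The search helpers now take the kernel-standard two-argument cmp_func_t; the element size is no longer passed to the comparison. A minimal sketch of a lookup over an eytzinger0-ordered array under the new signature (not from the commit):

    static int cmp_u64(const void *l, const void *r)
    {
        const u64 *a = l, *b = r;

        return *a < *b ? -1 : *a > *b;
    }

    /* base[] must already be in eytzinger0 order, e.g. via eytzinger0_sort() */
    static ssize_t find_le(u64 *base, size_t nr, u64 needle)
    {
        /* returns index of greatest element <= needle, or -1 */
        return eytzinger0_find_le(base, nr, sizeof(base[0]), cmp_u64, &needle);
    }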
include/linux/mean_and_variance.h:
@@ -17,7 +17,7 @@
  * Rust and rustc has issues with u128.
  */
 
-#if defined(__SIZEOF_INT128__) && defined(__KERNEL__)
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC)
 
 typedef struct {
 	unsigned __int128 v;
@@ -154,8 +154,6 @@ struct mean_and_variance {
 
 /* expontentially weighted variant */
 struct mean_and_variance_weighted {
-	bool	init;
-	u8	weight; /* base 2 logarithim */
 	s64	mean;
 	u64	variance;
 };
@@ -192,10 +190,14 @@ s64 mean_and_variance_get_mean(struct mean_and_variance s);
 u64 mean_and_variance_get_variance(struct mean_and_variance s1);
 u32 mean_and_variance_get_stddev(struct mean_and_variance s);
 
-void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
+				       s64 v, bool initted, u8 weight);
 
-s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
-u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
-u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
+					u8 weight);
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
+					    u8 weight);
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
+					  u8 weight);
 
 #endif // MEAN_AND_VAIRANCE_H_
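With init and weight dropped from struct mean_and_variance_weighted, both are supplied on every call; callers track initialization themselves and pass a fixed weight. A hedged sketch of the new calling convention (TIME_STATS_MV_WEIGHT comes from the time_stats.h added later in this diff; the sample()/smoothed_mean() wrappers are hypothetical):

    static struct mean_and_variance_weighted w;
    static bool w_initted;

    static void sample(s64 v)
    {
        mean_and_variance_weighted_update(&w, v, w_initted, TIME_STATS_MV_WEIGHT);
        w_initted = true;
    }

    static s64 smoothed_mean(void)
    {
        return mean_and_variance_weighted_get_mean(w, TIME_STATS_MV_WEIGHT);
    }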
include/linux/mempool.h:
@@ -90,6 +90,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
 				      (void *) size);
 }
 
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kvfree(void *element, void *pool_data);
+
+static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+	return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
+{
+	return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
 /*
  * A mempool_alloc_t and mempool_free_t for a simple page allocator that
  * allocates pages of the order specified by pool_data
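These helpers parallel the existing mempool_init_kmalloc_pool()/mempool_create_kmalloc_pool() pair, but back the pool with kvmalloc() so large elements can fall back to vmalloc. A minimal usage sketch (the pool and its size are hypothetical):

    static mempool_t bounce_pool;

    static int bounce_pool_init(size_t element_size)
    {
        /* keep one pre-allocated element in reserve; element size rides in pool_data */
        return mempool_init_kvmalloc_pool(&bounce_pool, 1, element_size);
    }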
include/linux/spinlock.h:
@@ -1,65 +1 @@
-#ifndef __TOOLS_LINUX_SPINLOCK_H
-#define __TOOLS_LINUX_SPINLOCK_H
-
-#include <linux/atomic.h>
-#include <pthread.h>
-
-typedef struct {
-	pthread_mutex_t		lock;
-} raw_spinlock_t;
-
-#define __RAW_SPIN_LOCK_UNLOCKED(name)	(raw_spinlock_t) { .lock = PTHREAD_MUTEX_INITIALIZER }
-
-static inline void raw_spin_lock_init(raw_spinlock_t *lock)
-{
-	pthread_mutex_init(&lock->lock, NULL);
-}
-
-static inline bool raw_spin_trylock(raw_spinlock_t *lock)
-{
-	return !pthread_mutex_trylock(&lock->lock);
-}
-
-static inline void raw_spin_lock(raw_spinlock_t *lock)
-{
-	pthread_mutex_lock(&lock->lock);
-}
-
-static inline void raw_spin_unlock(raw_spinlock_t *lock)
-{
-	pthread_mutex_unlock(&lock->lock);
-}
-
-#define raw_spin_lock_irq(lock)		raw_spin_lock(lock)
-#define raw_spin_unlock_irq(lock)	raw_spin_unlock(lock)
-
-#define raw_spin_lock_irqsave(lock, flags)	\
-do {						\
-	flags = 0;				\
-	raw_spin_lock(lock);			\
-} while (0)
-
-#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock)
-
-typedef raw_spinlock_t spinlock_t;
-
-#define __SPIN_LOCK_UNLOCKED(name)	__RAW_SPIN_LOCK_UNLOCKED(name)
-
-#define DEFINE_SPINLOCK(x)	spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
-
-#define spin_lock_init(lock)	raw_spin_lock_init(lock)
-#define spin_lock(lock)		raw_spin_lock(lock)
-#define spin_unlock(lock)	raw_spin_unlock(lock)
-
-#define spin_lock_nested(lock, n)	spin_lock(lock)
-
-#define spin_lock_bh(lock)	raw_spin_lock(lock)
-#define spin_unlock_bh(lock)	raw_spin_unlock(lock)
-
-#define spin_lock_irq(lock)	raw_spin_lock(lock)
-#define spin_unlock_irq(lock)	raw_spin_unlock(lock)
-
-#define spin_lock_irqsave(lock, flags)		raw_spin_lock_irqsave(lock, flags)
-#define spin_unlock_irqrestore(lock, flags)	raw_spin_unlock_irqrestore(lock, flags)
-
-#endif /* __TOOLS_LINUX_SPINLOCK_H */
+#include "linux/spinlock_types.h"
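The userspace shim itself is unchanged — pthread mutexes standing in for spinlocks — it simply moves wholesale into spinlock_types.h (next) so headers that need only the types, such as the new time_stats.h, can include <linux/spinlock_types.h> without dragging in everything else. Callers keep the usual kernel idiom; a trivial sketch with a hypothetical lock:

    static DEFINE_SPINLOCK(example_lock);

    static void critical_section(void)
    {
        spin_lock(&example_lock);
        /* ... protected work ... */
        spin_unlock(&example_lock);
    }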
include/linux/spinlock_types.h (new file, 65 lines):
@@ -0,0 +1,65 @@
The new file is the pthread-backed spinlock implementation shown removed above, moved verbatim from spinlock.h: the same content, guard included, from "#ifndef __TOOLS_LINUX_SPINLOCK_H" through "#endif /* __TOOLS_LINUX_SPINLOCK_H */", each line added.
include/linux/thread_with_file.h (new file, 15 lines):
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
+ */
+#ifndef _LINUX_THREAD_WITH_FILE_H
+#define _LINUX_THREAD_WITH_FILE_H
+
+struct stdio_redirect;
+
+__printf(3, 0)
+static inline void stdio_redirect_vprintf(struct stdio_redirect *s, bool nonblocking, const char *msg, va_list args) {}
+__printf(3, 4)
+static inline void stdio_redirect_printf(struct stdio_redirect *s, bool nonblocking, const char *msg, ...) {}
+
+#endif /* _LINUX_THREAD_WITH_FILE_H */
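In the userspace build these redirect helpers are deliberate no-ops; the in-kernel implementation writes through the thread's stdio file instead. A sketch of the call shape, so the same code compiles in both environments (the wrapper is hypothetical):

    static void report_progress(struct stdio_redirect *stdio, unsigned errors)
    {
        /* no-op here; in-kernel this prints to the attached file */
        stdio_redirect_printf(stdio, false, "fsck: %u errors\n", errors);
    }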
include/linux/thread_with_file_types.h (new, empty file)
include/linux/time.h (new, empty file)
include/linux/time_stats.h (new file, 167 lines):
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * time_stats - collect statistics on events that have a duration, with nicely
+ * formatted textual output on demand
+ *
+ * - percpu buffering of event collection: cheap enough to shotgun
+ *   everywhere without worrying about overhead
+ *
+ * tracks:
+ *  - number of events
+ *  - maximum event duration ever seen
+ *  - sum of all event durations
+ *  - average event duration, standard and weighted
+ *  - standard deviation of event durations, standard and weighted
+ * and analagous statistics for the frequency of events
+ *
+ * We provide both mean and weighted mean (exponentially weighted), and standard
+ * deviation and weighted standard deviation, to give an efficient-to-compute
+ * view of current behaviour versus. average behaviour - "did this event source
+ * just become wonky, or is this typical?".
+ *
+ * Particularly useful for tracking down latency issues.
+ */
+#ifndef _LINUX_TIME_STATS_H
+#define _LINUX_TIME_STATS_H
+
+#include <linux/mean_and_variance.h>
+#include <linux/sched/clock.h>
+#include <linux/spinlock_types.h>
+#include <linux/string.h>
+
+struct time_unit {
+	const char	*name;
+	u64		nsecs;
+};
+
+/*
+ * given a nanosecond value, pick the preferred time units for printing:
+ */
+const struct time_unit *pick_time_units(u64 ns);
+
+/*
+ * quantiles - do not use:
+ *
+ * Only enabled if time_stats->quantiles_enabled has been manually set - don't
+ * use in new code.
+ */
+
+#define NR_QUANTILES	15
+#define QUANTILE_IDX(i)	inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST	eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST	eytzinger0_last(NR_QUANTILES)
+
+struct quantiles {
+	struct quantile_entry {
+		u64	m;
+		u64	step;
+	}		entries[NR_QUANTILES];
+};
+
+struct time_stat_buffer {
+	unsigned	nr;
+	struct time_stat_buffer_entry {
+		u64	start;
+		u64	end;
+	}		entries[31];
+};
+
+struct time_stats {
+	spinlock_t	lock;
+	bool		have_quantiles;
+	/* all fields are in nanoseconds */
+	u64		min_duration;
+	u64		max_duration;
+	u64		total_duration;
+	u64		max_freq;
+	u64		min_freq;
+	u64		last_event;
+	u64		last_event_start;
+
+	struct mean_and_variance	duration_stats;
+	struct mean_and_variance	freq_stats;
+
+/* default weight for weighted mean and variance calculations */
+#define TIME_STATS_MV_WEIGHT	8
+
+	struct mean_and_variance_weighted duration_stats_weighted;
+	struct mean_and_variance_weighted freq_stats_weighted;
+	struct time_stat_buffer __percpu *buffer;
+
+	u64		start_time;
+};
+
+struct time_stats_quantiles {
+	struct time_stats	stats;
+	struct quantiles	quantiles;
+};
+
+static inline struct quantiles *time_stats_to_quantiles(struct time_stats *stats)
+{
+	return stats->have_quantiles
+		? &container_of(stats, struct time_stats_quantiles, stats)->quantiles
+		: NULL;
+}
+
+void __time_stats_clear_buffer(struct time_stats *, struct time_stat_buffer *);
+void __time_stats_update(struct time_stats *stats, u64, u64);
+
+/**
+ * time_stats_update - collect a new event being tracked
+ *
+ * @stats	- time_stats to update
+ * @start	- start time of event, recorded with local_clock()
+ *
+ * The end duration of the event will be the current time
+ */
+static inline void time_stats_update(struct time_stats *stats, u64 start)
+{
+	__time_stats_update(stats, start, local_clock());
+}
+
+/**
+ * track_event_change - track state change events
+ *
+ * @stats	- time_stats to update
+ * @v		- new state, true or false
+ *
+ * Use this when tracking time stats for state changes, i.e. resource X becoming
+ * blocked/unblocked.
+ */
+static inline bool track_event_change(struct time_stats *stats, bool v)
+{
+	if (v != !!stats->last_event_start) {
+		if (!v) {
+			time_stats_update(stats, stats->last_event_start);
+			stats->last_event_start = 0;
+		} else {
+			stats->last_event_start = local_clock() ?: 1;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+#define TIME_STATS_PRINT_NO_ZEROES	(1U << 0)	/* print nothing if zero count */
+struct seq_buf;
+void time_stats_to_seq_buf(struct seq_buf *, struct time_stats *,
+			   const char *epoch_name, unsigned int flags);
+void time_stats_to_json(struct seq_buf *, struct time_stats *,
+			const char *epoch_name, unsigned int flags);
+
+void time_stats_exit(struct time_stats *);
+void time_stats_init(struct time_stats *);
+
+static inline void time_stats_quantiles_exit(struct time_stats_quantiles *statq)
+{
+	time_stats_exit(&statq->stats);
+}
+static inline void time_stats_quantiles_init(struct time_stats_quantiles *statq)
+{
+	time_stats_init(&statq->stats);
+	statq->stats.have_quantiles = true;
+	memset(&statq->quantiles, 0, sizeof(statq->quantiles));
+}
+
+#endif /* _LINUX_TIME_STATS_H */
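The pattern time_stats_update() expects — record local_clock() at the start, hand it back at the end — looks like this in practice (a minimal sketch; do_work() is a hypothetical workload):

    static struct time_stats op_times;    /* time_stats_init(&op_times) at startup */

    static void timed_op(void)
    {
        u64 start = local_clock();

        do_work();
        time_stats_update(&op_times, start);    /* end time is sampled now */
    }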
include/linux/types.h:
@@ -8,6 +8,7 @@
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <linux/posix_types.h>
 
 #define __SANE_USERSPACE_TYPES__	/* For PPC64, to get LL64 types */
 #include <asm/types.h>
@@ -77,6 +78,10 @@ typedef __u64 __bitwise __be64;
 
 typedef u64 sector_t;
 
+typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv);
+typedef void (*swap_func_t)(void *a, void *b, int size);
+typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv);
+typedef int (*cmp_func_t)(const void *a, const void *b);
+
 typedef unsigned int __bitwise slab_flags_t;
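These typedefs mirror the kernel's sort()/sort_r() conventions and are what the relocated eytzinger API consumes. A sketch of sorting with them (assuming, as with the kernel's sort(), that a NULL swap function falls back to a generic byte swap):

    static int cmp_int_asc(const void *a, const void *b)
    {
        return *(const int *)a - *(const int *)b;
    }

    static void sort_example(int *arr, size_t nr)
    {
        eytzinger0_sort(arr, nr, sizeof(arr[0]), cmp_int_asc, NULL);
    }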
libbcachefs/alloc_foreground.c:
@@ -236,8 +236,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 		if (cl)
 			closure_wait(&c->open_buckets_wait, cl);
 
-		track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
-				   &c->blocked_allocate_open_bucket, true);
+		track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
 		spin_unlock(&c->freelist_lock);
 		return ERR_PTR(-BCH_ERR_open_buckets_empty);
 	}
@@ -263,11 +262,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 	ca->nr_open_buckets++;
 	bch2_open_bucket_hash_add(c, ob);
 
-	track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
-			   &c->blocked_allocate_open_bucket, false);
-
-	track_event_change(&c->times[BCH_TIME_blocked_allocate],
-			   &c->blocked_allocate, false);
+	track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false);
+	track_event_change(&c->times[BCH_TIME_blocked_allocate], false);
 
 	spin_unlock(&c->freelist_lock);
 	return ob;
@@ -555,8 +551,7 @@ again:
 		goto again;
 	}
 
-	track_event_change(&c->times[BCH_TIME_blocked_allocate],
-			   &c->blocked_allocate, true);
+	track_event_change(&c->times[BCH_TIME_blocked_allocate], true);
 
 	ob = ERR_PTR(-BCH_ERR_freelist_empty);
 	goto err;
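With the new track_event_change() the "is an event in progress" state lives in time_stats::last_event_start rather than in the per-filesystem u64 counters that the bcachefs.h hunks below delete from struct bch_fs. The call-site shape reduces to (sketch):

    /* start timing when the resource becomes blocked ... */
    track_event_change(&c->times[BCH_TIME_blocked_allocate], true);

    /* ... and record the elapsed duration when it unblocks: */
    track_event_change(&c->times[BCH_TIME_blocked_allocate], false);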
libbcachefs/bcachefs.h:
@@ -200,6 +200,8 @@
 #include <linux/seqlock.h>
 #include <linux/shrinker.h>
 #include <linux/srcu.h>
+#include <linux/thread_with_file_types.h>
+#include <linux/time_stats.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/zstd.h>
@@ -465,7 +467,6 @@ enum bch_time_stats {
 #include "replicas_types.h"
 #include "subvolume_types.h"
 #include "super_types.h"
-#include "thread_with_file_types.h"
 
 /* Number of nodes btree coalesce will try to coalesce at once */
 #define GC_MERGE_NODES		4U
@@ -593,7 +594,7 @@ struct bch_dev {
 
 	/* The rest of this all shows up in sysfs */
 	atomic64_t		cur_latency[2];
-	struct bch2_time_stats	io_latency[2];
+	struct time_stats_quantiles io_latency[2];
 
 #define CONGESTED_MAX		1024
 	atomic_t		congested;
@@ -640,8 +641,8 @@ struct btree_debug {
 #define BCH_TRANSACTIONS_NR 128
 
 struct btree_transaction_stats {
-	struct bch2_time_stats	duration;
-	struct bch2_time_stats	lock_hold_times;
+	struct time_stats	duration;
+	struct time_stats	lock_hold_times;
 	struct mutex		lock;
 	unsigned		nr_max_paths;
 	unsigned		journal_entries_size;
@@ -919,8 +920,6 @@ struct bch_fs {
 	/* ALLOCATOR */
 	spinlock_t		freelist_lock;
 	struct closure_waitlist	freelist_wait;
-	u64			blocked_allocate;
-	u64			blocked_allocate_open_bucket;
 
 	open_bucket_idx_t	open_buckets_freelist;
 	open_bucket_idx_t	open_buckets_nr_free;
@@ -1104,7 +1103,7 @@ struct bch_fs {
 	unsigned		copy_gc_enabled:1;
 	bool			promote_whole_extents;
 
-	struct bch2_time_stats	times[BCH_TIME_STAT_NR];
+	struct time_stats	times[BCH_TIME_STAT_NR];
 
 	struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
libbcachefs/bcachefs_format.h:
@@ -1275,7 +1275,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(dev_usage,		8)		\
 	x(log,			9)		\
 	x(overwrite,		10)		\
-	x(write_buffer_keys,	11)
+	x(write_buffer_keys,	11)		\
+	x(datetime,		12)
 
 enum {
 #define x(f, nr)	BCH_JSET_ENTRY_##f	= nr,
@@ -1376,6 +1377,11 @@ struct jset_entry_log {
 	u8			d[];
 } __packed __aligned(8);
 
+struct jset_entry_datetime {
+	struct jset_entry	entry;
+	__le64			seconds;
+} __packed __aligned(8);
+
 /*
  * On disk format for a journal entry:
  * seq is monotonically increasing; every journal entry has its own unique
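The new datetime entry gives journal entries a wall-clock timestamp. A hedged sketch of how a writer might fill one — field names are from the struct above, the type constant comes from the x() list, and the sizing/reservation details are left to the journal code:

    static void journal_entry_set_datetime(struct jset_entry_datetime *d)
    {
        /* entry.u64s etc. are assumed to be set by the journal reservation code */
        d->entry.type = BCH_JSET_ENTRY_datetime;
        d->seconds    = cpu_to_le64(ktime_get_real_seconds());
    }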
libbcachefs/bcachefs_ioctl.h:
@@ -379,7 +379,7 @@ struct bch_ioctl_disk_resize_journal {
 
 struct bch_ioctl_subvolume {
 	__u32			flags;
-	__s32			dirfd;
+	__u32			dirfd;
 	__u16			mode;
 	__u16			pad[3];
 	__u64			dst_ptr;
libbcachefs/bset.c:
@@ -9,12 +9,12 @@
 #include "bcachefs.h"
 #include "btree_cache.h"
 #include "bset.h"
-#include "eytzinger.h"
 #include "trace.h"
 #include "util.h"
 
 #include <asm/unaligned.h>
 #include <linux/console.h>
+#include <linux/eytzinger.h>
 #include <linux/random.h>
 #include <linux/prefetch.h>
libbcachefs/btree_cache.c:
@@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 
 	clear_btree_node_just_written(b);
 
-	kvpfree(b->data, btree_buf_bytes(b));
+	kvfree(b->data);
 	b->data = NULL;
 #ifdef __KERNEL__
 	kvfree(b->aux_data);
@@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 {
 	BUG_ON(b->data || b->aux_data);
 
-	b->data = kvpmalloc(btree_buf_bytes(b), gfp);
+	b->data = kvmalloc(btree_buf_bytes(b), gfp);
 	if (!b->data)
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 #ifdef __KERNEL__
@@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 		b->aux_data = NULL;
 #endif
 	if (!b->aux_data) {
-		kvpfree(b->data, btree_buf_bytes(b));
+		kvfree(b->data);
 		b->data = NULL;
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 	}
@@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	if (c->verify_data)
 		list_move(&c->verify_data->list, &bc->live);
 
-	kvpfree(c->verify_ondisk, c->opts.btree_node_size);
+	kvfree(c->verify_ondisk);
 
 	for (i = 0; i < btree_id_nr_alive(c); i++) {
 		struct btree_root *r = bch2_btree_id_root(c, i);
@@ -648,7 +648,7 @@ out:
 	bch2_btree_keys_init(b);
 	set_btree_node_accessed(b);
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
+	time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
 			       start_time);
 
 	memalloc_nofs_restore(flags);
@@ -711,6 +711,9 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 	b = bch2_btree_node_mem_alloc(trans, level != 0);
 
 	if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
+		if (!path)
+			return b;
+
 		trans->memory_allocation_failure = true;
 		trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
 		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
@@ -760,8 +763,9 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 	}
 
 	if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-		if (path)
-			trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
+		BUG_ON(!path);
+
+		trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
 		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
 	}
 
@@ -1096,7 +1100,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
 
-	BUG_ON(trans && !btree_node_locked(path, level + 1));
+	BUG_ON(path && !btree_node_locked(path, level + 1));
 	BUG_ON(level >= BTREE_MAX_DEPTH);
 
 	b = btree_cache_find(bc, k);
libbcachefs/btree_gc.c:
@@ -389,7 +389,8 @@ again:
 	have_child = dropped_children = false;
 	bch2_bkey_buf_init(&prev_k);
 	bch2_bkey_buf_init(&cur_k);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+	iter.prefetch = true;
 
 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 		BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -478,7 +479,8 @@ again:
 		goto err;
 
 	bch2_btree_and_journal_iter_exit(&iter);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+	iter.prefetch = true;
 
 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 		bch2_bkey_buf_reassemble(&cur_k, c, k);
@@ -931,7 +933,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
 	bch2_bkey_buf_init(&prev);
 	bch2_bkey_buf_init(&cur);
 	bkey_init(&prev.k->k);
@@ -963,7 +965,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
 
 	if (b->c.level > target_depth) {
 		bch2_btree_and_journal_iter_exit(&iter);
-		bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+		bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+		iter.prefetch = true;
 
 		while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 			struct btree *child;
@@ -1190,9 +1193,7 @@ static void bch2_gc_free(struct bch_fs *c)
 	genradix_free(&c->gc_stripes);
 
 	for_each_member_device(c, ca) {
-		kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
-			sizeof(struct bucket_array) +
-			ca->mi.nbuckets * sizeof(struct bucket));
+		kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
 		ca->buckets_gc = NULL;
 
 		free_percpu(ca->usage_gc);
@@ -1491,7 +1492,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
 static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 {
 	for_each_member_device(c, ca) {
-		struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+		struct bucket_array *buckets = kvmalloc(sizeof(struct bucket_array) +
 				ca->mi.nbuckets * sizeof(struct bucket),
 				GFP_KERNEL|__GFP_ZERO);
 		if (!buckets) {
@@ -1970,7 +1971,7 @@ int bch2_gc_gens(struct bch_fs *c)
 
 	c->gc_count++;
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
+	time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
 	trace_and_count(c, gc_gens_end, c);
 err:
 	for_each_member_device(c, ca) {
libbcachefs/btree_io.c:
@@ -103,7 +103,7 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
 	if (used_mempool)
 		mempool_free(p, &c->btree_bounce_pool);
 	else
-		vpfree(p, size);
+		kvfree(p);
 }
 
 static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
@@ -115,7 +115,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
 	BUG_ON(size > c->opts.btree_node_size);
 
 	*used_mempool = false;
-	p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+	p = kvmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
 	if (!p) {
 		*used_mempool = true;
 		p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
@@ -327,7 +327,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
 
 	if (sorting_entire_node)
-		bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+		time_stats_update(&c->times[BCH_TIME_btree_node_sort],
 				       start_time);
 
 	/* Make sure we preserve bset journal_seq: */
@@ -397,7 +397,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
 			&dst->format,
 			true);
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+	time_stats_update(&c->times[BCH_TIME_btree_node_sort],
 			       start_time);
 
 	set_btree_bset_end(dst, dst->set);
@@ -1251,7 +1251,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 out:
 	mempool_free(iter, &c->fill_iter);
 	printbuf_exit(&buf);
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
+	time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
 	return retry_read;
 fsck_err:
 	if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
@@ -1323,7 +1323,7 @@ start:
 		}
 	}
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+	time_stats_update(&c->times[BCH_TIME_btree_node_read],
 			       rb->start_time);
 	bio_put(&rb->bio);
libbcachefs/btree_iter.c:
@@ -891,7 +891,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
 	struct bkey_s_c k;
 	int ret = 0;
 
-	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+	__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
 
 	k = bch2_btree_and_journal_iter_peek(&jiter);
 
@@ -1146,7 +1146,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
 	path = &trans->paths[path_idx];
 
 	if (unlikely(path->level >= BTREE_MAX_DEPTH))
-		goto out;
+		goto out_uptodate;
 
 	path->level = btree_path_up_until_good_node(trans, path, 0);
 
@@ -1179,7 +1179,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
 			goto out;
 		}
 	}
 
+out_uptodate:
 	path->uptodate = BTREE_ITER_UPTODATE;
 out:
 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
@@ -2899,7 +2899,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 
 	if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) &&
 	    time_after64(now, trans->last_begin_time + 10))
-		__bch2_time_stats_update(&btree_trans_stats(trans)->duration,
+		__time_stats_update(&btree_trans_stats(trans)->duration,
 					 trans->last_begin_time, now);
 
 	if (!trans->restarted &&
@@ -3224,7 +3224,7 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
 	     s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
 	     s++) {
 		kfree(s->max_paths_text);
-		bch2_time_stats_exit(&s->lock_hold_times);
+		time_stats_exit(&s->lock_hold_times);
 	}
 
 	if (c->btree_trans_barrier_initialized)
@@ -3240,8 +3240,8 @@ void bch2_fs_btree_iter_init_early(struct bch_fs *c)
 	for (s = c->btree_transaction_stats;
 	     s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
 	     s++) {
-		bch2_time_stats_init(&s->duration);
-		bch2_time_stats_init(&s->lock_hold_times);
+		time_stats_init(&s->duration);
+		time_stats_init(&s->lock_hold_times);
 		mutex_init(&s->lock);
 	}
libbcachefs/btree_journal_iter.c:
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "bset.h"
+#include "btree_cache.h"
 #include "btree_journal_iter.h"
 #include "journal_io.h"
 
@@ -334,9 +336,38 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
 		iter->pos = bpos_successor(iter->pos);
 }
 
+static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter)
+{
+	struct btree_and_journal_iter iter = *_iter;
+	struct bch_fs *c = iter.trans->c;
+	unsigned level = iter.journal.level;
+	struct bkey_buf tmp;
+	unsigned nr = test_bit(BCH_FS_started, &c->flags)
+		? (level > 1 ? 0 :  2)
+		: (level > 1 ? 1 : 16);
+
+	iter.prefetch = false;
+	bch2_bkey_buf_init(&tmp);
+
+	while (nr--) {
+		bch2_btree_and_journal_iter_advance(&iter);
+		struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter);
+		if (!k.k)
+			break;
+
+		bch2_bkey_buf_reassemble(&tmp, c, k);
+		bch2_btree_node_prefetch(iter.trans, NULL, tmp.k, iter.journal.btree_id, level - 1);
+	}
+
+	bch2_bkey_buf_exit(&tmp, c);
+}
+
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
 {
 	struct bkey_s_c btree_k, journal_k, ret;
 
+	if (iter->prefetch && iter->journal.level)
+		btree_and_journal_iter_prefetch(iter);
+again:
 	if (iter->at_end)
 		return bkey_s_c_null;
@@ -376,17 +407,18 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
 	bch2_journal_iter_exit(&iter->journal);
 }
 
-void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-						  struct bch_fs *c,
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
+						  struct btree_and_journal_iter *iter,
 						  struct btree *b,
 						  struct btree_node_iter node_iter,
 						  struct bpos pos)
 {
 	memset(iter, 0, sizeof(*iter));
 
+	iter->trans = trans;
 	iter->b = b;
 	iter->node_iter = node_iter;
-	bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
+	bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
 	INIT_LIST_HEAD(&iter->journal.list);
 	iter->pos = b->data->min_key;
 	iter->at_end = false;
@@ -396,15 +428,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter
  * this version is used by btree_gc before filesystem has gone RW and
  * multithreaded, so uses the journal_iters list:
  */
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-						struct bch_fs *c,
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
+						struct btree_and_journal_iter *iter,
 						struct btree *b)
 {
 	struct btree_node_iter node_iter;
 
 	bch2_btree_node_iter_init_from_start(&node_iter, b);
-	__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
-	list_add(&iter->journal.list, &c->journal_iters);
+	__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
+	list_add(&iter->journal.list, &trans->c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
@@ -415,9 +447,7 @@ void bch2_journal_entries_free(struct bch_fs *c)
 	struct genradix_iter iter;
 
 	genradix_for_each(&c->journal_entries, iter, i)
-		if (*i)
-			kvpfree(*i, offsetof(struct journal_replay, j) +
-				vstruct_bytes(&(*i)->j));
+		kvfree(*i);
 	genradix_free(&c->journal_entries);
 }

libbcachefs/btree_journal_iter.h:
@@ -15,6 +15,7 @@ struct journal_iter {
  */
 
 struct btree_and_journal_iter {
+	struct btree_trans	*trans;
 	struct btree		*b;
 	struct btree_node_iter	node_iter;
 	struct bkey		unpacked;
@@ -22,6 +23,7 @@ struct btree_and_journal_iter {
 	struct journal_iter	journal;
 	struct bpos		pos;
 	bool			at_end;
+	bool			prefetch;
 };
 
 struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
@@ -29,6 +31,9 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
 struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
 					   unsigned, struct bpos);
 
+int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *,
+					 struct btree_and_journal_iter *);
+
 int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
 				 unsigned, struct bkey_i *);
 int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
@@ -42,12 +47,11 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
 
 void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
-void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-						  struct bch_fs *, struct btree *,
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *,
+						  struct btree_and_journal_iter *, struct btree *,
 						  struct btree_node_iter, struct bpos);
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-						struct bch_fs *,
-						struct btree *);
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *,
+						struct btree_and_journal_iter *, struct btree *);
 
 void bch2_journal_keys_put(struct bch_fs *);

libbcachefs/btree_locking.h:
@@ -122,7 +122,7 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
 					      struct btree_path *path, unsigned level)
 {
 #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-	__bch2_time_stats_update(&btree_trans_stats(trans)->lock_hold_times,
+	__time_stats_update(&btree_trans_stats(trans)->lock_hold_times,
 				 path->l[level].lock_taken_time,
 				 local_clock());
 #endif
libbcachefs/btree_types.h:
@@ -2,12 +2,12 @@
 #ifndef _BCACHEFS_BTREE_TYPES_H
 #define _BCACHEFS_BTREE_TYPES_H
 
+#include <linux/darray_types.h>
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
 #include "btree_key_cache_types.h"
 #include "buckets_types.h"
-#include "darray.h"
 #include "errcode.h"
 #include "journal_types.h"
 #include "replicas_types.h"
libbcachefs/btree_update.c:
@@ -14,6 +14,8 @@
 #include "snapshot.h"
 #include "trace.h"
 
+#include <linux/darray.h>
+
 static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
 					 const struct btree_insert_entry *r)
 {
libbcachefs/btree_update_interior.c:
@@ -516,7 +516,7 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
 	bch2_disk_reservation_put(c, &as->disk_res);
 	bch2_btree_reserve_put(as, trans);
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
+	time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
 			       as->start_time);
 
 	mutex_lock(&c->btree_interior_update_lock);
@@ -1038,7 +1038,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
 	continue_at(&as->cl, btree_update_set_nodes_written,
 		    as->c->btree_interior_update_worker);
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
+	time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
 			       start_time);
 }
 
@@ -1629,7 +1629,7 @@ out:
 
 	bch2_trans_verify_locks(trans);
 
-	bch2_time_stats_update(&c->times[n2
+	time_stats_update(&c->times[n2
 			       ? BCH_TIME_btree_node_split
 			       : BCH_TIME_btree_node_compact],
 			       start_time);
@@ -1935,7 +1935,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
 	bch2_btree_update_done(as, trans);
 
-	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
+	time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
 out:
 err:
 	if (new_path)
@@ -2484,7 +2484,7 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
 int bch2_fs_btree_interior_update_init(struct bch_fs *c)
 {
 	c->btree_interior_update_worker =
-		alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
+		alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8);
 	if (!c->btree_interior_update_worker)
 		return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
libbcachefs/btree_write_buffer_types.h:
@@ -2,7 +2,7 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 
-#include "darray.h"
+#include <linux/darray_types.h>
 #include "journal_types.h"
 
 #define BTREE_WRITE_BUFERED_VAL_U64s_MAX	4
libbcachefs/buckets.c:
@@ -1335,7 +1335,7 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
 	struct bucket_gens *buckets =
 		container_of(rcu, struct bucket_gens, rcu);
 
-	kvpfree(buckets, sizeof(*buckets) + buckets->nbuckets);
+	kvfree(buckets);
 }
 
 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
@@ -1345,16 +1345,16 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	bool resize = ca->bucket_gens != NULL;
 	int ret;
 
-	if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
-				      GFP_KERNEL|__GFP_ZERO))) {
+	if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets,
+				     GFP_KERNEL|__GFP_ZERO))) {
 		ret = -BCH_ERR_ENOMEM_bucket_gens;
 		goto err;
 	}
 
 	if ((c->opts.buckets_nouse &&
-	     !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
-					 sizeof(unsigned long),
-					 GFP_KERNEL|__GFP_ZERO)))) {
+	     !(buckets_nouse = kvmalloc(BITS_TO_LONGS(nbuckets) *
+					sizeof(unsigned long),
+					GFP_KERNEL|__GFP_ZERO)))) {
 		ret = -BCH_ERR_ENOMEM_buckets_nouse;
 		goto err;
 	}
@@ -1397,8 +1397,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	ret = 0;
 err:
-	kvpfree(buckets_nouse,
-		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+	kvfree(buckets_nouse);
 	if (bucket_gens)
 		call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
 
@@ -1407,27 +1406,21 @@ err:
 
 void bch2_dev_buckets_free(struct bch_dev *ca)
 {
-	unsigned i;
+	kvfree(ca->buckets_nouse);
+	kvfree(rcu_dereference_protected(ca->bucket_gens, 1));
 
-	kvpfree(ca->buckets_nouse,
-		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
-	kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
-		sizeof(struct bucket_gens) + ca->mi.nbuckets);
-
-	for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
+	for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++)
 		free_percpu(ca->usage[i]);
 	kfree(ca->usage_base);
 }
 
 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
 {
-	unsigned i;
-
 	ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL);
 	if (!ca->usage_base)
 		return -BCH_ERR_ENOMEM_usage_init;
 
-	for (i = 0; i < ARRAY_SIZE(ca->usage); i++) {
+	for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++) {
 		ca->usage[i] = alloc_percpu(struct bch_dev_usage);
 		if (!ca->usage[i])
 			return -BCH_ERR_ENOMEM_usage_init;
@@ -11,7 +11,6 @@
#include "replicas.h"
#include "super.h"
#include "super-io.h"
#include "thread_with_file.h"

#include <linux/cdev.h>
#include <linux/device.h>
@@ -20,6 +19,7 @@
#include <linux/major.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/thread_with_file.h>
#include <linux/uaccess.h>

__must_check
@@ -155,17 +155,14 @@ static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
kfree(thr);
}

static int bch2_fsck_offline_thread_fn(void *arg)
static void bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);

thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
if (!thr->thr.thr.ret)
bch2_fs_stop(c);

thread_with_stdio_done(&thr->thr);
return 0;
}

static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
@@ -220,7 +217,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)

opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);

ret = bch2_run_thread_with_stdio(&thr->thr,
ret = run_thread_with_stdio(&thr->thr,
bch2_fsck_thread_exit,
bch2_fsck_offline_thread_fn);
err:
@@ -425,7 +422,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file)
{
struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);

bch2_thread_with_file_exit(&ctx->thr);
thread_with_file_exit(&ctx->thr);
kfree(ctx);
return 0;
}
@@ -475,7 +472,7 @@ static long bch2_ioctl_data(struct bch_fs *c,
ctx->c = c;
ctx->arg = arg;

ret = bch2_run_thread_with_file(&ctx->thr,
ret = run_thread_with_file(&ctx->thr,
&bcachefs_data_ops,
bch2_data_thread);
if (ret < 0)
@@ -763,9 +760,9 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
return ret;
}

static int bch2_fsck_online_thread_fn(void *arg)
static void bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
struct bch_fs *c = thr->c;

c->stdio_filter = current;
@@ -793,11 +790,8 @@ static int bch2_fsck_online_thread_fn(void *arg)
c->stdio_filter = NULL;
c->opts.fix_errors = old_fix_errors;

thread_with_stdio_done(&thr->thr);

up(&c->online_fsck_mutex);
bch2_ro_ref_put(c);
return 0;
}

static long bch2_ioctl_fsck_online(struct bch_fs *c,
@@ -840,7 +834,7 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
goto err;
}

ret = bch2_run_thread_with_stdio(&thr->thr,
ret = run_thread_with_stdio(&thr->thr,
bch2_fsck_thread_exit,
bch2_fsck_online_thread_fn);
err:
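
The thread_with_file helpers now live under include/linux and lose their bch2_ prefix, and stdio worker callbacks change from the kthread-style int (*)(void *) to a typed void (*)(struct thread_with_stdio *), as the hunks above show. A minimal sketch of the new callback shape, with a hypothetical name my_fsck_thread_fn (the fsck_thread layout is taken from the diff):

	static void my_fsck_thread_fn(struct thread_with_stdio *stdio)
	{
		/* the typed handle replaces container_of() on a void * argument */
		struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);

		/* ... do the work, reporting status via thr->thr.thr.ret ... */

		/* completion is signalled explicitly instead of via a return value */
		thread_with_stdio_done(&thr->thr);
	}
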
@@ -601,13 +601,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
return 0;

if (!mempool_initialized(&c->compression_bounce[READ]) &&
mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
1, c->opts.encoded_extent_max))
mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
1, c->opts.encoded_extent_max))
return -BCH_ERR_ENOMEM_compression_bounce_read_init;

if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
1, c->opts.encoded_extent_max))
mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
1, c->opts.encoded_extent_max))
return -BCH_ERR_ENOMEM_compression_bounce_write_init;

for (i = compression_types;
@@ -622,15 +622,15 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
if (mempool_initialized(&c->compress_workspace[i->type]))
continue;

if (mempool_init_kvpmalloc_pool(
if (mempool_init_kvmalloc_pool(
&c->compress_workspace[i->type],
1, i->compress_workspace))
return -BCH_ERR_ENOMEM_compression_workspace_init;
}

if (!mempool_initialized(&c->decompress_workspace) &&
mempool_init_kvpmalloc_pool(&c->decompress_workspace,
1, decompress_workspace_size))
mempool_init_kvmalloc_pool(&c->decompress_workspace,
1, decompress_workspace_size))
return -BCH_ERR_ENOMEM_decompression_workspace_init;

return 0;
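
This file, like several below, swaps the bcachefs-local kvpmalloc()/kvpfree() wrappers for the kernel's standard kvmalloc()/kvfree(). Since kvfree() does not need to be told the allocation size, call sites drop the size argument. A sketch of the before/after pattern, assuming an allocation of `bytes` bytes:

	/* before: local wrappers, size needed again at free time */
	buf = kvpmalloc(bytes, GFP_KERNEL);
	kvpfree(buf, bytes);

	/* after: standard helpers; kvfree() handles both the kmalloc
	 * and vmalloc cases without a size argument */
	buf = kvmalloc(bytes, GFP_KERNEL);
	kvfree(buf);
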
@@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
mutex_lock(&c->verify_lock);

if (!c->verify_ondisk) {
c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
if (!c->verify_ondisk)
goto out;
}
@@ -199,7 +199,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
return;
}

n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
n_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
if (!n_ondisk) {
prt_printf(out, "memory allocation failure\n");
goto out;
@@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
out:
if (bio)
bio_put(bio);
kvpfree(n_ondisk, btree_buf_bytes(b));
kvfree(n_ondisk);
percpu_ref_put(&ca->io_ref);
}
@@ -219,10 +219,10 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
dirent->k.p.inode = dir;
dirent->k.p.snapshot = snapshot;

ret = bch2_hash_set_snapshot(trans, bch2_dirent_hash_desc, hash_info,
zero_inum, snapshot,
&dirent->k_i, str_hash_flags,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
zero_inum, snapshot,
&dirent->k_i, str_hash_flags,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
*dir_offset = dirent->k.p.offset;

return ret;
@@ -293,12 +293,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
struct bpos dst_pos =
POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
unsigned src_update_flags = 0;
bool delete_src, delete_dst;
int ret = 0;

if (src_dir.subvol != dst_dir.subvol)
return -EXDEV;

memset(src_inum, 0, sizeof(*src_inum));
memset(dst_inum, 0, sizeof(*dst_inum));

@@ -319,12 +317,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
if (ret)
goto out;

src_type = bkey_s_c_to_dirent(old_src).v->d_type;

if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
return -EOPNOTSUPP;

/* Lookup dst: */
if (mode == BCH_RENAME) {
/*
@@ -352,11 +344,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
bkey_s_c_to_dirent(old_dst), dst_inum);
if (ret)
goto out;

dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;

if (dst_type == DT_SUBVOL)
return -EOPNOTSUPP;
}

if (mode != BCH_RENAME_EXCHANGE)
@@ -426,28 +413,55 @@ int bch2_dirent_rename(struct btree_trans *trans,
}
}

if (new_dst->v.d_type == DT_SUBVOL)
new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);

if ((mode == BCH_RENAME_EXCHANGE) &&
new_src->v.d_type == DT_SUBVOL)
new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);

ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
if (ret)
goto out;
out_set_src:

/*
* If we're deleting a subvolume, we need to really delete the dirent,
* not just emit a whiteout in the current snapshot:
* If we're deleting a subvolume we need to really delete the dirent,
* not just emit a whiteout in the current snapshot - there can only be
* a single dirent that points to a given subvolume.
*
* IOW, we don't maintain multiple versions in different snapshots of
* dirents that point to subvolumes - dirents that point to subvolumes
* are only visible in one particular subvolume so it's not necessary,
* and it would be particularly confusing for fsck to have to deal with.
*/
if (src_type == DT_SUBVOL) {
bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
ret = bch2_btree_iter_traverse(&src_iter);
delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
new_src->k.p.snapshot != old_src.k->p.snapshot;

delete_dst = old_dst.k &&
bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
new_dst->k.p.snapshot != old_dst.k->p.snapshot;

if (!delete_src || !bkey_deleted(&new_src->k)) {
ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
if (ret)
goto out;

new_src->k.p = src_iter.pos;
src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
}

ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
if (ret)
goto out;
if (delete_src) {
bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
ret = bch2_btree_iter_traverse(&src_iter) ?:
bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
if (ret)
goto out;
}

if (delete_dst) {
bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot);
ret = bch2_btree_iter_traverse(&dst_iter) ?:
bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
if (ret)
goto out;
}

if (mode == BCH_RENAME_EXCHANGE)
*src_offset = new_src->k.p.offset;
@@ -458,41 +472,29 @@ out:
return ret;
}

int __bch2_dirent_lookup_trans(struct btree_trans *trans,
struct btree_iter *iter,
subvol_inum dir,
const struct bch_hash_info *hash_info,
const struct qstr *name, subvol_inum *inum,
unsigned flags)
int bch2_dirent_lookup_trans(struct btree_trans *trans,
struct btree_iter *iter,
subvol_inum dir,
const struct bch_hash_info *hash_info,
const struct qstr *name, subvol_inum *inum,
unsigned flags)
{
struct bkey_s_c k;
struct bkey_s_c_dirent d;
u32 snapshot;
int ret;

ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
int ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
hash_info, dir, name, flags);
if (ret)
return ret;

ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
hash_info, dir, name, flags);
if (ret)
return ret;

k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;

d = bkey_s_c_to_dirent(k);

ret = bch2_dirent_read_target(trans, dir, d, inum);
ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
if (ret > 0)
ret = -ENOENT;
err:
if (ret)
bch2_trans_iter_exit(trans, iter);

return ret;
}

@@ -504,7 +506,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
struct btree_iter iter = { NULL };

int ret = lockrestart_do(trans,
__bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
return ret;
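
The lookup helper loses its leading underscores along with the redundant bch2_subvolume_get_snapshot() call, since bch2_hash_lookup() resolves the snapshot itself. A usage sketch of the renamed helper, mirroring bch2_dirent_lookup() above (on error the helper exits the iterator; on success the caller must):

	struct btree_iter iter = { NULL };
	subvol_inum inum;

	int ret = lockrestart_do(trans,
		bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, &inum, 0));
	bch2_trans_iter_exit(trans, &iter);
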
@@ -62,7 +62,7 @@ int bch2_dirent_rename(struct btree_trans *,
const struct qstr *, subvol_inum *, u64 *,
enum bch_rename_mode);

int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
int bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
subvol_inum, const struct bch_hash_info *,
const struct qstr *, subvol_inum *, unsigned);
u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
@@ -504,7 +504,7 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
unsigned i;

for (i = 0; i < s->v.nr_blocks; i++) {
kvpfree(buf->data[i], buf->size << 9);
kvfree(buf->data[i]);
buf->data[i] = NULL;
}
}
@@ -531,7 +531,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
memset(buf->valid, 0xFF, sizeof(buf->valid));

for (i = 0; i < v->nr_blocks; i++) {
buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL);
buf->data[i] = kvmalloc(buf->size << 9, GFP_KERNEL);
if (!buf->data[i])
goto err;
}
@@ -176,6 +176,8 @@
x(EINVAL, invalid) \
x(EINVAL, internal_fsck_err) \
x(EINVAL, opt_parse_error) \
x(EINVAL, remove_with_metadata_missing_unimplemented)\
x(EINVAL, remove_would_lose_data) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \
x(EROFS, erofs_journal_err) \
@@ -2,7 +2,7 @@
#include "bcachefs.h"
#include "error.h"
#include "super.h"
#include "thread_with_file.h"
#include <linux/thread_with_file.h>

#define FSCK_ERR_RATELIMIT_NR 10

@@ -105,7 +105,7 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
do {
bch2_print(c, " (y,n, or Y,N for all errors of this type) ");

int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
int r = stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
if (r < 0)
return YN_NO;
buf[r] = '\0';
@@ -24,12 +24,12 @@ struct { \
(fifo)->mask = (fifo)->size \
? roundup_pow_of_two((fifo)->size) - 1 \
: 0; \
(fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \
(fifo)->data = kvmalloc(fifo_buf_size(fifo), (_gfp)); \
})

#define free_fifo(fifo) \
do { \
kvpfree((fifo)->data, fifo_buf_size(fifo)); \
kvfree((fifo)->data); \
(fifo)->data = NULL; \
} while (0)
@@ -260,8 +260,8 @@ int bch2_unlink_trans(struct btree_trans *trans,

dir_hash = bch2_hash_info_init(c, dir_u);

ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
name, &inum, BTREE_ITER_INTENT);
ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
name, &inum, BTREE_ITER_INTENT);
if (ret)
goto err;

@@ -410,6 +410,21 @@ int bch2_rename_trans(struct btree_trans *trans,
goto err;
}

/* Can't move across subvolumes, unless it's a subvolume root: */
if (src_dir.subvol != dst_dir.subvol &&
(!src_inode_u->bi_subvol ||
(dst_inum.inum && !dst_inode_u->bi_subvol))) {
ret = -EXDEV;
goto err;
}

if (src_inode_u->bi_parent_subvol)
src_inode_u->bi_parent_subvol = dst_dir.subvol;

if ((mode == BCH_RENAME_EXCHANGE) &&
dst_inode_u->bi_parent_subvol)
dst_inode_u->bi_parent_subvol = src_dir.subvol;

src_inode_u->bi_dir = dst_dir_u->bi_inum;
src_inode_u->bi_dir_offset = dst_offset;
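
bch2_rename_trans() now allows a rename to cross subvolumes when the inode being moved is itself a subvolume root (and, for an exchange, the destination is too), updating bi_parent_subvol accordingly. A condensed restatement of the new EXDEV check, using the variables from the hunk above:

	/* allowed: same subvolume, or every inode crossing the
	 * boundary is a subvolume root */
	bool allowed = src_dir.subvol == dst_dir.subvol ||
		(src_inode_u->bi_subvol &&
		 (!dst_inum.inum || dst_inode_u->bi_subvol));
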
@@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
if (IS_ERR(victim))
return PTR_ERR(victim);

dir = d_inode(path.dentry);
if (victim->d_sb->s_fs_info != c) {
ret = -EXDEV;
goto err;
@@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -ENOENT;
goto err;
}
dir = d_inode(path.dentry);
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
d_delete(victim);
}
inode_unlock(dir);
err:
inode_unlock(dir);
dput(victim);
path_put(&path);
return ret;
libbcachefs/fs.c
@@ -176,45 +176,88 @@ static unsigned bch2_inode_hash(subvol_inum inum)
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
}

struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
{
struct bch_inode_unpacked inode_u;
struct bch_inode_info *inode;
struct btree_trans *trans;
struct bch_subvolume subvol;
int ret;
subvol_inum inum = inode_inum(inode);
struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,
bch2_inode_hash(inum),
bch2_iget5_test,
bch2_iget5_set,
&inum));
BUG_ON(!old);

inode = to_bch_ei(iget5_locked(c->vfs_sb,
bch2_inode_hash(inum),
bch2_iget5_test,
bch2_iget5_set,
&inum));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
if (!(inode->v.i_state & I_NEW))
return &inode->v;

trans = bch2_trans_get(c);
ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u));

if (!ret)
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
bch2_trans_put(trans);

if (ret) {
iget_failed(&inode->v);
return ERR_PTR(bch2_err_class(ret));
if (unlikely(old != inode)) {
discard_new_inode(&inode->v);
inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
*/
unlock_new_inode(&inode->v);
}

mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
return inode;
}

unlock_new_inode(&inode->v);
#define memalloc_flags_do(_flags, _do) \
({ \
unsigned _saved_flags = memalloc_flags_save(_flags); \
typeof(_do) _ret = _do; \
memalloc_noreclaim_restore(_saved_flags); \
_ret; \
})

return &inode->v;
/*
* Allocate a new inode, dropping/retaking btree locks if necessary:
*/
static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;

struct bch_inode_info *inode =
memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN,
to_bch_ei(new_inode(c->vfs_sb)));

if (unlikely(!inode)) {
int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
if (ret && inode)
discard_new_inode(&inode->v);
if (ret)
return ERR_PTR(ret);
}

return inode;
}

struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
struct bch_inode_info *inode =
to_bch_ei(ilookup5_nowait(c->vfs_sb,
bch2_inode_hash(inum),
bch2_iget5_test,
&inum));
if (inode)
return &inode->v;

struct btree_trans *trans = bch2_trans_get(c);

struct bch_inode_unpacked inode_u;
struct bch_subvolume subvol;
int ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
if (!ret) {
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_insert(c, inode);
}
bch2_trans_put(trans);

return ret ? ERR_PTR(ret) : &inode->v;
}

struct bch_inode_info *
@@ -226,7 +269,7 @@ __bch2_create(struct mnt_idmap *idmap,
struct bch_fs *c = dir->v.i_sb->s_fs_info;
struct btree_trans *trans;
struct bch_inode_unpacked dir_u;
struct bch_inode_info *inode, *old;
struct bch_inode_info *inode;
struct bch_inode_unpacked inode_u;
struct posix_acl *default_acl = NULL, *acl = NULL;
subvol_inum inum;
@@ -293,7 +336,6 @@ err_before_quota:
mutex_unlock(&dir->ei_update_lock);
}

bch2_iget5_set(&inode->v, &inum);
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);

set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
@@ -304,36 +346,7 @@ err_before_quota:
* bch2_trans_exit() and dropping locks, else we could race with another
* thread pulling the inode in and modifying it:
*/

inode->v.i_state |= I_CREATING;

old = to_bch_ei(inode_insert5(&inode->v,
bch2_inode_hash(inum),
bch2_iget5_test,
bch2_iget5_set,
&inum));
BUG_ON(!old);

if (unlikely(old != inode)) {
/*
* We raced, another process pulled the new inode into cache
* before us:
*/
make_bad_inode(&inode->v);
iput(&inode->v);

inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
*/
unlock_new_inode(&inode->v);
}

inode = bch2_inode_insert(c, inode);
bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
@@ -352,23 +365,78 @@ err_trans:

/* methods */

static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
subvol_inum dir, struct bch_hash_info *dir_hash_info,
const struct qstr *name)
{
struct bch_fs *c = trans->c;
struct btree_iter dirent_iter = {};
subvol_inum inum = {};

int ret = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
dir_hash_info, dir, name, 0);
if (ret)
return ERR_PTR(ret);

struct bkey_s_c k = bch2_btree_iter_peek_slot(&dirent_iter);
ret = bkey_err(k);
if (ret)
goto err;

ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto err;

struct bch_inode_info *inode =
to_bch_ei(ilookup5_nowait(c->vfs_sb,
bch2_inode_hash(inum),
bch2_iget5_test,
&inum));
if (inode)
goto out;

struct bch_subvolume subvol;
struct bch_inode_unpacked inode_u;
ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
if (bch2_err_matches(ret, ENOENT)) {
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, k);
bch_err(c, "%s points to missing inode", buf.buf);
printbuf_exit(&buf);
}
if (ret)
goto err;

bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_insert(c, inode);
out:
bch2_trans_iter_exit(trans, &dirent_iter);
return inode;
err:
inode = ERR_PTR(ret);
goto out;
}

static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
unsigned int flags)
{
struct bch_fs *c = vdir->i_sb->s_fs_info;
struct bch_inode_info *dir = to_bch_ei(vdir);
struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
struct inode *vinode = NULL;
subvol_inum inum = { .subvol = 1 };
int ret;

ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
&dentry->d_name, &inum);
struct bch_inode_info *inode;
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
&hash, &dentry->d_name)));
if (IS_ERR(inode))
inode = NULL;

if (!ret)
vinode = bch2_vfs_inode_get(c, inum);

return d_splice_alias(vinode, dentry);
return d_splice_alias(&inode->v, dentry);
}

static int bch2_mknod(struct mnt_idmap *idmap,
@@ -1371,6 +1439,7 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
struct bch_inode_unpacked *bi,
struct bch_subvolume *subvol)
{
bch2_iget5_set(&inode->v, &inum);
bch2_inode_update_after_write(trans, inode, bi, ~0);

if (BCH_SUBVOLUME_SNAP(subvol))
@@ -1571,7 +1640,6 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
* number:
*/
u64 avail_inodes = ((usage.capacity - usage.used) << 3);
u64 fsid;

buf->f_type = BCACHEFS_STATFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
@@ -1582,10 +1650,7 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = usage.nr_inodes + avail_inodes;
buf->f_ffree = avail_inodes;

fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
buf->f_fsid = uuid_to_fsid(c->sb.user_uuid.b);
buf->f_namelen = BCH_NAME_MAX;

return 0;
@@ -1881,6 +1946,7 @@ got_sb:
sb->s_time_gran = c->sb.nsec_per_time_unit;
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
sb->s_uuid = c->sb.user_uuid;
c->vfs_sb = sb;
strscpy(sb->s_id, c->name, sizeof(sb->s_id));
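
The new memalloc_flags_do() macro scopes task allocation-context flags around a single expression; bch2_new_inode() uses it so the first new_inode() attempt fails fast instead of recursing into reclaim while btree locks are held, with drop_locks_do() as the fallback path. Roughly what an invocation expands to (a sketch of the macro body shown above):

	unsigned saved = memalloc_flags_save(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN);
	struct inode *vi = new_inode(c->vfs_sb);	/* may fail rather than reclaim */
	memalloc_noreclaim_restore(saved);
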
@@ -5,7 +5,6 @@
#include "btree_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "darray.h"
#include "dirent.h"
#include "error.h"
#include "fs-common.h"
@@ -18,6 +17,7 @@
#include "xattr.h"

#include <linux/bsearch.h>
#include <linux/darray.h>
#include <linux/dcache.h> /* struct qstr */

/*
@@ -100,8 +100,8 @@ err:
}

static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode,
u32 *snapshot)
struct bch_inode_unpacked *inode,
u32 *snapshot)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -123,17 +123,15 @@ err:
return ret;
}

static int __lookup_dirent(struct btree_trans *trans,
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
struct bch_hash_info hash_info,
subvol_inum dir, struct qstr *name,
u64 *target, unsigned *type)
u64 *target, unsigned *type, u32 snapshot)
{
struct btree_iter iter;
struct bkey_s_c_dirent d;
int ret;

ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
&hash_info, dir, name, 0);
int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
&hash_info, dir, name, 0, snapshot);
if (ret)
return ret;

@@ -144,34 +142,6 @@ static int __lookup_dirent(struct btree_trans *trans,
return 0;
}

static int __write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
struct bkey_inode_buf *inode_p =
bch2_trans_kmalloc(trans, sizeof(*inode_p));

if (IS_ERR(inode_p))
return PTR_ERR(inode_p);

bch2_inode_pack(inode_p, inode);
inode_p->inode.k.p.snapshot = snapshot;

return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&inode_p->inode.k_i,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
}

static int fsck_write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
__write_inode(trans, inode, snapshot));
bch_err_fn(trans->c, ret);
return ret;
}

static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{
struct bch_fs *c = trans->c;
@@ -224,15 +194,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,

struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot);
u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
bch_err_msg(c, ret, "looking up root inode");
if (ret)
return ret;

root_hash_info = bch2_hash_info_init(c, &root_inode);

ret = __lookup_dirent(trans, root_hash_info, root_inum,
&lostfound_str, &inum, &d_type);
ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
&lostfound_str, &inum, &d_type, snapshot);
if (bch2_err_matches(ret, ENOENT))
goto create_lostfound;

@@ -250,7 +221,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
* shouldn't exist here:
*/
ret = lookup_inode(trans, inum, lostfound, &snapshot);
bch_err_msg(c, ret, "looking up lost+found");
bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
return ret;

create_lostfound:
@@ -312,7 +284,7 @@ static int reattach_inode(struct btree_trans *trans,
if (S_ISDIR(inode->bi_mode)) {
lostfound.bi_nlink++;

ret = __write_inode(trans, &lostfound, U32_MAX);
ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
if (ret)
return ret;
}
@@ -334,7 +306,7 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;

return __write_inode(trans, inode, inode_snapshot);
return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
}

static int remove_backpointer(struct btree_trans *trans,
@@ -722,7 +694,7 @@ static int hash_redo_key(struct btree_trans *trans,
delete->k.p = k_iter->pos;
return bch2_btree_iter_traverse(k_iter) ?:
bch2_trans_update(trans, k_iter, delete, 0) ?:
bch2_hash_set_snapshot(trans, desc, hash_info,
bch2_hash_set_in_snapshot(trans, desc, hash_info,
(subvol_inum) { 0, k.k->p.inode },
k.k->p.snapshot, tmp,
BCH_HASH_SET_MUST_CREATE,
@@ -861,7 +833,8 @@ static int check_inode(struct btree_trans *trans,

u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;

ret = __write_inode(trans, &u, iter->pos.snapshot);
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);

bch_err_msg(c, ret, "in fsck updating inode");
if (ret)
return ret;
@@ -950,8 +923,33 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}

if (u.bi_subvol) {
struct bch_subvolume s;

ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;

if (fsck_err_on(ret,
c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u",
u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
k.k->p.snapshot),
c, inode_bi_subvol_wrong,
"inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
u.bi_inum, k.k->p.snapshot, u.bi_subvol,
le64_to_cpu(s.inode),
le32_to_cpu(s.snapshot))) {
u.bi_subvol = 0;
u.bi_parent_subvol = 0;
do_update = true;
}
}

if (do_update) {
ret = __write_inode(trans, &u, iter->pos.snapshot);
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
if (ret)
return ret;
@@ -1032,7 +1030,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
w->last_pos.inode, i->snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
ret = fsck_write_inode(trans, &i->inode, i->snapshot);
ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@@ -1481,7 +1479,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
ret = fsck_write_inode(trans, &i->inode, i->snapshot);
ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@@ -1491,16 +1489,15 @@ fsck_err:
return ret ?: trans_was_restarted(trans, restart_count);
}

static int check_dirent_target(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
struct bch_inode_unpacked *target,
u32 target_snapshot)
static int check_inode_backpointer(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
struct bch_inode_unpacked *target,
u32 target_snapshot)
{
struct bch_fs *c = trans->c;
struct bkey_i_dirent *n;
struct printbuf buf = PRINTBUF;
struct btree_iter bp_iter = { NULL };
struct printbuf buf = PRINTBUF;
int ret = 0;

if (!target->bi_dir &&
@@ -1508,7 +1505,7 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;

ret = __write_inode(trans, target, target_snapshot);
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
@@ -1548,7 +1545,7 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_unlinked;

ret = __write_inode(trans, target, target_snapshot);
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
@@ -1566,11 +1563,34 @@ static int check_dirent_target(struct btree_trans *trans,
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;

ret = __write_inode(trans, target, target_snapshot);
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &bp_iter);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}

static int check_dirent_target(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
struct bch_inode_unpacked *target,
u32 target_snapshot)
{
struct bch_fs *c = trans->c;
struct bkey_i_dirent *n;
struct printbuf buf = PRINTBUF;
int ret = 0;

ret = check_inode_backpointer(trans, iter, d, target, target_snapshot);
if (ret)
goto err;

if (fsck_err_on(d.v->d_type != inode_d_type(target),
c, dirent_d_type_wrong,
@@ -1614,15 +1634,65 @@ static int check_dirent_target(struct btree_trans *trans,

d = dirent_i_to_s_c(n);
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &bp_iter);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}

static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_dirent d)
{
struct bch_fs *c = trans->c;
struct bch_inode_unpacked subvol_root;
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 target_snapshot;
u64 target_inum;
int ret = 0;

ret = subvol_lookup(trans, target_subvol,
&target_snapshot, &target_inum);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;

if (fsck_err_on(ret, c, dirent_to_missing_subvol,
"dirent points to missing subvolume %u",
le32_to_cpu(d.v->d_child_subvol)))
return __remove_dirent(trans, d.k->p);

ret = lookup_inode(trans, target_inum,
&subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;

if (fsck_err_on(ret, c, subvol_to_missing_root,
"subvolume %u points to missing subvolume root %llu",
target_subvol,
target_inum)) {
bch_err(c, "repair not implemented yet");
return -EINVAL;
}

if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
c, subvol_root_wrong_bi_subvol,
"subvol root %llu has wrong bi_subvol field: got %u, should be %u",
target_inum,
subvol_root.bi_subvol, target_subvol)) {
subvol_root.bi_subvol = target_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
return ret;
}

ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
if (ret)
return ret;
fsck_err:
return ret;
}

static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k,
struct bch_hash_info *hash_info,
@@ -1707,50 +1777,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
d = bkey_s_c_to_dirent(k);

if (d.v->d_type == DT_SUBVOL) {
struct bch_inode_unpacked subvol_root;
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 target_snapshot;
u64 target_inum;

ret = subvol_lookup(trans, target_subvol,
&target_snapshot, &target_inum);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;

if (fsck_err_on(ret, c, dirent_to_missing_subvol,
"dirent points to missing subvolume %u",
le32_to_cpu(d.v->d_child_subvol))) {
ret = __remove_dirent(trans, d.k->p);
goto err;
}

ret = lookup_inode(trans, target_inum,
&subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;

if (fsck_err_on(ret, c, subvol_to_missing_root,
"subvolume %u points to missing subvolume root %llu",
target_subvol,
target_inum)) {
bch_err(c, "repair not implemented yet");
ret = -EINVAL;
goto err;
}

if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
c, subvol_root_wrong_bi_subvol,
"subvol root %llu has wrong bi_subvol field: got %u, should be %u",
target_inum,
subvol_root.bi_subvol, target_subvol)) {
subvol_root.bi_subvol = target_subvol;
ret = __write_inode(trans, &subvol_root, target_snapshot);
if (ret)
goto err;
}

ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
ret = check_subvol_dirent(trans, iter, d);
if (ret)
goto err;
} else {
@@ -1776,12 +1803,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
}

if (d.v->d_type == DT_DIR)
for_each_visible_inode(c, s, dir, equiv.snapshot, i)
i->count++;
}

if (d.v->d_type == DT_DIR)
for_each_visible_inode(c, s, dir, equiv.snapshot, i)
i->count++;

out:
err:
fsck_err:
@@ -1919,7 +1945,7 @@ static int check_root_trans(struct btree_trans *trans)
0, NULL);
root_inode.bi_inum = inum;

ret = __write_inode(trans, &root_inode, snapshot);
ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot);
bch_err_msg(c, ret, "writing root inode");
}
err:
@@ -2291,7 +2317,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
bch2_inode_nlink_get(&u), link->count)) {
bch2_inode_nlink_set(&u, link->count);
ret = __write_inode(trans, &u, k.k->p.snapshot);
ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot);
}
fsck_err:
return ret;
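
The new block in check_inode() is what the commit subject refers to: it verifies the two-way binding between a subvolume and its root inode. A condensed restatement of the invariant, using the fields from the hunk above:

	/* for an inode with bi_subvol set: the subvolume must exist,
	 * point back at this inode, and its snapshot must be an
	 * ancestor of the inode's snapshot */
	bool ok = !bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s) &&
		le64_to_cpu(s.inode) == u.bi_inum &&
		bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), k.k->p.snapshot);
	/* otherwise fsck clears bi_subvol/bi_parent_subvol and rewrites the inode */
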
@@ -324,7 +324,7 @@ int bch2_inode_unpack(struct bkey_s_c k,
return bch2_inode_unpack_slowpath(k, unpacked);
}

static int bch2_inode_peek_nowarn(struct btree_trans *trans,
int bch2_inode_peek_nowarn(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
subvol_inum inum, unsigned flags)
@@ -384,6 +384,34 @@ int bch2_inode_write_flags(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
}

int __bch2_fsck_write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
struct bkey_inode_buf *inode_p =
bch2_trans_kmalloc(trans, sizeof(*inode_p));

if (IS_ERR(inode_p))
return PTR_ERR(inode_p);

bch2_inode_pack(inode_p, inode);
inode_p->inode.k.p.snapshot = snapshot;

return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&inode_p->inode.k_i,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
}

int bch2_fsck_write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
__bch2_fsck_write_inode(trans, inode, snapshot));
bch_err_fn(trans->c, ret);
return ret;
}

struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
{
struct bch_inode_unpacked u;
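
__write_inode()/fsck_write_inode() move out of fsck.c and are exported as __bch2_fsck_write_inode() and bch2_fsck_write_inode(). The underscore variant only queues the update in the current transaction; the plain variant wraps it in commit_do() so the repair commits immediately (with BCH_TRANS_COMMIT_no_enospc) and logs any error:

	/* inside a larger transaction that commits later: */
	ret = __bch2_fsck_write_inode(trans, &u, snapshot);

	/* as a standalone repair, committed on the spot: */
	ret = bch2_fsck_write_inode(trans, &u, snapshot);
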
@@ -95,6 +95,8 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);

void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);

int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *,
struct bch_inode_unpacked *, subvol_inum, unsigned);
int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
struct bch_inode_unpacked *, subvol_inum, unsigned);

@@ -108,6 +110,9 @@ static inline int bch2_inode_write(struct btree_trans *trans,
return bch2_inode_write_flags(trans, iter, inode, 0);
}

int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);
int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);

void bch2_inode_init_early(struct bch_fs *,
struct bch_inode_unpacked *);
void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
@@ -134,7 +134,7 @@ static void promote_done(struct bch_write_op *wop)
container_of(wop, struct promote_op, write.op);
struct bch_fs *c = op->write.op.c;

bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
time_stats_update(&c->times[BCH_TIME_data_promote],
op->start_time);
promote_free(c, op);
}
@@ -356,7 +356,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
if (rbio->start_time)
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
bio_endio(&rbio->bio);
}
@@ -88,7 +88,7 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)

bch2_congested_acct(ca, io_latency, now, rw);

__bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
__time_stats_update(&ca->io_latency[rw].stats, submit_time, now);
}

#endif
@@ -457,7 +457,7 @@ static void bch2_write_done(struct closure *cl)

EBUG_ON(op->open_buckets.nr);

bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
bch2_disk_reservation_put(c, &op->res);

if (!(op->flags & BCH_WRITE_MOVE))
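
bch2_time_stats_update() and friends become the generic time_stats_update() from the new <linux/time_stats.h>. The calling pattern is unchanged: record a start timestamp, then report the delta into a stats slot on completion. A sketch, assuming start_time is captured with local_clock() as elsewhere in this code:

	u64 start_time = local_clock();

	/* ... perform the operation ... */

	time_stats_update(&c->times[BCH_TIME_data_write], start_time);
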
@@ -27,6 +27,26 @@ static const char * const bch2_journal_errors[] = {
NULL
};

static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
}

static bool __journal_entry_is_open(union journal_res_state state)
{
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
}

static inline unsigned nr_unwritten_journal_entries(struct journal *j)
{
return atomic64_read(&j->seq) - j->seq_ondisk;
}

static bool journal_entry_is_open(struct journal *j)
{
return __journal_entry_is_open(j->reservations);
}

static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
{
union journal_res_state s = READ_ONCE(j->reservations);
@@ -54,6 +74,13 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
prt_printf(out, "%li jiffies", buf->expires - jiffies);
prt_newline(out);

if (buf->write_done)
prt_printf(out, "write done\n");
else if (buf->write_allocated)
prt_printf(out, "write allocated\n");
else if (buf->write_started)
prt_printf(out, "write started\n");

printbuf_indent_sub(out, 2);
}

@@ -66,26 +93,7 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
seq <= journal_cur_seq(j);
seq++)
bch2_journal_buf_to_text(out, j, seq);
}

static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
return seq > j->seq_ondisk;
}

static bool __journal_entry_is_open(union journal_res_state state)
{
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
}

static inline unsigned nr_unwritten_journal_entries(struct journal *j)
{
return atomic64_read(&j->seq) - j->seq_ondisk;
}

static bool journal_entry_is_open(struct journal *j)
{
return __journal_entry_is_open(j->reservations);
prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
}

static inline struct journal_buf *
@@ -174,21 +182,40 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
return stuck;
}

void bch2_journal_do_writes(struct journal *j)
{
for (u64 seq = journal_last_unwritten_seq(j);
seq <= journal_cur_seq(j);
seq++) {
unsigned idx = seq & JOURNAL_BUF_MASK;
struct journal_buf *w = j->buf + idx;

if (w->write_started && !w->write_allocated)
break;
if (w->write_started)
continue;

if (!journal_state_count(j->reservations, idx)) {
w->write_started = true;
closure_call(&w->io, bch2_journal_write, j->wq, NULL);
}

break;
}
}

/*
* Final processing when the last reference of a journal buffer has been
* dropped. Drop the pin list reference acquired at journal entry open and write
* the buffer, if requested.
*/
void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
void bch2_journal_buf_put_final(struct journal *j, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);

lockdep_assert_held(&j->lock);

if (__bch2_journal_pin_put(j, seq))
bch2_journal_reclaim_fast(j);
if (write)
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
bch2_journal_do_writes(j);
}

/*
@@ -380,11 +407,14 @@ static int journal_entry_open(struct journal *j)
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);

bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
buf->noflush = false;
buf->must_flush = false;
buf->separate_flush = false;
buf->flush_time = 0;
buf->need_flush_to_write_buffer = true;
buf->write_started = false;
buf->write_allocated = false;
buf->write_done = false;

memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
@@ -418,9 +448,10 @@ static int journal_entry_open(struct journal *j)
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);

mod_delayed_work(c->io_complete_wq,
&j->write_work,
msecs_to_jiffies(c->opts.journal_flush_delay));
if (nr_unwritten_journal_entries(j) == 1)
mod_delayed_work(j->wq,
&j->write_work,
msecs_to_jiffies(c->opts.journal_flush_delay));
journal_wake(j);

if (j->early_journal_entries.nr)
@@ -445,20 +476,16 @@ static void journal_quiesce(struct journal *j)
static void journal_write_work(struct work_struct *work)
{
struct journal *j = container_of(work, struct journal, write_work.work);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
long delta;

spin_lock(&j->lock);
if (!__journal_entry_is_open(j->reservations))
goto unlock;
if (__journal_entry_is_open(j->reservations)) {
long delta = journal_cur_buf(j)->expires - jiffies;

delta = journal_cur_buf(j)->expires - jiffies;

if (delta > 0)
mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
else
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
unlock:
if (delta > 0)
mod_delayed_work(j->wq, &j->write_work, delta);
else
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
}
spin_unlock(&j->lock);
}

@@ -473,33 +500,32 @@ retry:
if (journal_res_get_fast(j, res, flags))
return 0;

if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
ret = JOURNAL_ERR_journal_full;
can_discard = j->can_discard;
goto out;
}

if (j->blocked)
return -BCH_ERR_journal_res_get_blocked;

if (bch2_journal_error(j))
return -BCH_ERR_erofs_journal_err;

spin_lock(&j->lock);

/* check once more in case somebody else shut things down... */
if (bch2_journal_error(j)) {
spin_unlock(&j->lock);
return -BCH_ERR_erofs_journal_err;
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
ret = JOURNAL_ERR_max_in_flight;
goto out;
}

spin_lock(&j->lock);

/*
* Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call bch2_journal_entry_close()
* unnecessarily
*/
if (journal_res_get_fast(j, res, flags)) {
spin_unlock(&j->lock);
return 0;
}

if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
/*
* Don't want to close current journal entry, just need to
* invoke reclaim:
*/
ret = JOURNAL_ERR_journal_full;
ret = 0;
goto unlock;
}

@@ -515,30 +541,30 @@ retry:
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);

__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
ret = journal_entry_open(j);

if (ret == JOURNAL_ERR_max_in_flight) {
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, true);
if (trace_journal_entry_full_enabled()) {
struct printbuf buf = PRINTBUF;
buf.atomic++;

bch2_journal_bufs_to_text(&buf, j);
trace_journal_entry_full(c, buf.buf);
printbuf_exit(&buf);
}
count_event(c, journal_entry_full);
}
ret = journal_entry_open(j) ?: JOURNAL_ERR_retry;
unlock:
can_discard = j->can_discard;
spin_unlock(&j->lock);

if (!ret)
out:
if (ret == JOURNAL_ERR_retry)
goto retry;
if (!ret)
return 0;

if (journal_error_check_stuck(j, ret, flags))
ret = -BCH_ERR_journal_res_get_blocked;

if (ret == JOURNAL_ERR_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {

struct printbuf buf = PRINTBUF;
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
bch2_journal_bufs_to_text(&buf, j);
trace_journal_entry_full(c, buf.buf);
printbuf_exit(&buf);
count_event(c, journal_entry_full);
}

/*
* Journal is full - can't rely on reclaim from work item due to
* freezing:
@@ -727,7 +753,7 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));

if (!ret)
bch2_time_stats_update(j->flush_seq_time, start_time);
time_stats_update(j->flush_seq_time, start_time);

return ret ?: ret2 < 0 ? ret2 : 0;
}
@@ -1157,7 +1183,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
struct journal_replay *i, **_i;
struct genradix_iter iter;
bool had_entries = false;
unsigned ptr;
u64 last_seq = cur_seq, nr, seq;

genradix_for_each_reverse(&c->journal_entries, iter, _i) {
@@ -1211,8 +1236,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
p = journal_seq_pin(j, seq);

p->devs.nr = 0;
for (ptr = 0; ptr < i->nr_ptrs; ptr++)
bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
darray_for_each(i->ptrs, ptr)
bch2_dev_list_add_dev(&p->devs, ptr->dev);

had_entries = true;
}
@@ -1240,13 +1265,17 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)

void bch2_dev_journal_exit(struct bch_dev *ca)
{
kfree(ca->journal.bio);
kfree(ca->journal.buckets);
kfree(ca->journal.bucket_seq);
struct journal_device *ja = &ca->journal;

ca->journal.bio = NULL;
ca->journal.buckets = NULL;
ca->journal.bucket_seq = NULL;
for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
kfree(ja->bio[i]);
ja->bio[i] = NULL;
}

kfree(ja->buckets);
kfree(ja->bucket_seq);
ja->buckets = NULL;
ja->bucket_seq = NULL;
}

int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
@@ -1256,14 +1285,13 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
bch2_sb_field_get(sb, journal);
struct bch_sb_field_journal_v2 *journal_buckets_v2 =
bch2_sb_field_get(sb, journal_v2);
unsigned i, nr_bvecs;

ja->nr = 0;

if (journal_buckets_v2) {
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);

for (i = 0; i < nr; i++)
for (unsigned i = 0; i < nr; i++)
ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
} else if (journal_buckets) {
ja->nr = bch2_nr_journal_buckets(journal_buckets);
@@ -1273,13 +1301,18 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
if (!ja->bucket_seq)
return -BCH_ERR_ENOMEM_dev_journal_init;

nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);

ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
if (!ca->journal.bio)
return -BCH_ERR_ENOMEM_dev_journal_init;
for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
ja->bio[i] = kmalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
nr_bvecs), GFP_KERNEL);
if (!ja->bio[i])
return -BCH_ERR_ENOMEM_dev_journal_init;

bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
ja->bio[i]->ca = ca;
ja->bio[i]->buf_idx = i;
bio_init(&ja->bio[i]->bio, NULL, ja->bio[i]->bio.bi_inline_vecs, nr_bvecs, 0);
}

ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
if (!ja->buckets)
@@ -1287,14 +1320,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)

if (journal_buckets_v2) {
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
unsigned j, dst = 0;
unsigned dst = 0;

for (i = 0; i < nr; i++)
for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
for (unsigned i = 0; i < nr; i++)
for (unsigned j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
ja->buckets[dst++] =
le64_to_cpu(journal_buckets_v2->d[i].start) + j;
} else if (journal_buckets) {
for (i = 0; i < ja->nr; i++)
for (unsigned i = 0; i < ja->nr; i++)
ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
}

@@ -1303,19 +1336,19 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)

void bch2_fs_journal_exit(struct journal *j)
{
unsigned i;
if (j->wq)
destroy_workqueue(j->wq);

darray_exit(&j->early_journal_entries);

for (i = 0; i < ARRAY_SIZE(j->buf); i++)
kvpfree(j->buf[i].data, j->buf[i].buf_size);
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
kvfree(j->buf[i].data);
free_fifo(&j->pin);
}

int bch2_fs_journal_init(struct journal *j)
{
static struct lock_class_key res_key;
unsigned i;

mutex_init(&j->buf_lock);
spin_lock_init(&j->lock);
@@ -1336,14 +1369,20 @@ int bch2_fs_journal_init(struct journal *j)
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
return -BCH_ERR_ENOMEM_journal_pin_fifo;

for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
if (!j->buf[i].data)
return -BCH_ERR_ENOMEM_journal_buf;
j->buf[i].idx = i;
}

j->pin.front = j->pin.back = 1;

j->wq = alloc_workqueue("bcachefs_journal",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512);
if (!j->wq)
return -BCH_ERR_ENOMEM_fs_other_alloc;
return 0;
}

@@ -1455,7 +1494,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
unsigned i;

spin_lock(&j->lock);
*seq = max(*seq, j->pin.front);
@@ -1473,7 +1511,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
prt_newline(out);
printbuf_indent_add(out, 2);

for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++)
list_for_each_entry(pin, &pin_list->list[i], list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
@ -264,7 +264,8 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
|
||||
}
|
||||
|
||||
bool bch2_journal_entry_close(struct journal *);
|
||||
void bch2_journal_buf_put_final(struct journal *, u64, bool);
|
||||
void bch2_journal_do_writes(struct journal *);
|
||||
void bch2_journal_buf_put_final(struct journal *, u64);
|
||||
|
||||
static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
|
||||
{
|
||||
@ -272,7 +273,7 @@ static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 s
|
||||
|
||||
s = journal_state_buf_put(j, idx);
|
||||
if (!journal_state_count(s, idx))
|
||||
bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
|
||||
bch2_journal_buf_put_final(j, seq);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
|
||||
@ -282,7 +283,7 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq
|
||||
s = journal_state_buf_put(j, idx);
|
||||
if (!journal_state_count(s, idx)) {
|
||||
spin_lock(&j->lock);
|
||||
bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
|
||||
bch2_journal_buf_put_final(j, seq);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,38 @@
|
||||
#include "sb-clean.h"
|
||||
#include "trace.h"
|
||||
|
||||
void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct journal_replay *j)
|
||||
{
|
||||
darray_for_each(j->ptrs, i) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, i->dev);
|
||||
u64 offset;
|
||||
|
||||
div64_u64_rem(i->sector, ca->mi.bucket_size, &offset);
|
||||
|
||||
if (i != j->ptrs.data)
|
||||
prt_printf(out, " ");
|
||||
prt_printf(out, "%u:%u:%u (sector %llu)",
|
||||
i->dev, i->bucket, i->bucket_offset, i->sector);
|
||||
}
|
||||
}
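This is the first use in the commit of the darray iteration idiom that replaces the old fixed ptrs[] array: darray_for_each binds a pointer to each element rather than an index, which is why the "first element" test becomes a pointer comparison against j->ptrs.data. A rough sketch of the darray calls used throughout this diff, assuming the interface in the new <linux/darray.h> matches bcachefs's original darray.h:

#include <linux/darray.h>

typedef DARRAY(u64) darray_u64;		/* same typedef as in journal_types.h below */

static int darray_demo(void)
{
	darray_u64 d;
	int ret = 0;

	darray_init(&d);

	for (u64 v = 0; v < 4; v++) {
		ret = darray_push(&d, v);	/* may fail with -ENOMEM */
		if (ret)
			break;
	}

	darray_for_each(d, i)			/* i is a u64 * into d.data */
		pr_info("%llu\n", *i);

	darray_exit(&d);			/* frees any heap storage */
	return ret;
}

journal_replay below additionally uses DARRAY_PREALLOCATED(struct journal_ptr, 8), which, as I read it, embeds inline room for eight elements before darray_push spills to the heap.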

static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
struct journal_replay *j)
{
prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq));

bch2_journal_ptrs_to_text(out, c, j);

struct jset_entry *entry;
for_each_jset_entry_type(entry, &j->j, BCH_JSET_ENTRY_datetime) {
struct jset_entry_datetime *datetime =
container_of(entry, struct jset_entry_datetime, entry);
bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
break;
}
}

static struct nonce journal_nonce(const struct jset *jset)
{
return (struct nonce) {{
@ -52,8 +84,7 @@ static void __journal_replay_free(struct bch_fs *c,

BUG_ON(*p != i);
*p = NULL;
kvpfree(i, offsetof(struct journal_replay, j) +
vstruct_bytes(&i->j));
kvfree(i);
}

static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
@ -84,9 +115,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
{
struct genradix_iter iter;
struct journal_replay **_i, *i, *dup;
struct journal_ptr *ptr;
size_t bytes = vstruct_bytes(j);
u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
struct printbuf buf = PRINTBUF;
int ret = JOURNAL_ENTRY_ADD_OK;

/* Is this entry older than the range we need? */
@ -131,72 +162,61 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
*/
dup = *_i;
if (dup) {
if (bytes == vstruct_bytes(&dup->j) &&
!memcmp(j, &dup->j, bytes)) {
i = dup;
goto found;
}
bool identical = bytes == vstruct_bytes(&dup->j) &&
!memcmp(j, &dup->j, bytes);
bool not_identical = !identical &&
entry_ptr.csum_good &&
dup->csum_good;

if (!entry_ptr.csum_good) {
i = dup;
goto found;
}
bool same_device = false;
darray_for_each(dup->ptrs, ptr)
if (ptr->dev == ca->dev_idx)
same_device = true;

if (!dup->csum_good)
ret = darray_push(&dup->ptrs, entry_ptr);
if (ret)
goto out;

bch2_journal_replay_to_text(&buf, c, dup);

fsck_err_on(same_device,
c, journal_entry_dup_same_device,
"duplicate journal entry on same device\n %s",
buf.buf);

fsck_err_on(not_identical,
c, journal_entry_replicas_data_mismatch,
"found duplicate but non identical journal entries\n %s",
buf.buf);

if (entry_ptr.csum_good && !identical)
goto replace;

fsck_err(c, journal_entry_replicas_data_mismatch,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
i = dup;
goto found;
goto out;
}
replace:
i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i)
return -BCH_ERR_ENOMEM_journal_entry_add;

i->nr_ptrs = 0;
darray_init(&i->ptrs);
i->csum_good = entry_ptr.csum_good;
i->ignore = false;
unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
i->ptrs[i->nr_ptrs++] = entry_ptr;

if (dup) {
if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
bch_err(c, "found too many copies of journal entry %llu",
le64_to_cpu(i->j.seq));
dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
}

/* The first ptr should represent the jset we kept: */
memcpy(i->ptrs + i->nr_ptrs,
dup->ptrs,
sizeof(dup->ptrs[0]) * dup->nr_ptrs);
i->nr_ptrs += dup->nr_ptrs;
darray_for_each(dup->ptrs, ptr)
darray_push(&i->ptrs, *ptr);
__journal_replay_free(c, dup);
} else {
darray_push(&i->ptrs, entry_ptr);
}

*_i = i;
return 0;
found:
for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
if (ptr->dev == ca->dev_idx) {
bch_err(c, "duplicate journal entry %llu on same device",
le64_to_cpu(i->j.seq));
goto out;
}
}

if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
bch_err(c, "found too many copies of journal entry %llu",
le64_to_cpu(i->j.seq));
goto out;
}

i->ptrs[i->nr_ptrs++] = entry_ptr;
out:
fsck_err:
printbuf_exit(&buf);
return ret;
}

@ -741,6 +761,37 @@ static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct
journal_entry_btree_keys_to_text(out, c, entry);
}

static int journal_entry_datetime_validate(struct bch_fs *c,
struct jset *jset,
struct jset_entry *entry,
unsigned version, int big_endian,
enum bkey_invalid_flags flags)
{
unsigned bytes = vstruct_bytes(entry);
unsigned expected = 16;
int ret = 0;

if (journal_entry_err_on(vstruct_bytes(entry) < expected,
c, version, jset, entry,
journal_entry_dev_usage_bad_size,
"bad size (%u < %u)",
bytes, expected)) {
journal_entry_null_range(entry, vstruct_next(entry));
return ret;
}
fsck_err:
return ret;
}

static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
struct jset_entry_datetime *datetime =
container_of(entry, struct jset_entry_datetime, entry);

bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
}
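The size check in the validate hook above is easier to read with the arithmetic spelled out. Assuming the usual 8-byte jset_entry header plus a single __le64 payload, a well-formed BCH_JSET_ENTRY_datetime entry occupies 16 bytes, i.e. two u64s, and since the u64s field counts payload words only (see jset_entry_init later in this diff), it ends up as 1. A sketch of the assumed layout:

/* Assumed layout: 8-byte jset_entry header + one u64 of payload. */
struct jset_entry_datetime_sketch {
	struct jset_entry	entry;		/* 8 bytes: u64s, btree_id, level, type, pad */
	__le64			seconds;	/* 8 bytes */
};
/* vstruct_bytes() == 16, DIV_ROUND_UP(16, 8) == 2 u64s,
 * entry.u64s == 2 - 1 == 1, the header word not being counted. */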

struct jset_entry_ops {
int (*validate)(struct bch_fs *, struct jset *,
struct jset_entry *, unsigned, int,
@ -913,11 +964,11 @@ static int journal_read_buf_realloc(struct journal_read_buf *b,
return -BCH_ERR_ENOMEM_journal_read_buf_realloc;

new_size = roundup_pow_of_two(new_size);
n = kvpmalloc(new_size, GFP_KERNEL);
n = kvmalloc(new_size, GFP_KERNEL);
if (!n)
return -BCH_ERR_ENOMEM_journal_read_buf_realloc;

kvpfree(b->data, b->size);
kvfree(b->data);
b->data = n;
b->size = new_size;
return 0;
@ -1102,16 +1153,15 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
if (!r)
continue;

for (i = 0; i < r->nr_ptrs; i++) {
if (r->ptrs[i].dev == ca->dev_idx) {
unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
darray_for_each(r->ptrs, i)
if (i->dev == ca->dev_idx) {
unsigned wrote = bucket_remainder(ca, i->sector) +
vstruct_sectors(&r->j, c->block_bits);

ja->cur_idx = r->ptrs[i].bucket;
ja->cur_idx = i->bucket;
ja->sectors_free = ca->mi.bucket_size - wrote;
goto found;
}
}
}
found:
mutex_unlock(&jlist->lock);
@ -1144,7 +1194,7 @@ found:
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out:
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
kvpfree(buf.data, buf.size);
kvfree(buf.data);
percpu_ref_put(&ca->io_ref);
closure_return(cl);
return;
@ -1155,27 +1205,6 @@ err:
goto out;
}

void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct journal_replay *j)
{
unsigned i;

for (i = 0; i < j->nr_ptrs; i++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
u64 offset;

div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset);

if (i)
prt_printf(out, " ");
prt_printf(out, "%u:%u:%u (sector %llu)",
j->ptrs[i].dev,
j->ptrs[i].bucket,
j->ptrs[i].bucket_offset,
j->ptrs[i].sector);
}
}

int bch2_journal_read(struct bch_fs *c,
u64 *last_seq,
u64 *blacklist_seq,
@ -1353,32 +1382,31 @@ int bch2_journal_read(struct bch_fs *c,
.e.data_type = BCH_DATA_journal,
.e.nr_required = 1,
};
unsigned ptr;

i = *_i;
if (!i || i->ignore)
continue;

for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
darray_for_each(i->ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

if (!i->ptrs[ptr].csum_good)
bch_err_dev_offset(ca, i->ptrs[ptr].sector,
if (!ptr->csum_good)
bch_err_dev_offset(ca, ptr->sector,
"invalid journal checksum, seq %llu%s",
le64_to_cpu(i->j.seq),
i->csum_good ? " (had good copy on another device)" : "");
}

ret = jset_validate(c,
bch_dev_bkey_exists(c, i->ptrs[0].dev),
bch_dev_bkey_exists(c, i->ptrs.data[0].dev),
&i->j,
i->ptrs[0].sector,
i->ptrs.data[0].sector,
READ);
if (ret)
goto err;

for (ptr = 0; ptr < i->nr_ptrs; ptr++)
replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
darray_for_each(i->ptrs, ptr)
replicas.e.devs[replicas.e.nr_devs++] = ptr->dev;

bch2_replicas_entry_sort(&replicas.e);

@ -1545,7 +1573,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
return;

new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
new_buf = kvmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
if (!new_buf)
return;

@ -1556,7 +1584,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
swap(buf->buf_size, new_size);
spin_unlock(&j->lock);

kvpfree(new_buf, new_size);
kvfree(new_buf);
}

static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
@ -1566,17 +1594,17 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)

static CLOSURE_CALLBACK(journal_write_done)
{
closure_type(j, struct journal, io);
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_last_unwritten_buf(j);
struct bch_replicas_padded replicas;
union journal_res_state old, new;
u64 v, seq;
u64 v, seq = le64_to_cpu(w->data->seq);
int err = 0;

bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
? j->flush_write_time
: j->noflush_write_time, j->write_start_time);
time_stats_update(!JSET_NO_FLUSH(w->data)
? j->flush_write_time
: j->noflush_write_time, j->write_start_time);

if (!w->devs_written.nr) {
bch_err(c, "unable to write journal to sufficient devices");
@ -1591,63 +1619,68 @@ static CLOSURE_CALLBACK(journal_write_done)
if (err)
bch2_fatal_error(c);

spin_lock(&j->lock);
seq = le64_to_cpu(w->data->seq);
closure_debug_destroy(cl);

spin_lock(&j->lock);
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = w->devs_written;
if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq;
w->write_done = true;

if (!err) {
if (!JSET_NO_FLUSH(w->data)) {
bool completed = false;

for (seq = journal_last_unwritten_seq(j);
seq <= journal_cur_seq(j);
seq++) {
w = j->buf + (seq & JOURNAL_BUF_MASK);
if (!w->write_done)
break;

if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;

bch2_do_discards(c);
closure_wake_up(&c->freelist_wait);

bch2_reset_alloc_cursors(c);
}
} else if (!j->err_seq || seq < j->err_seq)
j->err_seq = seq;

j->seq_ondisk = seq;
j->seq_ondisk = seq;

/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
*
* Must come before signaling write completion, for
* bch2_fs_journal_stop():
*/
if (j->watermark != BCH_WATERMARK_stripe)
journal_reclaim_kick(&c->journal);
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
*
* Must come before signaling write completion, for
* bch2_fs_journal_stop():
*/
if (j->watermark != BCH_WATERMARK_stripe)
journal_reclaim_kick(&c->journal);

/* also must come before signalling write completion: */
closure_debug_destroy(cl);
v = atomic64_read(&j->reservations.counter);
do {
old.v = new.v = v;
BUG_ON(journal_state_count(new, new.unwritten_idx));
BUG_ON(new.unwritten_idx != (seq & JOURNAL_BUF_MASK));

v = atomic64_read(&j->reservations.counter);
do {
old.v = new.v = v;
BUG_ON(journal_state_count(new, new.unwritten_idx));
new.unwritten_idx++;
} while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v);

new.unwritten_idx++;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
completed = true;
}

bch2_journal_reclaim_fast(j);
bch2_journal_space_available(j);
if (completed) {
bch2_journal_reclaim_fast(j);
bch2_journal_space_available(j);

track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, false);
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], false);

closure_wake_up(&w->wait);
journal_wake(j);
closure_wake_up(&w->wait);
journal_wake(j);
}

if (!journal_state_count(new, new.unwritten_idx) &&
journal_last_unwritten_seq(j) <= journal_cur_seq(j)) {
spin_unlock(&j->lock);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
} else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
struct journal_buf *buf = journal_cur_buf(j);
long delta = buf->expires - jiffies;
@ -1657,46 +1690,46 @@ static CLOSURE_CALLBACK(journal_write_done)
* previous entries still in flight - the current journal entry
* might want to be written now:
*/

spin_unlock(&j->lock);
mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
} else {
spin_unlock(&j->lock);
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
}

spin_unlock(&j->lock);
}
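The closure moved from struct journal into each journal_buf, so the completion path above recovers the journal from the buffer rather than the other way around. The interesting idiom is container_of(w, struct journal, buf[w->idx]): the member expression combines a compile-time offset with a runtime index (a GNU C extension to offsetof), letting one container_of serve every slot of the array. A reduced sketch of the pattern, with hypothetical names:

struct item { int idx; };

struct owner {
	struct item slots[4];
};

/* Recover the owner from a pointer to one of its array slots; the
 * offsetof arithmetic inside container_of evaluates slots[it->idx]
 * with the runtime index, which GNU C permits. */
static struct owner *owner_of(struct item *it)
{
	return container_of(it, struct owner, slots[it->idx]);
}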

static void journal_write_endio(struct bio *bio)
{
struct bch_dev *ca = bio->bi_private;
struct journal_bio *jbio = container_of(bio, struct journal_bio, bio);
struct bch_dev *ca = jbio->ca;
struct journal *j = &ca->fs->journal;
struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags;
struct journal_buf *w = j->buf + jbio->buf_idx;

if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
"error writing journal entry %llu: %s",
le64_to_cpu(w->data->seq),
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) {
unsigned long flags;

spin_lock_irqsave(&j->err_lock, flags);
bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
spin_unlock_irqrestore(&j->err_lock, flags);
}

closure_put(&j->io);
closure_put(&w->io);
percpu_ref_put(&ca->io_ref);
}

static CLOSURE_CALLBACK(do_journal_write)
{
closure_type(j, struct journal, io);
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
struct journal_buf *w = journal_last_unwritten_buf(j);
struct bio *bio;
unsigned sectors = vstruct_sectors(w->data, c->block_bits);

extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct journal_device *ja = &ca->journal;

if (!percpu_ref_tryget(&ca->io_ref)) {
/* XXX: fix this */
bch_err(c, "missing device for journal write\n");
@ -1706,7 +1739,7 @@ static CLOSURE_CALLBACK(do_journal_write)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
sectors);

bio = ca->journal.bio;
struct bio *bio = &ja->bio[w->idx]->bio;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_end_io = journal_write_endio;
@ -1725,11 +1758,10 @@ static CLOSURE_CALLBACK(do_journal_write)
trace_and_count(c, journal_write, bio);
closure_bio_submit(bio, cl);

ca->journal.bucket_seq[ca->journal.cur_idx] =
le64_to_cpu(w->data->seq);
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
}

continue_at(cl, journal_write_done, c->io_complete_wq);
continue_at(cl, journal_write_done, j->wq);
}

static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
@ -1802,6 +1834,11 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)

end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);

struct jset_entry_datetime *d =
container_of(jset_entry_init(&end, sizeof(*d)), struct jset_entry_datetime, entry);
d->entry.type = BCH_JSET_ENTRY_datetime;
d->seconds = cpu_to_le64(ktime_get_real_seconds());

bch2_journal_super_entries_add_common(c, &end, seq);
u64s = (u64 *) end - (u64 *) start;
BUG_ON(u64s > j->entry_u64s_reserved);
@ -1901,16 +1938,16 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *

CLOSURE_CALLBACK(bch2_journal_write)
{
closure_type(j, struct journal, io);
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_last_unwritten_buf(j);
struct bch_replicas_padded replicas;
struct bio *bio;
struct printbuf journal_debug_buf = PRINTBUF;
unsigned nr_rw_members = 0;
int ret;

BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
BUG_ON(w->write_allocated);

j->write_start_time = local_clock();

@ -1954,12 +1991,14 @@ CLOSURE_CALLBACK(bch2_journal_write)
* bch2_journal_space_available():
*/
w->sectors = 0;
w->write_allocated = true;

/*
* journal entry has been compacted and allocated, recalculate space
* available:
*/
bch2_journal_space_available(j);
bch2_journal_do_writes(j);
spin_unlock(&j->lock);

w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
@ -1983,25 +2022,29 @@ CLOSURE_CALLBACK(bch2_journal_write)
if (ret)
goto err;

if (!JSET_NO_FLUSH(w->data))
closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));

if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
for_each_rw_member(c, ca) {
percpu_ref_get(&ca->io_ref);

bio = ca->journal.bio;
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio[w->idx]->bio;
bio_reset(bio, ca->disk_sb.bdev,
REQ_OP_WRITE|REQ_PREFLUSH);
REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
closure_bio_submit(bio, cl);
}
}

continue_at(cl, do_journal_write, c->io_complete_wq);
continue_at(cl, do_journal_write, j->wq);
return;
no_io:
continue_at(cl, journal_write_done, c->io_complete_wq);
continue_at(cl, journal_write_done, j->wq);
return;
err:
bch2_fatal_error(c);
continue_at(cl, journal_write_done, c->io_complete_wq);
continue_at(cl, journal_write_done, j->wq);
}

@ -2,19 +2,22 @@
#ifndef _BCACHEFS_JOURNAL_IO_H
#define _BCACHEFS_JOURNAL_IO_H

#include <linux/darray_types.h>

struct journal_ptr {
bool csum_good;
u8 dev;
u32 bucket;
u32 bucket_offset;
u64 sector;
};

/*
* Only used for holding the journal entries we read in btree_journal_read()
* during cache_registration
*/
struct journal_replay {
struct journal_ptr {
bool csum_good;
u8 dev;
u32 bucket;
u32 bucket_offset;
u64 sector;
} ptrs[BCH_REPLICAS_MAX];
unsigned nr_ptrs;
DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs;

bool csum_good;
bool ignore;
@ -62,4 +65,20 @@ int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);

CLOSURE_CALLBACK(bch2_journal_write);

static inline struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
{
struct jset_entry *entry = *end;
unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));

memset(entry, 0, u64s * sizeof(u64));
/*
* The u64s field counts from the start of data, ignoring the shared
* fields.
*/
entry->u64s = cpu_to_le16(u64s - 1);

*end = vstruct_next(*end);
return entry;
}

#endif /* _BCACHEFS_JOURNAL_IO_H */

@ -62,12 +62,9 @@ void bch2_journal_set_watermark(struct journal *j)
? BCH_WATERMARK_reclaim
: BCH_WATERMARK_stripe;

if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
&j->low_on_space_start, low_on_space) ||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
&j->low_on_pin_start, low_on_pin) ||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
&j->write_buffer_full_start, low_on_wb))
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], low_on_space) ||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], low_on_pin) ||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);

swap(watermark, j->watermark);
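The track_event_change() calls above lose their u64 *start argument. Consistent with the journal struct changes later in this diff (the low_on_space_start, low_on_pin_start, max_in_flight_start and write_buffer_full_start fields are dropped), the event-start timestamp now appears to live inside the time stats object itself, so each call reduces to (stats, currently_active). Assumed shape of the helper after this change, inferred only from these call sites:

/* Assumption from the call sites in this diff: returns true on a
 * false -> true transition and starts timing internally; on a
 * true -> false transition it records the elapsed time into *stats. */
bool track_event_change(struct time_stats *stats, bool v);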
@ -394,8 +391,6 @@ void bch2_journal_pin_copy(struct journal *j,
struct journal_entry_pin *src,
journal_pin_flush_fn flush_fn)
{
bool reclaim;

spin_lock(&j->lock);

u64 seq = READ_ONCE(src->seq);
@ -411,44 +406,44 @@ void bch2_journal_pin_copy(struct journal *j,
return;
}

reclaim = __journal_pin_drop(j, dst);
bool reclaim = __journal_pin_drop(j, dst);

bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));

if (reclaim)
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);

/*
* If the journal is currently full, we might want to call flush_fn
* immediately:
*/
journal_wake(j);
if (seq == journal_last_seq(j))
journal_wake(j);
spin_unlock(&j->lock);
}

void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
{
bool reclaim;

spin_lock(&j->lock);

BUG_ON(seq < journal_last_seq(j));

reclaim = __journal_pin_drop(j, pin);
bool reclaim = __journal_pin_drop(j, pin);

bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));

if (reclaim)
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);

/*
* If the journal is currently full, we might want to call flush_fn
* immediately:
*/
journal_wake(j);
if (seq == journal_last_seq(j))
journal_wake(j);

spin_unlock(&j->lock);
}

/**

@ -2,8 +2,8 @@

#include "bcachefs.h"
#include "journal_sb.h"
#include "darray.h"

#include <linux/darray.h>
#include <linux/sort.h>

/* BCH_SB_FIELD_journal: */

@ -2,10 +2,11 @@

#include "bcachefs.h"
#include "btree_iter.h"
#include "eytzinger.h"
#include "journal_seq_blacklist.h"
#include "super-io.h"

#include <linux/eytzinger.h>

/*
* journal_seq_blacklist machinery:
*
@ -119,8 +120,7 @@ out:
return ret ?: bch2_blacklist_table_initialize(c);
}

static int journal_seq_blacklist_table_cmp(const void *_l,
const void *_r, size_t size)
static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
const struct journal_seq_blacklist_table_entry *l = _l;
const struct journal_seq_blacklist_table_entry *r = _r;

@ -18,6 +18,7 @@
* the journal that are being staged or in flight.
*/
struct journal_buf {
struct closure io;
struct jset *data;

__BKEY_PADDED(key, BCH_REPLICAS_MAX);
@ -33,10 +34,14 @@ struct journal_buf {
unsigned disk_sectors; /* maximum size entry could have been, if
buf_size was bigger */
unsigned u64s_reserved;
bool noflush; /* write has already been kicked off, and was noflush */
bool must_flush; /* something wants a flush */
bool separate_flush;
bool need_flush_to_write_buffer;
bool noflush:1; /* write has already been kicked off, and was noflush */
bool must_flush:1; /* something wants a flush */
bool separate_flush:1;
bool need_flush_to_write_buffer:1;
bool write_started:1;
bool write_allocated:1;
bool write_done:1;
u8 idx;
};
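Worth noting in the struct above: the flag booleans become one-bit bitfields. A plain bool member occupies at least a byte, so adding the three new write_* flags as plain bools would have grown the struct; with :1 widths, adjacent flags share storage and the seven flags plus the new u8 idx typically fit in two bytes. A standalone illustration; the sizes are what common ABIs produce, not something the C standard guarantees:

struct flags_plain  { bool a, b, c, d; };		/* typically 4 bytes */
struct flags_packed { bool a:1, b:1, c:1, d:1; };	/* typically 1 byte */
/* BUILD_BUG_ON(sizeof(struct flags_packed) != 1) would hold on common ABIs. */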

/*
@ -134,6 +139,7 @@ enum journal_flags {
/* Reasons we may fail to get a journal reservation: */
#define JOURNAL_ERRORS() \
x(ok) \
x(retry) \
x(blocked) \
x(max_in_flight) \
x(journal_full) \
@ -149,6 +155,13 @@ enum journal_errors {

typedef DARRAY(u64) darray_u64;

struct journal_bio {
struct bch_dev *ca;
unsigned buf_idx;

struct bio bio;
};

/* Embedded in struct bch_fs */
struct journal {
/* Fastpath stuff up front: */
@ -203,8 +216,8 @@ struct journal {
wait_queue_head_t wait;
struct closure_waitlist async_wait;

struct closure io;
struct delayed_work write_work;
struct workqueue_struct *wq;

/* Sequence number of most recent journal entry (last entry in @pin) */
atomic64_t seq;
@ -274,14 +287,9 @@ struct journal {
u64 nr_noflush_writes;
u64 entry_bytes_written;

u64 low_on_space_start;
u64 low_on_pin_start;
u64 max_in_flight_start;
u64 write_buffer_full_start;

struct bch2_time_stats *flush_write_time;
struct bch2_time_stats *noflush_write_time;
struct bch2_time_stats *flush_seq_time;
struct time_stats *flush_write_time;
struct time_stats *noflush_write_time;
struct time_stats *flush_seq_time;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map res_map;
@ -313,7 +321,7 @@ struct journal_device {
u64 *buckets;

/* Bio for journal reads/writes to this device */
struct bio *bio;
struct journal_bio *bio[JOURNAL_BUF_NR];

/* for bch_journal_read_device */
struct closure read;

@ -31,7 +31,7 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
nr_good = bch2_bkey_durability(c, k.s_c);
if ((!nr_good && !(flags & lost)) ||
(nr_good < replicas && !(flags & degraded)))
return -EINVAL;
return -BCH_ERR_remove_would_lose_data;

return 0;
}
@ -111,7 +111,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)

/* don't handle this yet: */
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
return -BCH_ERR_remove_with_metadata_missing_unimplemented;

trans = bch2_trans_get(c);
bch2_bkey_buf_init(&k);
@ -132,10 +132,8 @@ retry:

ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
dev_idx, flags, true);
if (ret) {
bch_err(c, "Cannot drop device without losing data");
if (ret)
break;
}

ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {

@ -85,7 +85,7 @@ void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
u64 start_time = local_clock();

__closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
}
}

@ -6,12 +6,15 @@
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, size_t size)
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
size_t size = (size_t) priv;
return memcmp(l, r, size);
}

@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
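The comparator change above is the companion to this commit's eytzinger move: eytzinger0_sort_r() and the kernel's sort_r() pass an opaque priv pointer through to the comparator, and here the element size itself is smuggled in as the pointer value, (void *)(size_t)r->entry_size, so bch2_memcmp needs no global state. A reduced sketch of the same trick with plain sort_r(), using illustrative names:

#include <linux/sort.h>
#include <linux/string.h>

/* Compare fixed-size blobs whose size rides in through priv: */
static int blob_cmp(const void *l, const void *r, const void *priv)
{
	return memcmp(l, r, (size_t) priv);
}

static void sort_blobs(void *base, size_t nr, size_t entry_size)
{
	/* NULL swap_func: sort_r falls back to a generic byte swap */
	sort_r(base, nr, entry_size, blob_cmp, NULL, (void *)entry_size);
}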

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
{
unsigned i;

sort_cmp_size(cpu_r->entries,
cpu_r->nr,
cpu_r->entry_size,
bch2_memcmp, NULL);
sort_r(cpu_r->entries,
cpu_r->nr,
cpu_r->entry_size,
bch2_memcmp, NULL,
(void *)(size_t)cpu_r->entry_size);

for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry_v1 *e =

@ -3,9 +3,10 @@
#define _BCACHEFS_REPLICAS_H

#include "bkey.h"
#include "eytzinger.h"
#include "replicas_types.h"

#include <linux/eytzinger.h>

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry_v1 *);

@ -171,22 +171,6 @@ fsck_err:
return ERR_PTR(ret);
}

static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
{
struct jset_entry *entry = *end;
unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));

memset(entry, 0, u64s * sizeof(u64));
/*
* The u64s field counts from the start of data, ignoring the shared
* fields.
*/
entry->u64s = cpu_to_le16(u64s - 1);

*end = vstruct_next(*end);
return entry;
}

void bch2_journal_super_entries_add_common(struct bch_fs *c,
struct jset_entry **end,
u64 journal_seq)

@ -6,12 +6,13 @@
*/

#include "bcachefs.h"
#include "darray.h"
#include "recovery.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"

#include <linux/darray.h>

#define RECOVERY_PASS_ALL_FSCK BIT_ULL(63)

/*

@ -2,7 +2,7 @@
#ifndef _BCACHEFS_SB_ERRORS_TYPES_H
#define _BCACHEFS_SB_ERRORS_TYPES_H

#include "darray.h"
#include <linux/darray_types.h>

#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
@ -250,7 +250,10 @@
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
x(unlinked_inode_not_on_deleted_list, 244) \
x(reflink_p_front_pad_bad, 245)
x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \
x(inode_bi_subvol_wrong, 248)

enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,

@ -2,7 +2,7 @@
#ifndef _BCACHEFS_SB_MEMBERS_H
#define _BCACHEFS_SB_MEMBERS_H

#include "darray.h"
#include <linux/darray.h>

extern char * const bch2_member_error_strs[];

@ -160,21 +160,16 @@ static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, s
}

static __always_inline int
bch2_hash_lookup(struct btree_trans *trans,
bch2_hash_lookup_in_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
subvol_inum inum, const void *key,
unsigned flags)
unsigned flags, u32 snapshot)
{
struct bkey_s_c k;
u32 snapshot;
int ret;

ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
return ret;

for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
POS(inum.inum, U64_MAX),
@ -194,6 +189,19 @@ bch2_hash_lookup(struct btree_trans *trans,
return ret ?: -BCH_ERR_ENOENT_str_hash_lookup;
}

static __always_inline int
bch2_hash_lookup(struct btree_trans *trans,
struct btree_iter *iter,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
subvol_inum inum, const void *key,
unsigned flags)
{
u32 snapshot;
return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
bch2_hash_lookup_in_snapshot(trans, iter, desc, info, inum, key, flags, snapshot);
}
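The rewritten wrapper above leans on the GNU C a ?: b form, used throughout bcachefs for error chaining: it yields a when a is nonzero (evaluating it only once) and b otherwise, so a nonzero error code from the first call short-circuits the second. In sketch form, with hypothetical helpers:

/* GNU extension: x ?: y  ==  x ? x : y, with x evaluated once. */
static int setup_then_run(void)
{
	return setup() ?: run();	/* run() only if setup() returned 0 */
}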

static __always_inline int
bch2_hash_hole(struct btree_trans *trans,
struct btree_iter *iter,
@ -251,7 +259,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
}

static __always_inline
int bch2_hash_set_snapshot(struct btree_trans *trans,
int bch2_hash_set_in_snapshot(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
subvol_inum inum, u32 snapshot,
@ -320,17 +328,12 @@ int bch2_hash_set(struct btree_trans *trans,
struct bkey_i *insert,
bch_str_hash_flags_t str_hash_flags)
{
u32 snapshot;
int ret;

ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
return ret;

insert->k.p.inode = inum.inum;

return bch2_hash_set_snapshot(trans, desc, info, inum,
snapshot, insert, str_hash_flags, 0);
u32 snapshot;
return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
bch2_hash_set_in_snapshot(trans, desc, info, inum,
snapshot, insert, str_hash_flags, 0);
}

static __always_inline

@ -42,6 +42,36 @@ static int check_subvol(struct btree_trans *trans,
return ret ?: -BCH_ERR_transaction_restart_nested;
}

struct bch_inode_unpacked inode;
struct btree_iter inode_iter = {};
ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
(subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
0);
bch2_trans_iter_exit(trans, &inode_iter);

if (ret && !bch2_err_matches(ret, ENOENT))
return ret;

if (fsck_err_on(ret, c, subvol_to_missing_root,
"subvolume %llu points to missing subvolume root %llu:%u",
k.k->p.offset, le64_to_cpu(subvol.v->inode),
le32_to_cpu(subvol.v->snapshot))) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
return ret ?: -BCH_ERR_transaction_restart_nested;
}

if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
c, subvol_root_wrong_bi_subvol,
"subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
inode.bi_inum, inode_iter.k.p.snapshot,
inode.bi_subvol, subvol.k->p.offset)) {
inode.bi_subvol = subvol.k->p.offset;
ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
if (ret)
goto err;
}

if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
u32 snapshot_tree;
@ -73,6 +103,7 @@ static int check_subvol(struct btree_trans *trans,
}
}

err:
fsck_err:
return ret;
}
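This hunk is the counterpart of the commit subject: check_subvol() now walks the subvol -> inode edge and verifies the back-pointer. The invariant being enforced, stated as a sketch:

/* For every subvolume key S:
 *   - the root inode at (S.inode, S.snapshot) must exist
 *       (else: subvol_to_missing_root -> delete the subvolume), and
 *   - that inode's bi_subvol must equal S's position
 *       (else: subvol_root_wrong_bi_subvol -> rewrite the inode).
 * The new inode_bi_subvol_missing/inode_bi_subvol_wrong fsck errors added
 * to sb-errors above presumably cover the inode -> subvol direction in
 * check_inode(), per the commit subject. */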

@ -2,7 +2,6 @@
#ifndef _BCACHEFS_SUBVOLUME_H
#define _BCACHEFS_SUBVOLUME_H

#include "darray.h"
#include "subvolume_types.h"

enum bkey_invalid_flags;

@ -2,7 +2,7 @@
#ifndef _BCACHEFS_SUBVOLUME_TYPES_H
#define _BCACHEFS_SUBVOLUME_TYPES_H

#include "darray.h"
#include <linux/darray_types.h>

typedef DARRAY(u32) snapshot_id_list;

@ -3,12 +3,12 @@
#define _BCACHEFS_SUPER_IO_H

#include "extents.h"
#include "eytzinger.h"
#include "super_types.h"
#include "super.h"
#include "sb-members.h"

#include <asm/byteorder.h>
#include <linux/eytzinger.h>

static inline bool bch2_version_compatible(u16 version)
{

@ -67,6 +67,7 @@
#include <linux/percpu.h>
#include <linux/random.h>
#include <linux/sysfs.h>
#include <linux/thread_with_file.h>
#include <crypto/hash.h>

MODULE_LICENSE("GPL");
@ -95,16 +96,10 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
if (likely(!stdio)) {
vprintk(fmt, args);
} else {
unsigned long flags;

if (fmt[0] == KERN_SOH[0])
fmt += 2;

spin_lock_irqsave(&stdio->output_lock, flags);
prt_vprintf(&stdio->output_buf, fmt, args);
spin_unlock_irqrestore(&stdio->output_lock, flags);

wake_up(&stdio->output_wait);
stdio_redirect_vprintf(stdio, true, fmt, args);
}
va_end(args);
}
@ -520,7 +515,7 @@ static void __bch2_fs_free(struct bch_fs *c)
unsigned i;

for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
time_stats_exit(&c->times[i]);

bch2_free_pending_node_rewrites(c);
bch2_fs_sb_errors_exit(c);
@ -576,7 +571,7 @@ static void __bch2_fs_free(struct bch_fs *c)
destroy_workqueue(c->btree_update_wq);

bch2_free_super(&c->disk_sb);
kvpfree(c, sizeof(*c));
kvfree(c);
module_put(THIS_MODULE);
}

@ -715,7 +710,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
unsigned i, iter_size;
int ret = 0;

c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
if (!c) {
c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc);
goto out;
@ -753,7 +748,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal_keys.initial_ref_held = true;

for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
time_stats_init(&c->times[i]);

bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
@ -882,8 +877,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
BIOSET_NEED_BVECS) ||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
!(c->online_reserved = alloc_percpu(u64)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
c->opts.btree_node_size) ||
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
c->opts.btree_node_size) ||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
!(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
sizeof(u64), GFP_KERNEL))) {
@ -1124,7 +1119,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
prt_newline(&buf);

prt_bdevname(&buf, fs->bdev);
prt_str(&buf, "believes seq of ");
prt_str(&buf, " believes seq of ");
prt_bdevname(&buf, sb->bdev);
prt_printf(&buf, " to be %llu, but ", seq_from_fs);
prt_bdevname(&buf, sb->bdev);
@ -1168,8 +1163,8 @@ static void bch2_dev_free(struct bch_dev *ca)
bch2_dev_buckets_free(ca);
free_page((unsigned long) ca->sb_read_scratch);

bch2_time_stats_exit(&ca->io_latency[WRITE]);
bch2_time_stats_exit(&ca->io_latency[READ]);
time_stats_quantiles_exit(&ca->io_latency[WRITE]);
time_stats_quantiles_exit(&ca->io_latency[READ]);

percpu_ref_exit(&ca->io_ref);
percpu_ref_exit(&ca->ref);
@ -1260,8 +1255,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,

INIT_WORK(&ca->io_error_work, bch2_io_error_work);

bch2_time_stats_init(&ca->io_latency[READ]);
bch2_time_stats_init(&ca->io_latency[WRITE]);
time_stats_quantiles_init(&ca->io_latency[READ]);
time_stats_quantiles_init(&ca->io_latency[WRITE]);

ca->mi = bch2_mi_to_cpu(member);

@ -930,10 +930,10 @@ SHOW(bch2_dev)
sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));

if (attr == &sysfs_io_latency_stats_read)
bch2_time_stats_to_text(out, &ca->io_latency[READ]);
bch2_time_stats_to_text(out, &ca->io_latency[READ].stats);

if (attr == &sysfs_io_latency_stats_write)
bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats);

sysfs_printf(congested, "%u%%",
clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)

@ -1,299 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "printbuf.h"
#include "thread_with_file.h"

#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/poll.h>

void bch2_thread_with_file_exit(struct thread_with_file *thr)
{
if (thr->task) {
kthread_stop(thr->task);
put_task_struct(thr->task);
}
}

int bch2_run_thread_with_file(struct thread_with_file *thr,
const struct file_operations *fops,
int (*fn)(void *))
{
struct file *file = NULL;
int ret, fd = -1;
unsigned fd_flags = O_CLOEXEC;

if (fops->read && fops->write)
fd_flags |= O_RDWR;
else if (fops->read)
fd_flags |= O_RDONLY;
else if (fops->write)
fd_flags |= O_WRONLY;

char name[TASK_COMM_LEN];
get_task_comm(name, current);

thr->ret = 0;
thr->task = kthread_create(fn, thr, "%s", name);
ret = PTR_ERR_OR_ZERO(thr->task);
if (ret)
return ret;

ret = get_unused_fd_flags(fd_flags);
if (ret < 0)
goto err;
fd = ret;

file = anon_inode_getfile(name, fops, thr, fd_flags);
ret = PTR_ERR_OR_ZERO(file);
if (ret)
goto err;

fd_install(fd, file);
get_task_struct(thr->task);
wake_up_process(thr->task);
return fd;
err:
if (fd >= 0)
put_unused_fd(fd);
if (thr->task)
kthread_stop(thr->task);
return ret;
}

static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr)
{
return thr->stdio.output_buf.pos ||
thr->output2.nr ||
thr->thr.done;
}

static ssize_t thread_with_stdio_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
struct thread_with_stdio *thr =
container_of(file->private_data, struct thread_with_stdio, thr);
size_t copied = 0, b;
int ret = 0;

if ((file->f_flags & O_NONBLOCK) &&
!thread_with_stdio_has_output(thr))
return -EAGAIN;

ret = wait_event_interruptible(thr->stdio.output_wait,
thread_with_stdio_has_output(thr));
if (ret)
return ret;

if (thr->thr.done)
return 0;

while (len) {
ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos);
if (ret)
break;

spin_lock_irq(&thr->stdio.output_lock);
b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos);

memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b);
memmove(thr->stdio.output_buf.buf,
thr->stdio.output_buf.buf + b,
thr->stdio.output_buf.pos - b);

thr->output2.nr += b;
thr->stdio.output_buf.pos -= b;
spin_unlock_irq(&thr->stdio.output_lock);

b = min(len, thr->output2.nr);
if (!b)
break;

b -= copy_to_user(buf, thr->output2.data, b);
if (!b) {
ret = -EFAULT;
break;
}

copied += b;
buf += b;
len -= b;

memmove(thr->output2.data,
thr->output2.data + b,
thr->output2.nr - b);
thr->output2.nr -= b;
}

return copied ?: ret;
}

static int thread_with_stdio_release(struct inode *inode, struct file *file)
{
struct thread_with_stdio *thr =
container_of(file->private_data, struct thread_with_stdio, thr);

bch2_thread_with_file_exit(&thr->thr);
printbuf_exit(&thr->stdio.input_buf);
printbuf_exit(&thr->stdio.output_buf);
darray_exit(&thr->output2);
thr->exit(thr);
return 0;
}

#define WRITE_BUFFER 4096

static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr)
{
return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done;
}

static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *ppos)
{
struct thread_with_stdio *thr =
container_of(file->private_data, struct thread_with_stdio, thr);
struct printbuf *buf = &thr->stdio.input_buf;
size_t copied = 0;
ssize_t ret = 0;

while (len) {
if (thr->thr.done) {
ret = -EPIPE;
break;
}

size_t b = len - fault_in_readable(ubuf, len);
if (!b) {
ret = -EFAULT;
break;
}

spin_lock(&thr->stdio.input_lock);
if (buf->pos < WRITE_BUFFER)
bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos));
b = min(len, printbuf_remaining_size(buf));

if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) {
ubuf += b;
len -= b;
copied += b;
buf->pos += b;
}
spin_unlock(&thr->stdio.input_lock);

if (b) {
wake_up(&thr->stdio.input_wait);
} else {
if ((file->f_flags & O_NONBLOCK)) {
ret = -EAGAIN;
break;
}

ret = wait_event_interruptible(thr->stdio.input_wait,
thread_with_stdio_has_input_space(thr));
if (ret)
break;
}
}

return copied ?: ret;
}

static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait)
{
struct thread_with_stdio *thr =
container_of(file->private_data, struct thread_with_stdio, thr);

poll_wait(file, &thr->stdio.output_wait, wait);
poll_wait(file, &thr->stdio.input_wait, wait);

__poll_t mask = 0;

if (thread_with_stdio_has_output(thr))
mask |= EPOLLIN;
if (thread_with_stdio_has_input_space(thr))
mask |= EPOLLOUT;
if (thr->thr.done)
mask |= EPOLLHUP|EPOLLERR;
return mask;
}

static const struct file_operations thread_with_stdio_fops = {
.release = thread_with_stdio_release,
.read = thread_with_stdio_read,
.write = thread_with_stdio_write,
.poll = thread_with_stdio_poll,
.llseek = no_llseek,
};

int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
void (*exit)(struct thread_with_stdio *),
int (*fn)(void *))
{
thr->stdio.input_buf = PRINTBUF;
thr->stdio.input_buf.atomic++;
spin_lock_init(&thr->stdio.input_lock);
init_waitqueue_head(&thr->stdio.input_wait);

thr->stdio.output_buf = PRINTBUF;
thr->stdio.output_buf.atomic++;
spin_lock_init(&thr->stdio.output_lock);
init_waitqueue_head(&thr->stdio.output_wait);

darray_init(&thr->output2);
thr->exit = exit;

return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn);
}

int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len)
{
wait_event(stdio->input_wait,
stdio->input_buf.pos || stdio->done);

if (stdio->done)
return -1;

spin_lock(&stdio->input_lock);
int ret = min(len, stdio->input_buf.pos);
stdio->input_buf.pos -= ret;
memcpy(buf, stdio->input_buf.buf, ret);
memmove(stdio->input_buf.buf,
stdio->input_buf.buf + ret,
stdio->input_buf.pos);
spin_unlock(&stdio->input_lock);

wake_up(&stdio->input_wait);
return ret;
}

int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len)
{
wait_event(stdio->input_wait,
stdio->input_buf.pos || stdio->done);

if (stdio->done)
return -1;

spin_lock(&stdio->input_lock);
int ret = min(len, stdio->input_buf.pos);
char *n = memchr(stdio->input_buf.buf, '\n', ret);
if (n)
ret = min(ret, n + 1 - stdio->input_buf.buf);
stdio->input_buf.pos -= ret;
memcpy(buf, stdio->input_buf.buf, ret);
memmove(stdio->input_buf.buf,
stdio->input_buf.buf + ret,
stdio->input_buf.pos);
spin_unlock(&stdio->input_lock);

wake_up(&stdio->input_wait);
return ret;
}

#endif /* NO_BCACHEFS_FS */

@ -1,41 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_THREAD_WITH_FILE_H
#define _BCACHEFS_THREAD_WITH_FILE_H

#include "thread_with_file_types.h"

struct task_struct;

struct thread_with_file {
struct task_struct *task;
int ret;
bool done;
};

void bch2_thread_with_file_exit(struct thread_with_file *);
int bch2_run_thread_with_file(struct thread_with_file *,
const struct file_operations *,
int (*fn)(void *));

struct thread_with_stdio {
struct thread_with_file thr;
struct stdio_redirect stdio;
DARRAY(char) output2;
void (*exit)(struct thread_with_stdio *);
};

static inline void thread_with_stdio_done(struct thread_with_stdio *thr)
{
thr->thr.done = true;
thr->stdio.done = true;
wake_up(&thr->stdio.input_wait);
wake_up(&thr->stdio.output_wait);
}

int bch2_run_thread_with_stdio(struct thread_with_stdio *,
void (*exit)(struct thread_with_stdio *),
int (*fn)(void *));
int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t);
int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t);

#endif /* _BCACHEFS_THREAD_WITH_FILE_H */

@ -1,16 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H
#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H

struct stdio_redirect {
spinlock_t output_lock;
wait_queue_head_t output_wait;
struct printbuf output_buf;

spinlock_t input_lock;
wait_queue_head_t input_wait;
struct printbuf input_buf;
bool done;
};

#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */

@@ -11,6 +11,7 @@
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/eytzinger.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/log2.h>
@@ -22,9 +23,8 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/sched/clock.h>
#include <linux/mean_and_variance.h>

#include "eytzinger.h"
#include "mean_and_variance.h"
#include "util.h"

static const char si_units[] = "?kMGTPEZY";
@@ -337,32 +337,6 @@ void bch2_prt_datetime(struct printbuf *out, time64_t sec)
}
#endif

static const struct time_unit {
	const char	*name;
	u64		nsecs;
} time_units[] = {
	{ "ns",		1		},
	{ "us",		NSEC_PER_USEC	},
	{ "ms",		NSEC_PER_MSEC	},
	{ "s",		NSEC_PER_SEC	},
	{ "m",		(u64) NSEC_PER_SEC * 60},
	{ "h",		(u64) NSEC_PER_SEC * 3600},
	{ "eon",	U64_MAX		},
};

static const struct time_unit *pick_time_units(u64 ns)
{
	const struct time_unit *u;

	for (u = time_units;
	     u + 1 < time_units + ARRAY_SIZE(time_units) &&
	     ns >= u[1].nsecs << 1;
	     u++)
		;

	return u;
}

void bch2_pr_time_units(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);
@@ -370,120 +344,6 @@ void bch2_pr_time_units(struct printbuf *out, u64 ns)
	prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
}

/* time stats: */

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
	unsigned i = 0;

	while (i < ARRAY_SIZE(q->entries)) {
		struct bch2_quantile_entry *e = q->entries + i;

		if (unlikely(!e->step)) {
			e->m = v;
			e->step = max_t(unsigned, v / 2, 1024);
		} else if (e->m > v) {
			e->m = e->m >= e->step
				? e->m - e->step
				: 0;
		} else if (e->m < v) {
			e->m = e->m + e->step > e->m
				? e->m + e->step
				: U32_MAX;
		}

		if ((e->m > v ? e->m - v : v - e->m) < e->step)
			e->step = max_t(unsigned, e->step / 2, 1);

		if (v >= e->m)
			break;

		i = eytzinger0_child(i, v > e->m);
	}
}

static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
					      u64 start, u64 end)
{
	u64 duration, freq;

	if (time_after64(end, start)) {
		duration = end - start;
		mean_and_variance_update(&stats->duration_stats, duration);
		mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
		stats->max_duration = max(stats->max_duration, duration);
		stats->min_duration = min(stats->min_duration, duration);
		stats->total_duration += duration;
		bch2_quantiles_update(&stats->quantiles, duration);
	}

	if (time_after64(end, stats->last_event)) {
		freq = end - stats->last_event;
		mean_and_variance_update(&stats->freq_stats, freq);
		mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
		stats->max_freq = max(stats->max_freq, freq);
		stats->min_freq = min(stats->min_freq, freq);
		stats->last_event = end;
	}
}

static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
					   struct bch2_time_stat_buffer *b)
{
	for (struct bch2_time_stat_buffer_entry *i = b->entries;
	     i < b->entries + ARRAY_SIZE(b->entries);
	     i++)
		bch2_time_stats_update_one(stats, i->start, i->end);
	b->nr = 0;
}

static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
						  struct bch2_time_stat_buffer *b)
{
	unsigned long flags;

	spin_lock_irqsave(&stats->lock, flags);
	__bch2_time_stats_clear_buffer(stats, b);
	spin_unlock_irqrestore(&stats->lock, flags);
}

void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
	unsigned long flags;

	WARN_ONCE(!stats->duration_stats_weighted.weight ||
		  !stats->freq_stats_weighted.weight,
		  "uninitialized time_stats");

	if (!stats->buffer) {
		spin_lock_irqsave(&stats->lock, flags);
		bch2_time_stats_update_one(stats, start, end);

		if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
		    stats->duration_stats.n > 1024)
			stats->buffer =
				alloc_percpu_gfp(struct bch2_time_stat_buffer,
						 GFP_ATOMIC);
		spin_unlock_irqrestore(&stats->lock, flags);
	} else {
		struct bch2_time_stat_buffer *b;

		preempt_disable();
		b = this_cpu_ptr(stats->buffer);

		BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
		b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
			.start = start,
			.end = end
		};

		if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
			bch2_time_stats_clear_buffer(stats, b);
		preempt_enable();
	}
}

static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);
@@ -503,19 +363,18 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64

#define TABSTOP_SIZE 12

void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
{
	const struct time_unit *u;
	struct quantiles *quantiles = time_stats_to_quantiles(stats);
	s64 f_mean = 0, d_mean = 0;
	u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
	int i;
	u64 f_stddev = 0, d_stddev = 0;

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
			__time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

@@ -570,14 +429,14 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));

	printbuf_indent_sub(out, 2);
	prt_newline(out);
@@ -593,53 +452,38 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));

	printbuf_indent_sub(out, 2);
	prt_newline(out);

	printbuf_tabstops_reset(out);

	i = eytzinger0_first(NR_QUANTILES);
	u = pick_time_units(stats->quantiles.entries[i].m);
	if (quantiles) {
		int i = eytzinger0_first(NR_QUANTILES);
		const struct time_unit *u =
			pick_time_units(quantiles->entries[i].m);
		u64 last_q = 0;

	prt_printf(out, "quantiles (%s):\t", u->name);
	eytzinger0_for_each(i, NR_QUANTILES) {
		bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
		prt_printf(out, "quantiles (%s):\t", u->name);
		eytzinger0_for_each(i, NR_QUANTILES) {
			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

		q = max(stats->quantiles.entries[i].m, last_q);
		prt_printf(out, "%llu ",
			   div_u64(q, u->nsecs));
		if (is_last)
			prt_newline(out);
		last_q = q;
			u64 q = max(quantiles->entries[i].m, last_q);
			prt_printf(out, "%llu ", div_u64(q, u->nsecs));
			if (is_last)
				prt_newline(out);
			last_q = q;
		}
	}
}
#else
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
#endif

void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
	free_percpu(stats->buffer);
}

void bch2_time_stats_init(struct bch2_time_stats *stats)
{
	memset(stats, 0, sizeof(*stats));
	stats->duration_stats_weighted.weight = 8;
	stats->freq_stats_weighted.weight = 8;
	stats->min_duration = U64_MAX;
	stats->min_freq = U64_MAX;
	spin_lock_init(&stats->lock);
}

/* ratelimit: */

@@ -863,171 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
	}
}

static int alignment_ok(const void *base, size_t align)
{
	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
		((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
	u32 t = *(u32 *)a;
	*(u32 *)a = *(u32 *)b;
	*(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
	u64 t = *(u64 *)a;
	*(u64 *)a = *(u64 *)b;
	*(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
	char t;

	do {
		t = *(char *)a;
		*(char *)a++ = *(char *)b;
		*(char *)b++ = t;
	} while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
			 int (*cmp_func)(const void *, const void *, size_t),
			 size_t l, size_t r)
{
	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
			base + inorder_to_eytzinger0(r, n) * size,
			size);
}

static inline void do_swap(void *base, size_t n, size_t size,
			   void (*swap_func)(void *, void *, size_t),
			   size_t l, size_t r)
{
	swap_func(base + inorder_to_eytzinger0(l, n) * size,
		  base + inorder_to_eytzinger0(r, n) * size,
		  size);
}

void eytzinger0_sort(void *base, size_t n, size_t size,
		     int (*cmp_func)(const void *, const void *, size_t),
		     void (*swap_func)(void *, void *, size_t))
{
	int i, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		do_swap(base, n, size, swap_func, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}
}

void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort */
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}

static void mempool_free_vp(void *element, void *pool_data)
{
	size_t size = (size_t) pool_data;

	vpfree(element, size);
}

static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t) pool_data;

	return vpmalloc(size, gfp_mask);
}

int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
{
	return size < PAGE_SIZE
		? mempool_init_kmalloc_pool(pool, min_nr, size)
		: mempool_init(pool, min_nr, mempool_alloc_vp,
			       mempool_free_vp, (void *) size);
}
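For context, a hedged usage sketch (the pool name and sizes are illustrative, not from this commit): mempool_init_kvpmalloc_pool() routes sub-page objects to a kmalloc-backed pool and larger objects through the vpmalloc callbacks above, so the caller names the object size exactly once.

	mempool_t io_bufs;	/* illustrative */
	int ret = mempool_init_kvpmalloc_pool(&io_bufs, 1, 1 << 20);

	if (!ret) {
		/* the pool keeps a 1 MiB reserve, so this can always make progress: */
		void *buf = mempool_alloc(&io_bufs, GFP_KERNEL);
		mempool_free(buf, &io_bufs);
		mempool_exit(&io_bufs);
	}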
#if 0
void eytzinger1_test(void)
{

@@ -5,22 +5,21 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/closure.h>
#include <linux/darray.h>
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kernel.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/ratelimit.h>
#include <linux/sched/clock.h>
#include <linux/slab.h>
#include <linux/time_stats.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>

#include "mean_and_variance.h"

#include "darray.h"
#include <linux/mean_and_variance.h>

struct closure;

@@ -53,38 +52,6 @@ static inline size_t buf_pages(void *p, size_t len)
			    PAGE_SIZE);
}

static inline void vpfree(void *p, size_t size)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		free_pages((unsigned long) p, get_order(size));
}

static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
{
	return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
					 get_order(size)) ?:
		__vmalloc(size, gfp_mask);
}

static inline void kvpfree(void *p, size_t size)
{
	if (size < PAGE_SIZE)
		kfree(p);
	else
		vpfree(p, size);
}

static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
{
	return size < PAGE_SIZE
		? kmalloc(size, gfp_mask)
		: vpmalloc(size, gfp_mask);
}

int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);

#define HEAP(type)							\
struct {								\
	size_t size, used;						\
@@ -97,13 +64,13 @@ struct { \
({									\
	(heap)->used = 0;						\
	(heap)->size = (_size);						\
	(heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
	(heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\
				 (gfp));				\
})

#define free_heap(heap)							\
do {									\
	kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0]));	\
	kvfree((heap)->data);						\
	(heap)->data = NULL;						\
} while (0)

@@ -361,83 +328,7 @@ static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
#endif
}

#define NR_QUANTILES	15
#define QUANTILE_IDX(i)	inorder_to_eytzinger0(i, NR_QUANTILES)
#define QUANTILE_FIRST	eytzinger0_first(NR_QUANTILES)
#define QUANTILE_LAST	eytzinger0_last(NR_QUANTILES)

struct bch2_quantiles {
	struct bch2_quantile_entry {
		u64	m;
		u64	step;
	} entries[NR_QUANTILES];
};

struct bch2_time_stat_buffer {
	unsigned	nr;
	struct bch2_time_stat_buffer_entry {
		u64	start;
		u64	end;
	} entries[32];
};

struct bch2_time_stats {
	spinlock_t	lock;
	/* all fields are in nanoseconds */
	u64		min_duration;
	u64		max_duration;
	u64		total_duration;
	u64		max_freq;
	u64		min_freq;
	u64		last_event;
	struct bch2_quantiles quantiles;

	struct mean_and_variance	duration_stats;
	struct mean_and_variance_weighted duration_stats_weighted;
	struct mean_and_variance	freq_stats;
	struct mean_and_variance_weighted freq_stats_weighted;
	struct bch2_time_stat_buffer __percpu *buffer;
};

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);

static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
{
	__bch2_time_stats_update(stats, start, local_clock());
}

static inline bool track_event_change(struct bch2_time_stats *stats,
				      u64 *start, bool v)
{
	if (v != !!*start) {
		if (!v) {
			bch2_time_stats_update(stats, *start);
			*start = 0;
		} else {
			*start = local_clock() ?: 1;
			return true;
		}
	}

	return false;
}
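track_event_change() turns a level-triggered condition into duration samples: a false-to-true edge records the start timestamp (and returns true exactly once), and the matching true-to-false edge feeds the elapsed time into the stats. A hedged sketch of the call pattern (the stats object and condition are illustrative):

	struct bch2_time_stats blocked_stats;	/* illustrative */
	u64 blocked_since;

	static void example_set_blocked(bool blocked)
	{
		/* records a sample when @blocked drops back to false: */
		if (track_event_change(&blocked_stats, &blocked_since, blocked))
			pr_debug("entered blocked state\n");
	}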
#else
static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {}
static inline bool track_event_change(struct bch2_time_stats *stats,
				      u64 *start, bool v)
{
	bool ret = v && !*start;
	*start = v;
	return ret;
}
#endif

void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);

void bch2_time_stats_exit(struct bch2_time_stats *);
void bch2_time_stats_init(struct bch2_time_stats *);
void bch2_time_stats_to_text(struct printbuf *, struct time_stats *);

#define ewma_add(ewma, val, weight)					\
({									\
@@ -738,34 +629,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
	memset(s + bytes, c, rem);
}

void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t));

/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos)				\
	memmove(&(_array)[(_pos) + 1],					\
		&(_array)[(_pos)],					\
		sizeof((_array)[0]) * ((_nr) - (_pos)))

#define array_insert_item(_array, _nr, _pos, _new_item)			\
do {									\
	__array_insert_item(_array, _nr, _pos);				\
	(_nr)++;							\
	(_array)[(_pos)] = (_new_item);					\
} while (0)

#define array_remove_items(_array, _nr, _pos, _nr_to_remove)		\
do {									\
	(_nr) -= (_nr_to_remove);					\
	memmove(&(_array)[(_pos)],					\
		&(_array)[(_pos) + (_nr_to_remove)],			\
		sizeof((_array)[0]) * ((_nr) - (_pos)));		\
} while (0)

#define array_remove_item(_array, _nr, _pos)				\
	array_remove_items(_array, _nr, _pos, 1)

static inline void __move_gap(void *array, size_t element_size,
			      size_t nr, size_t size,
			      size_t old_gap, size_t new_gap)
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2022-2024 Kent Overstreet <kent.overstreet@linux.dev>
 */

#include <linux/darray.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include "darray.h"

int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
int __darray_resize_slowpath(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
{
	if (new_size > d->size) {
		new_size = roundup_pow_of_two(new_size);
@@ -40,10 +40,9 @@
#include <linux/limits.h>
#include <linux/math.h>
#include <linux/math64.h>
#include <linux/mean_and_variance.h>
#include <linux/module.h>

#include "mean_and_variance.h"

u128_u u128_div(u128_u n, u64 d)
{
	u128_u r;
@@ -107,10 +106,11 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
 * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
 * values are stored bitshifted for performance and added precision.
 */
void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x)
void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
				       s64 x, bool initted, u8 weight)
{
	// previous weighted variance.
	u8 w		= s->weight;
	u8 w		= weight;
	u64 var_w0	= s->variance;
	// new value weighted.
	s64 x_w		= x << w;
@@ -119,14 +119,13 @@ void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64
	// new mean weighted.
	s64 u_w1	= s->mean + diff;

	if (!s->init) {
	if (!initted) {
		s->mean = x_w;
		s->variance = 0;
	} else {
		s->mean = u_w1;
		s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
	}
	s->init = true;
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
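With alpha = 2^w, the stored mean moves 1/2^w of the way toward each new sample, and keeping the value left-shifted by w preserves w extra fraction bits of precision. A worked example (not from the source, plain integer arithmetic): with w = 3, a stored mean of 800 (100 in sample units) and a new sample x = 200:

	x_w   = 200 << 3          = 1600
	diff  = (1600 - 800) >> 3 = 100
	mean' = 800 + 100         = 900	/* 112.5 in sample units */

so the unshifted mean moved from 100 one eighth of the way toward 200, landing on 112.5.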
@@ -134,9 +133,10 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
 * mean_and_variance_weighted_get_mean() - get mean from @s
 * @s: mean and variance number of samples and their sums
 */
s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
					u8 weight)
{
	return fast_divpow2(s.mean, s.weight);
	return fast_divpow2(s.mean, weight);
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);

@@ -144,10 +144,11 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 * mean_and_variance_weighted_get_variance() -- get variance from @s
 * @s: mean and variance number of samples and their sums
 */
u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
					    u8 weight)
{
	// always positive don't need fast divpow2
	return s.variance >> s.weight;
	return s.variance >> weight;
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);

@@ -155,9 +156,10 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
 * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
 * @s: mean and variance number of samples and their sums
 */
u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s)
u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
					  u8 weight)
{
	return int_sqrt64(mean_and_variance_weighted_get_variance(s));
	return int_sqrt64(mean_and_variance_weighted_get_variance(s, weight));
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
@@ -522,6 +522,19 @@ void mempool_kfree(void *element, void *pool_data)
}
EXPORT_SYMBOL(mempool_kfree);

void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t)pool_data;

	return kvmalloc(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kvmalloc);

void mempool_kvfree(void *element, void *pool_data)
{
	kvfree(element);
}
EXPORT_SYMBOL(mempool_kvfree);

/*
 * A simple mempool-backed page allocator that allocates pages
 * of the order specified by pool_data.
linux/sort.c (new file, 368 lines)
@@ -0,0 +1,368 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * A fast, small, non-recursive O(n log n) sort for the Linux kernel
 *
 * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
 * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
 *
 * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
 * better) at the expense of stack usage and much larger code to avoid
 * quicksort's O(n^2) worst case.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, size_t n)
{
	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI). Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, size_t n)
{
	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, size_t n)
{
	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 (swap_r_func_t)0
#define SWAP_WORDS_32 (swap_r_func_t)1
#define SWAP_BYTES    (swap_r_func_t)2
#define SWAP_WRAPPER  (swap_r_func_t)3

struct wrapper {
	cmp_func_t cmp;
	swap_func_t swap;
};

/*
 * The function pointer is last to make tail calls most efficient if the
 * compiler decides not to inline this function.
 */
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
	if (swap_func == SWAP_WRAPPER) {
		((const struct wrapper *)priv)->swap(a, b, (int)size);
		return;
	}

	if (swap_func == SWAP_WORDS_64)
		swap_words_64(a, b, size);
	else if (swap_func == SWAP_WORDS_32)
		swap_words_32(a, b, size);
	else if (swap_func == SWAP_BYTES)
		swap_bytes(a, b, size);
	else
		swap_func(a, b, (int)size, priv);
}

#define _CMP_WRAPPER ((cmp_r_func_t)0L)

static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
{
	if (cmp == _CMP_WRAPPER)
		return ((const struct wrapper *)priv)->cmp(a, b);
	return cmp(a, b, priv);
}

/**
 * parent - given the offset of the child, find the offset of the parent.
 * @i: the offset of the heap element whose parent is sought. Non-zero.
 * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
 * @size: size of each element
 *
 * In terms of array indexes, the parent of element j = @i/@size is simply
 * (j-1)/2. But when working in byte offsets, we can't use implicit
 * truncation of integer divides.
 *
 * Fortunately, we only need one bit of the quotient, not the full divide.
 * @size has a least significant bit. That bit will be clear if @i is
 * an even multiple of @size, and set if it's an odd multiple.
 *
 * Logically, we're doing "if (i & lsbit) i -= size;", but since the
 * branch is unpredictable, it's done with a bit of clever branch-free
 * code instead.
 */
__attribute_const__ __always_inline
static size_t parent(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);
	return i / 2;
}
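A worked example of the branch-free trick (numbers chosen for illustration, not from the file): with size = 12, lsbit = size & -size = 4. For child offset i = 36 (index 3): i -= size gives 24; 24 & 4 == 0, so nothing further is subtracted, and 24 / 2 = 12, which is index 1 = (3 - 1) / 2. For i = 24 (index 2): i -= size gives 12; 12 & 4 != 0, so a further size is subtracted, giving 0, and 0 / 2 = 0, which is index 0 = (2 - 1) / 2.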
/**
 * sort_r - sort an array of elements
 * @base: pointer to data to sort
 * @num: number of elements
 * @size: size of each element
 * @cmp_func: pointer to comparison function
 * @swap_func: pointer to swap function or NULL
 * @priv: third argument passed to comparison function
 *
 * This function does a heapsort on the given array. You may provide
 * a swap_func function if you need to do something more than a memory
 * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
 * avoids a slow retpoline and so is significantly faster.
 *
 * Sorting time is O(n log n) both on average and worst-case. While
 * quicksort is slightly faster on average, it suffers from exploitable
 * O(n*n) worst-case behavior and extra memory requirements that make
 * it less suitable for kernel use.
 */
void sort_r(void *base, size_t num, size_t size,
	    cmp_r_func_t cmp_func,
	    swap_r_func_t swap_func,
	    const void *priv)
{
	/* pre-scale counters for performance */
	size_t n = num * size, a = (num/2) * size;
	const unsigned int lsbit = size & -size;  /* Used to find parent */

	if (!a)		/* num < 2 || size == 0 */
		return;

	/* called from 'sort' without swap function, let's pick the default */
	if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
		swap_func = NULL;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = SWAP_WORDS_64;
		else if (is_aligned(base, size, 4))
			swap_func = SWAP_WORDS_32;
		else
			swap_func = SWAP_BYTES;
	}

	/*
	 * Loop invariants:
	 * 1. elements [a,n) satisfy the heap property (compare greater than
	 *    all of their children),
	 * 2. elements [n,num*size) are sorted, and
	 * 3. a <= b <= c <= d <= n (whenever they are valid).
	 */
	for (;;) {
		size_t b, c, d;

		if (a)			/* Building heap: sift down --a */
			a -= size;
		else if (n -= size)	/* Sorting: Extract root to --n */
			do_swap(base, base + n, size, swap_func, priv);
		else			/* Sort complete */
			break;

		/*
		 * Sift element at "a" down into heap. This is the
		 * "bottom-up" variant, which significantly reduces
		 * calls to cmp_func(): we find the sift-down path all
		 * the way to the leaves (one compare per level), then
		 * backtrack to find where to insert the target element.
		 *
		 * Because elements tend to sift down close to the leaves,
		 * this uses fewer compares than doing two per level
		 * on the way down. (A bit more than half as many on
		 * average, 3/4 worst-case.)
		 */
		for (b = a; c = 2*b + size, (d = c + size) < n;)
			b = do_cmp(base + c, base + d, cmp_func, priv) >= 0 ? c : d;
		if (d == n)	/* Special case last leaf with no sibling */
			b = c;

		/* Now backtrack from "b" to the correct location for "a" */
		while (b != a && do_cmp(base + a, base + b, cmp_func, priv) >= 0)
			b = parent(b, lsbit, size);
		c = b;			/* Where "a" belongs */
		while (b != a) {	/* Shift it into place */
			b = parent(b, lsbit, size);
			do_swap(base + b, base + c, size, swap_func, priv);
		}
	}
}
EXPORT_SYMBOL(sort_r);
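A minimal caller sketch (the element type and comparator are illustrative): the point of sort_r() over plain sort() is that the comparator receives context through @priv instead of relying on globals.

struct sort_ctx_example { bool descending; };

static int cmp_u32_ctx(const void *a, const void *b, const void *priv)
{
	const struct sort_ctx_example *ctx = priv;
	int r = *(const u32 *)a < *(const u32 *)b ? -1 :
		*(const u32 *)a > *(const u32 *)b ?  1 : 0;

	return ctx->descending ? -r : r;
}

static void sort_example(u32 *a, size_t nr)
{
	struct sort_ctx_example ctx = { .descending = true };

	/* NULL swap_func selects the built-in word-wide swap above: */
	sort_r(a, nr, sizeof(*a), cmp_u32_ctx, NULL, &ctx);
}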
#include <linux/eytzinger.h>

static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
				    cmp_r_func_t cmp_func, const void *priv,
				    size_t l, size_t r)
{
	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
		      base + inorder_to_eytzinger0(r, n) * size,
		      cmp_func, priv);
}

static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
				      swap_r_func_t swap_func, const void *priv,
				      size_t l, size_t r)
{
	do_swap(base + inorder_to_eytzinger0(l, n) * size,
		base + inorder_to_eytzinger0(r, n) * size,
		size, swap_func, priv);
}

void eytzinger0_sort_r(void *base, size_t n, size_t size,
		       cmp_r_func_t cmp_func,
		       swap_r_func_t swap_func,
		       const void *priv)
{
	int i, c, r;

	/* called from 'sort' without swap function, let's pick the default */
	if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
		swap_func = NULL;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = SWAP_WORDS_64;
		else if (is_aligned(base, size, 4))
			swap_func = SWAP_WORDS_32;
		else
			swap_func = SWAP_BYTES;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}
}
EXPORT_SYMBOL_GPL(eytzinger0_sort_r);

void eytzinger0_sort(void *base, size_t n, size_t size,
		     cmp_func_t cmp_func,
		     swap_func_t swap_func)
{
	struct wrapper w = {
		.cmp  = cmp_func,
		.swap = swap_func,
	};

	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
EXPORT_SYMBOL_GPL(eytzinger0_sort);
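Unlike sort_r(), these leave the array in Eytzinger (breadth-first) order: inorder_to_eytzinger0() maps the heapsort's logical in-order positions to the physical layout, so the result is an implicit binary search tree with the children of element i at 2i+1 and 2i+2, which searches with far better cache locality than a plain sorted array. A hedged caller sketch (array and comparator are illustrative):

static int cmp_u64_example(const void *a, const void *b)
{
	const u64 *x = a, *y = b;

	return *x < *y ? -1 : *x > *y ? 1 : 0;
}

/* after this, samples[0] is the root of the implicit search tree,
 * samples[1] and samples[2] its children, and so on: */
eytzinger0_sort(samples, nr, sizeof(samples[0]), cmp_u64_example, NULL);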
linux/time_stats.c (new file, 373 lines)
@@ -0,0 +1,373 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/eytzinger.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/time.h>
#include <linux/time_stats.h>
#include <linux/spinlock.h>

static const struct time_unit time_units[] = {
	{ "ns",		1		},
	{ "us",		NSEC_PER_USEC	},
	{ "ms",		NSEC_PER_MSEC	},
	{ "s",		NSEC_PER_SEC	},
	{ "m",		(u64) NSEC_PER_SEC * 60},
	{ "h",		(u64) NSEC_PER_SEC * 3600},
	{ "d",		(u64) NSEC_PER_SEC * 3600 * 24},
	{ "w",		(u64) NSEC_PER_SEC * 3600 * 24 * 7},
	{ "y",		(u64) NSEC_PER_SEC * ((3600 * 24 * 7 * 365) + (3600 * (24 / 4) * 7))}, /* 365.25d */
	{ "eon",	U64_MAX		},
};

const struct time_unit *pick_time_units(u64 ns)
{
	const struct time_unit *u;

	for (u = time_units;
	     u + 1 < time_units + ARRAY_SIZE(time_units) &&
	     ns >= u[1].nsecs << 1;
	     u++)
		;

	return u;
}
EXPORT_SYMBOL_GPL(pick_time_units);
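The ns >= u[1].nsecs << 1 condition means a value is only promoted to the next unit once it is at least two of them, which keeps integer-division output readable: 1,500,000 ns stays below the 2 ms threshold and prints as "1500 us", while 3,000,000 ns crosses it and prints as "3 ms" (rather than 1,500,000 ns collapsing to a lossy "1 ms").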
static void quantiles_update(struct quantiles *q, u64 v)
{
	unsigned i = 0;

	while (i < ARRAY_SIZE(q->entries)) {
		struct quantile_entry *e = q->entries + i;

		if (unlikely(!e->step)) {
			e->m = v;
			e->step = max_t(unsigned, v / 2, 1024);
		} else if (e->m > v) {
			e->m = e->m >= e->step
				? e->m - e->step
				: 0;
		} else if (e->m < v) {
			e->m = e->m + e->step > e->m
				? e->m + e->step
				: U32_MAX;
		}

		if ((e->m > v ? e->m - v : v - e->m) < e->step)
			e->step = max_t(unsigned, e->step / 2, 1);

		if (v >= e->m)
			break;

		i = eytzinger0_child(i, v > e->m);
	}
}

static inline void time_stats_update_one(struct time_stats *stats,
					 u64 start, u64 end)
{
	u64 duration, freq;
	bool initted = stats->last_event != 0;

	if (time_after64(end, start)) {
		struct quantiles *quantiles = time_stats_to_quantiles(stats);

		duration = end - start;
		mean_and_variance_update(&stats->duration_stats, duration);
		mean_and_variance_weighted_update(&stats->duration_stats_weighted,
						  duration, initted, TIME_STATS_MV_WEIGHT);
		stats->max_duration = max(stats->max_duration, duration);
		stats->min_duration = min(stats->min_duration, duration);
		stats->total_duration += duration;

		if (quantiles)
			quantiles_update(quantiles, duration);
	}

	if (stats->last_event && time_after64(end, stats->last_event)) {
		freq = end - stats->last_event;
		mean_and_variance_update(&stats->freq_stats, freq);
		mean_and_variance_weighted_update(&stats->freq_stats_weighted,
						  freq, initted, TIME_STATS_MV_WEIGHT);
		stats->max_freq = max(stats->max_freq, freq);
		stats->min_freq = min(stats->min_freq, freq);
	}

	stats->last_event = end;
}

void __time_stats_clear_buffer(struct time_stats *stats,
			       struct time_stat_buffer *b)
{
	for (struct time_stat_buffer_entry *i = b->entries;
	     i < b->entries + ARRAY_SIZE(b->entries);
	     i++)
		time_stats_update_one(stats, i->start, i->end);
	b->nr = 0;
}
EXPORT_SYMBOL_GPL(__time_stats_clear_buffer);

static noinline void time_stats_clear_buffer(struct time_stats *stats,
					     struct time_stat_buffer *b)
{
	unsigned long flags;

	spin_lock_irqsave(&stats->lock, flags);
	__time_stats_clear_buffer(stats, b);
	spin_unlock_irqrestore(&stats->lock, flags);
}

void __time_stats_update(struct time_stats *stats, u64 start, u64 end)
{
	unsigned long flags;

	if (!stats->buffer) {
		spin_lock_irqsave(&stats->lock, flags);
		time_stats_update_one(stats, start, end);

		if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
		    stats->duration_stats.n > 1024)
			stats->buffer =
				alloc_percpu_gfp(struct time_stat_buffer,
						 GFP_ATOMIC);
		spin_unlock_irqrestore(&stats->lock, flags);
	} else {
		struct time_stat_buffer *b;

		preempt_disable();
		b = this_cpu_ptr(stats->buffer);

		BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
		b->entries[b->nr++] = (struct time_stat_buffer_entry) {
			.start	= start,
			.end	= end
		};

		if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
			time_stats_clear_buffer(stats, b);
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(__time_stats_update);
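A note on the heuristic above: stats start out unbuffered and take the spinlock on every event; only once events prove frequent (weighted mean inter-event time under 32 ns, with more than 1024 samples collected) are per-CPU buffers allocated, after which updates batch 32 entries locklessly and fold into the shared struct only when a buffer fills. The caller side just brackets the timed work (a minimal sketch; the header's wrapper naming is assumed, only __time_stats_update() comes from the code above):

	u64 start = local_clock();
	/* ... timed section ... */
	__time_stats_update(&stats, start, local_clock());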
#include <linux/seq_buf.h>

static void seq_buf_time_units_aligned(struct seq_buf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	seq_buf_printf(out, "%8llu %s", div64_u64(ns, u->nsecs), u->name);
}

static inline u64 time_stats_lifetime(const struct time_stats *stats)
{
	return local_clock() - stats->start_time;
}

void time_stats_to_seq_buf(struct seq_buf *out, struct time_stats *stats,
			   const char *epoch_name, unsigned int flags)
{
	struct quantiles *quantiles = time_stats_to_quantiles(stats);
	s64 f_mean = 0, d_mean = 0;
	u64 f_stddev = 0, d_stddev = 0;
	u64 lifetime = time_stats_lifetime(stats);

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

	if (stats->freq_stats.n) {
		/* avoid divide by zero */
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
		/* unless we didn't want zeroes anyway */
		return;
	}

	seq_buf_printf(out, "count: %llu\n", stats->duration_stats.n);
	seq_buf_printf(out, "lifetime: ");
	seq_buf_time_units_aligned(out, lifetime);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " since %-12s recent\n", epoch_name);

	seq_buf_printf(out, "duration of events\n");

	seq_buf_printf(out, " min: ");
	seq_buf_time_units_aligned(out, stats->min_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " max: ");
	seq_buf_time_units_aligned(out, stats->max_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " total: ");
	seq_buf_time_units_aligned(out, stats->total_duration);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " mean: ");
	seq_buf_time_units_aligned(out, d_mean);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " stddev: ");
	seq_buf_time_units_aligned(out, d_stddev);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, "time between events\n");

	seq_buf_printf(out, " min: ");
	seq_buf_time_units_aligned(out, stats->min_freq);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " max: ");
	seq_buf_time_units_aligned(out, stats->max_freq);
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " mean: ");
	seq_buf_time_units_aligned(out, f_mean);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	seq_buf_printf(out, " stddev: ");
	seq_buf_time_units_aligned(out, f_stddev);
	seq_buf_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
	seq_buf_printf(out, "\n");

	if (quantiles) {
		int i = eytzinger0_first(NR_QUANTILES);
		const struct time_unit *u =
			pick_time_units(quantiles->entries[i].m);
		u64 last_q = 0;

		seq_buf_printf(out, "quantiles (%s):\t", u->name);
		eytzinger0_for_each(i, NR_QUANTILES) {
			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

			u64 q = max(quantiles->entries[i].m, last_q);
			seq_buf_printf(out, "%llu ", div_u64(q, u->nsecs));
			if (is_last)
				seq_buf_printf(out, "\n");
			last_q = q;
		}
	}
}
EXPORT_SYMBOL_GPL(time_stats_to_seq_buf);

void time_stats_to_json(struct seq_buf *out, struct time_stats *stats,
			const char *epoch_name, unsigned int flags)
{
	struct quantiles *quantiles = time_stats_to_quantiles(stats);
	s64 f_mean = 0, d_mean = 0;
	u64 f_stddev = 0, d_stddev = 0;

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

	if (stats->freq_stats.n) {
		/* avoid divide by zero */
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	} else if (flags & TIME_STATS_PRINT_NO_ZEROES) {
		/* unless we didn't want zeroes anyway */
		return;
	}

	seq_buf_printf(out, "{\n");
	seq_buf_printf(out, " \"epoch\": \"%s\",\n", epoch_name);
	seq_buf_printf(out, " \"count\": %llu,\n", stats->duration_stats.n);

	seq_buf_printf(out, " \"duration_ns\": {\n");
	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_duration);
	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_duration);
	seq_buf_printf(out, " \"total\": %llu,\n", stats->total_duration);
	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
	seq_buf_printf(out, " },\n");

	d_mean = mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);
	d_stddev = mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT);

	seq_buf_printf(out, " \"duration_ewma_ns\": {\n");
	seq_buf_printf(out, " \"mean\": %llu,\n", d_mean);
	seq_buf_printf(out, " \"stddev\": %llu\n", d_stddev);
	seq_buf_printf(out, " },\n");

	seq_buf_printf(out, " \"frequency_ns\": {\n");
	seq_buf_printf(out, " \"min\": %llu,\n", stats->min_freq);
	seq_buf_printf(out, " \"max\": %llu,\n", stats->max_freq);
	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);
	seq_buf_printf(out, " },\n");

	f_mean = mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);
	f_stddev = mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT);

	seq_buf_printf(out, " \"frequency_ewma_ns\": {\n");
	seq_buf_printf(out, " \"mean\": %llu,\n", f_mean);
	seq_buf_printf(out, " \"stddev\": %llu\n", f_stddev);

	if (quantiles) {
		u64 last_q = 0;

		/* close frequency_ewma_ns but signal more items */
		seq_buf_printf(out, " },\n");

		seq_buf_printf(out, " \"quantiles_ns\": [\n");
		eytzinger0_for_each(i, NR_QUANTILES) {
			bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

			u64 q = max(quantiles->entries[i].m, last_q);
			seq_buf_printf(out, " %llu", q);
			if (!is_last)
				seq_buf_printf(out, ", ");
			last_q = q;
		}
		seq_buf_printf(out, " ]\n");
	} else {
		/* close frequency_ewma_ns without dumping further */
		seq_buf_printf(out, " }\n");
	}

	seq_buf_printf(out, "}\n");
}
EXPORT_SYMBOL_GPL(time_stats_to_json);

void time_stats_exit(struct time_stats *stats)
{
	free_percpu(stats->buffer);
}
EXPORT_SYMBOL_GPL(time_stats_exit);

void time_stats_init(struct time_stats *stats)
{
	memset(stats, 0, sizeof(*stats));
	stats->min_duration = U64_MAX;
	stats->min_freq = U64_MAX;
	stats->start_time = local_clock();
	spin_lock_init(&stats->lock);
}
EXPORT_SYMBOL_GPL(time_stats_init);
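Putting the pieces together, a hedged end-to-end sketch (the epoch name "mount" and buffer size are illustrative; seq_buf_init() is the standard kernel helper, everything else comes from this file):

static void time_stats_lifecycle_example(void)
{
	struct time_stats stats;
	struct seq_buf s;
	char buf[512];
	u64 start;

	time_stats_init(&stats);

	start = local_clock();
	/* ... do the work being measured ... */
	__time_stats_update(&stats, start, local_clock());

	seq_buf_init(&s, buf, sizeof(buf));
	time_stats_to_seq_buf(&s, &stats, "mount", 0);
	/* buf now holds the human-readable table */

	time_stats_exit(&stats);
}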
MODULE_AUTHOR("Kent Overstreet");
MODULE_LICENSE("GPL");

@@ -61,7 +61,7 @@ impl BcachefsHandle {
    pub fn create_subvolume<P: AsRef<Path>>(&self, dst: P) -> Result<(), Errno> {
        let dst = CString::new(dst.as_ref().as_os_str().as_bytes()).expect("Failed to cast destination path for subvolume in a C-style string");
        self.ioctl(BcachefsIoctl::SubvolumeCreate, &BcachefsIoctlPayload::Subvolume(bch_ioctl_subvolume {
            dirfd: libc::AT_FDCWD,
            dirfd: libc::AT_FDCWD as u32,
            mode: 0o777,
            dst_ptr: dst.as_ptr() as u64,
            ..Default::default()
@@ -73,7 +73,7 @@ impl BcachefsHandle {
    pub fn delete_subvolume<P: AsRef<Path>>(&self, dst: P) -> Result<(), Errno> {
        let dst = CString::new(dst.as_ref().as_os_str().as_bytes()).expect("Failed to cast destination path for subvolume in a C-style string");
        self.ioctl(BcachefsIoctl::SubvolumeDestroy, &BcachefsIoctlPayload::Subvolume(bch_ioctl_subvolume {
            dirfd: libc::AT_FDCWD,
            dirfd: libc::AT_FDCWD as u32,
            mode: 0o777,
            dst_ptr: dst.as_ptr() as u64,
            ..Default::default()
@@ -88,7 +88,7 @@ impl BcachefsHandle {

        let res = self.ioctl(BcachefsIoctl::SubvolumeCreate, &BcachefsIoctlPayload::Subvolume(bch_ioctl_subvolume {
            flags: BCH_SUBVOL_SNAPSHOT_CREATE | extra_flags,
            dirfd: libc::AT_FDCWD,
            dirfd: libc::AT_FDCWD as u32,
            mode: 0o777,
            src_ptr: src.as_ref().map_or(0, |x| x.as_ptr() as u64),
            //src_ptr: if let Some(src) = src { src.as_ptr() } else { std::ptr::null() } as u64,