Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)
Update bcachefs sources to 3e93567c51 bcachefs: Switch to local_clock() for fastpath time source
parent 3165f53b28
commit e0a51ccce8
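The substance of the upstream change is a clock-source swap on hot paths, visible in the btree_iter.c and btree_locking.h hunks below. A minimal sketch of the pattern, assuming kernel context; the helper names here are illustrative, not from the diff:

#include <linux/types.h>
#include <linux/sched/clock.h>

static u64 fastpath_start;

static inline void fastpath_begin(void)
{
	/*
	 * local_clock() reads a cheap per-cpu scheduler clock; it is only
	 * guaranteed monotonic on one cpu, which is acceptable for the
	 * lock-hold and transaction timing the diff below converts.
	 */
	fastpath_start = local_clock();	/* previously: ktime_get_ns() */
}

static inline u64 fastpath_elapsed_ns(void)
{
	return local_clock() - fastpath_start;
}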
@@ -1 +1 @@
-6ee8a33cee5dfb74a1fb6ff348578fd43aae3a14
+3e93567c5196ef0c80e2ac3c08295130d858dfd6
Makefile | 6
@@ -199,6 +199,12 @@ update-bcachefs-sources:
 	git add include/linux/printbuf.h
 	cp $(LINUX_DIR)/lib/printbuf.c linux/
 	git add linux/printbuf.c
+	cp $(LINUX_DIR)/lib/math/mean_and_variance.c linux/
+	git add linux/mean_and_variance.c
+	cp $(LINUX_DIR)/include/linux/mean_and_variance.h include/linux/
+	git add include/linux/mean_and_variance.h
+	cp $(LINUX_DIR)/lib/math/int_sqrt.c linux/
+	git add linux/int_sqrt.c
 	cp $(LINUX_DIR)/scripts/Makefile.compiler ./
 	git add Makefile.compiler
 	$(RM) libbcachefs/*.mod.c
include/linux/mean_and_variance.h | 170 (new file)
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MEAN_AND_VARIANCE_H_
+#define MEAN_AND_VARIANCE_H_
+
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/math64.h>
+#include <linux/printbuf.h>
+
+#define SQRT_U64_MAX 4294967295ULL
+
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+
+typedef unsigned __int128 u128;
+
+static inline u128 u64_to_u128(u64 a)
+{
+	return (u128)a;
+}
+
+static inline u64 u128_to_u64(u128 a)
+{
+	return (u64)a;
+}
+
+static inline u64 u128_shr64_to_u64(u128 a)
+{
+	return (u64)(a >> 64);
+}
+
+static inline u128 u128_add(u128 a, u128 b)
+{
+	return a + b;
+}
+
+static inline u128 u128_sub(u128 a, u128 b)
+{
+	return a - b;
+}
+
+static inline u128 u128_shl(u128 i, s8 shift)
+{
+	return i << shift;
+}
+
+static inline u128 u128_shl64_add(u64 a, u64 b)
+{
+	return ((u128)a << 64) + b;
+}
+
+static inline u128 u128_square(u64 i)
+{
+	/* widen before multiplying so the high 64 bits aren't lost */
+	return (u128)i * i;
+}
+
+#else
+
+typedef struct {
+	u64 hi, lo;
+} u128;
+
+static inline u128 u64_to_u128(u64 a)
+{
+	return (u128){ .lo = a };
+}
+
+static inline u64 u128_to_u64(u128 a)
+{
+	return a.lo;
+}
+
+static inline u64 u128_shr64_to_u64(u128 a)
+{
+	return a.hi;
+}
+
+static inline u128 u128_add(u128 a, u128 b)
+{
+	u128 c;
+
+	c.lo = a.lo + b.lo;
+	c.hi = a.hi + b.hi + (c.lo < a.lo);
+	return c;
+}
+
+static inline u128 u128_sub(u128 a, u128 b)
+{
+	u128 c;
+
+	c.lo = a.lo - b.lo;
+	c.hi = a.hi - b.hi - (c.lo > a.lo);
+	return c;
+}
+
+static inline u128 u128_shl(u128 i, s8 shift)
+{
+	u128 r;
+
+	r.lo = i.lo << shift;
+	if (shift < 64)
+		r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
+	else {
+		r.hi = i.lo << (shift - 64);
+		r.lo = 0;
+	}
+	return r;
+}
+
+static inline u128 u128_shl64_add(u64 a, u64 b)
+{
+	return u128_add(u128_shl(u64_to_u128(a), 64), u64_to_u128(b));
+}
+
+static inline u128 u128_square(u64 i)
+{
+	u128 r;
+	u64 h = i >> 32, l = i & (u64)U32_MAX;
+
+	r = u128_shl(u64_to_u128(h*h), 64);
+	r = u128_add(r, u128_shl(u64_to_u128(h*l), 32));
+	r = u128_add(r, u128_shl(u64_to_u128(l*h), 32));
+	r = u128_add(r, u64_to_u128(l*l));
+	return r;
+}
+
+#endif
+
+static inline u128 u128_div(u128 n, u64 d)
+{
+	u128 r;
+	u64 rem;
+	u64 hi = u128_shr64_to_u64(n);
+	u64 lo = u128_to_u64(n);
+	u64 h = hi & ((u64)U32_MAX << 32);
+	u64 l = (hi & (u64)U32_MAX) << 32;
+
+	r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
+	r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
+	r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
+	return r;
+}
+
+struct mean_and_variance {
+	s64 n;
+	s64 sum;
+	u128 sum_squares;
+};
+
+/* exponentially weighted variant */
+struct mean_and_variance_weighted {
+	bool init;
+	u8 w;
+	s64 mean;
+	u64 variance;
+};
+
+inline s64 fast_divpow2(s64 n, u8 d);
+
+struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1);
+s64 mean_and_variance_get_mean(struct mean_and_variance s);
+u64 mean_and_variance_get_variance(struct mean_and_variance s1);
+u32 mean_and_variance_get_stddev(struct mean_and_variance s);
+
+struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1, s64 v1);
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
+
+#endif // MEAN_AND_VARIANCE_H_
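A usage sketch for the API this header declares, as exercised later by the time-stats rework in util.c; the function and sample values here are hypothetical, not part of the commit:

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/mean_and_variance.h>

static void mean_and_variance_example(void)
{
	struct mean_and_variance s = {};
	s64 samples[] = { 10, 12, 11, 13 };
	unsigned i;

	/* updates are by value: each call returns the new accumulator */
	for (i = 0; i < ARRAY_SIZE(samples); i++)
		s = mean_and_variance_update(s, samples[i]);

	pr_info("n=%lld mean=%lld variance=%llu stddev=%u\n",
		s.n,
		mean_and_variance_get_mean(s),
		mean_and_variance_get_variance(s),
		mean_and_variance_get_stddev(s));
}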
@@ -414,7 +414,8 @@ err:
 int bch2_get_next_backpointer(struct btree_trans *trans,
 			      struct bpos bucket, int gen,
 			      u64 *bp_offset,
-			      struct bch_backpointer *dst)
+			      struct bch_backpointer *dst,
+			      unsigned iter_flags)
 {
 	struct bch_fs *c = trans->c;
 	struct bpos bp_pos, bp_end_pos;
@@ -1023,7 +1024,7 @@ static int check_one_backpointer(struct btree_trans *trans,
 	struct printbuf buf = PRINTBUF;
 	int ret;
 
-	ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp);
+	ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp, 0);
 	if (ret || *bp_offset == U64_MAX)
 		return ret;
 
@@ -25,7 +25,7 @@ int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
 int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *,
 				struct bch_backpointer, struct bkey_s_c);
 int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
-			      u64 *, struct bch_backpointer *);
+			      u64 *, struct bch_backpointer *, unsigned);
 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
 					 struct bpos, u64, struct bch_backpointer);
 struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
@@ -1979,11 +1979,11 @@ int bch2_gc_gens(struct bch_fs *c)
 					NULL, NULL,
 					BTREE_INSERT_NOFAIL,
 				gc_btree_gens_key(&trans, &iter, k));
-		if (ret) {
+		if (ret && ret != -EROFS)
 			bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
+		if (ret)
 			goto err;
-		}
 	}
 
 	ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
 			POS_MIN,
@@ -1992,10 +1992,10 @@ int bch2_gc_gens(struct bch_fs *c)
 			NULL, NULL,
 			BTREE_INSERT_NOFAIL,
 		bch2_alloc_write_oldest_gen(&trans, &iter, k));
-	if (ret) {
+	if (ret && ret != -EROFS)
 		bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
+	if (ret)
 		goto err;
-	}
 
 	c->gc_gens_btree	= 0;
 	c->gc_gens_pos		= POS_MIN;
@@ -772,7 +772,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
 
 	bch2_bkey_buf_init(&tmp);
 
-	while (nr && !ret) {
+	while (nr-- && !ret) {
 		if (!bch2_btree_node_relock(trans, path, path->level))
 			break;
 
@@ -807,7 +807,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
 
 	bch2_bkey_buf_init(&tmp);
 
-	while (nr && !ret) {
+	while (nr-- && !ret) {
 		if (!bch2_btree_node_relock(trans, path, path->level))
 			break;
 
@@ -2386,6 +2386,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		}
 
 		k = bch2_btree_path_peek_slot(iter->path, &iter->k);
+		if (unlikely(!k.k))
+			goto out_no_locked;
 	} else {
 		struct bpos next;
 
@@ -2783,7 +2785,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 
 	if (!trans->restarted &&
 	    (need_resched() ||
-	     ktime_get_ns() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
+	     local_clock() - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
 		bch2_trans_unlock(trans);
 		cond_resched();
 		bch2_trans_relock(trans);
@@ -2793,7 +2795,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 	if (trans->restarted)
 		bch2_btree_path_traverse_all(trans);
 
-	trans->last_begin_time = ktime_get_ns();
+	trans->last_begin_time = local_clock();
 	return trans->restart_count;
 }
 
@@ -2850,7 +2852,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
 	memset(trans, 0, sizeof(*trans));
 	trans->c		= c;
 	trans->fn		= fn;
-	trans->last_begin_time	= ktime_get_ns();
+	trans->last_begin_time	= local_clock();
 	trans->fn_idx		= bch2_trans_get_fn_idx(trans, c, fn);
 	trans->locking_wait.task = current;
 	closure_init_stack(&trans->ref);
@@ -112,6 +112,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
 
 	if (!ck->c.lock.readers) {
+#ifdef __KERNEL__
 		preempt_disable();
 		f = this_cpu_ptr(bc->pcpu_freed);
 
@@ -136,6 +137,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 			list_move_tail(&ck->list, &bc->freed_nonpcpu);
 			mutex_unlock(&bc->lock);
 		}
+#else
+		mutex_lock(&bc->lock);
+		list_move_tail(&ck->list, &bc->freed_nonpcpu);
+		mutex_unlock(&bc->lock);
+#endif
 	} else {
 		mutex_lock(&bc->lock);
 		list_move_tail(&ck->list, &bc->freed_pcpu);
@@ -174,6 +180,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
 	bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
 
 	if (!pcpu_readers) {
+#ifdef __KERNEL__
 		preempt_disable();
 		f = this_cpu_ptr(bc->pcpu_freed);
 		if (f->nr)
@@ -196,6 +203,14 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
 			preempt_enable();
 			mutex_unlock(&bc->lock);
 		}
+#else
+		mutex_lock(&bc->lock);
+		if (!list_empty(&bc->freed_nonpcpu)) {
+			ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
+			list_del_init(&ck->list);
+		}
+		mutex_unlock(&bc->lock);
+#endif
 	} else {
 		mutex_lock(&bc->lock);
 		if (!list_empty(&bc->freed_pcpu)) {
@@ -228,6 +243,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
 		return ck;
 	}
 
+	/* GFP_NOFS because we're holding btree locks: */
 	ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
 	if (likely(ck)) {
 		INIT_LIST_HEAD(&ck->list);
@@ -252,6 +268,7 @@ bkey_cached_reuse(struct btree_key_cache *c)
 	struct bkey_cached *ck;
 	unsigned i;
 
+	mutex_lock(&c->lock);
 	rcu_read_lock();
 	tbl = rht_dereference_rcu(c->table.tbl, &c->table);
 	for (i = 0; i < tbl->size; i++)
@@ -259,14 +276,15 @@ bkey_cached_reuse(struct btree_key_cache *c)
 			if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
 			    bkey_cached_lock_for_evict(ck)) {
 				bkey_cached_evict(c, ck);
-				rcu_read_unlock();
-				return ck;
+				goto out;
 			}
 		}
-	rcu_read_unlock();
-
-	return NULL;
-}
+	ck = NULL;
+out:
+	rcu_read_unlock();
+	mutex_unlock(&c->lock);
+	return ck;
+}
 
 static struct bkey_cached *
 btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
@@ -759,12 +777,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 	unsigned start, flags;
 	int srcu_idx;
 
-	/* Return -1 if we can't do anything right now */
-	if (sc->gfp_mask & __GFP_FS)
-		mutex_lock(&bc->lock);
-	else if (!mutex_trylock(&bc->lock))
-		return -1;
-
+	mutex_lock(&bc->lock);
 	srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 	flags = memalloc_nofs_save();
 
@@ -869,7 +882,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	struct bkey_cached *ck, *n;
 	struct rhash_head *pos;
 	unsigned i;
+#ifdef __KERNEL__
 	int cpu;
+#endif
 
 	if (bc->shrink.list.next)
 		unregister_shrinker(&bc->shrink);
@@ -886,6 +901,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	}
 	rcu_read_unlock();
 
+#ifdef __KERNEL__
 	for_each_possible_cpu(cpu) {
 		struct btree_key_cache_freelist *f =
 			per_cpu_ptr(bc->pcpu_freed, cpu);
@@ -895,6 +911,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 			list_add(&ck->list, &bc->freed_nonpcpu);
 		}
 	}
+#endif
 
 	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
 
@@ -910,10 +927,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 		kmem_cache_free(bch2_key_cache, ck);
 	}
 
-	BUG_ON(atomic_long_read(&bc->nr_dirty) &&
-	       !bch2_journal_error(&c->journal) &&
-	       test_bit(BCH_FS_WAS_RW, &c->flags));
-	BUG_ON(atomic_long_read(&bc->nr_keys));
+	if (atomic_long_read(&bc->nr_dirty) &&
+	    !bch2_journal_error(&c->journal) &&
+	    test_bit(BCH_FS_WAS_RW, &c->flags))
+		panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
+		      atomic_long_read(&bc->nr_dirty));
+
+	if (atomic_long_read(&bc->nr_keys))
+		panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
+		      atomic_long_read(&bc->nr_keys));
 
 	mutex_unlock(&bc->lock);
 
@@ -943,9 +965,11 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
 	int ret;
 
+#ifdef __KERNEL__
 	bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
 	if (!bc->pcpu_freed)
 		return -ENOMEM;
+#endif
 
 	ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params);
 	if (ret)
@@ -94,6 +94,37 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
 	prt_newline(out);
 }
 
+static void lock_graph_up(struct lock_graph *g)
+{
+	closure_put(&g->g[--g->nr].trans->ref);
+}
+
+static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
+{
+	closure_get(&trans->ref);
+
+	g->g[g->nr++] = (struct trans_waiting_for_lock) {
+		.trans		= trans,
+		.node_want	= trans->locking,
+		.lock_want	= trans->locking_wait.lock_want,
+	};
+}
+
+static bool lock_graph_remove_non_waiters(struct lock_graph *g)
+{
+	struct trans_waiting_for_lock *i;
+
+	for (i = g->g + 1; i < g->g + g->nr; i++)
+		if (i->trans->locking != i->node_want ||
+		    i->trans->locking_wait.start_time != i[-1].lock_start_time) {
+			while (g->g + g->nr > i)
+				lock_graph_up(g);
+			return true;
+		}
+
+	return false;
+}
+
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
 	if (i == g->g) {
@@ -106,40 +137,42 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 	}
 }
 
-static noinline int break_cycle(struct lock_graph *g)
+static int btree_trans_abort_preference(struct btree_trans *trans)
 {
-	struct trans_waiting_for_lock *i;
-
-	/*
-	 * We'd like to prioritize aborting transactions that have done less
-	 * work - but it appears breaking cycles by telling other transactions
-	 * to abort may still be buggy:
-	 */
-#if 0
-	for (i = g->g; i < g->g + g->nr; i++) {
-		if (i->trans->lock_may_not_fail ||
-		    i->trans->locking_wait.lock_want == SIX_LOCK_write)
-			continue;
-
-		return abort_lock(g, i);
-	}
+	if (trans->lock_may_not_fail)
+		return 0;
+	if (trans->locking_wait.lock_want == SIX_LOCK_write)
+		return 1;
+	if (!trans->in_traverse_all)
+		return 2;
+	return 3;
+}
 
-	for (i = g->g; i < g->g + g->nr; i++) {
-		if (i->trans->lock_may_not_fail ||
-		    !i->trans->in_traverse_all)
-			continue;
-
-		return abort_lock(g, i);
-	}
-#endif
-	for (i = g->g; i < g->g + g->nr; i++) {
-		if (i->trans->lock_may_not_fail)
-			continue;
-
-		return abort_lock(g, i);
+static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
+{
+	struct trans_waiting_for_lock *i, *abort = NULL;
+	unsigned best = 0, pref;
+	int ret;
+
+	if (lock_graph_remove_non_waiters(g))
+		return 0;
+
+	/* Only checking, for debugfs: */
+	if (cycle) {
+		print_cycle(cycle, g);
+		ret = -1;
+		goto out;
+	}
+
+	for (i = g->g; i < g->g + g->nr; i++) {
+		pref = btree_trans_abort_preference(i->trans);
+		if (pref > best) {
+			abort = i;
+			best = pref;
+		}
 	}
 
-	{
+	if (unlikely(!best)) {
 		struct bch_fs *c = g->g->trans->c;
 		struct printbuf buf = PRINTBUF;
 
@@ -162,21 +195,13 @@ static noinline int break_cycle(struct lock_graph *g)
 		printbuf_exit(&buf);
 		BUG();
 	}
-}
-
-static void lock_graph_pop(struct lock_graph *g)
-{
-	closure_put(&g->g[--g->nr].trans->ref);
-}
-
-static void lock_graph_pop_above(struct lock_graph *g, struct trans_waiting_for_lock *above,
-				 struct printbuf *cycle)
-{
-	if (g->nr > 1 && cycle)
-		print_chain(cycle, g);
 
-	while (g->g + g->nr > above)
-		lock_graph_pop(g);
+	ret = abort_lock(g, abort);
+out:
+	if (ret)
+		while (g->nr)
+			lock_graph_up(g);
+	return ret;
 }
 
 static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
@@ -184,67 +209,23 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
 {
 	struct btree_trans *orig_trans = g->g->trans;
 	struct trans_waiting_for_lock *i;
-	int ret = 0;
-
-	for (i = g->g; i < g->g + g->nr; i++) {
-		if (i->trans->locking != i->node_want) {
-			lock_graph_pop_above(g, i - 1, cycle);
-			return 0;
-		}
 
-		if (i->trans == trans) {
-			if (cycle) {
-				/* Only checking: */
-				print_cycle(cycle, g);
-				ret = -1;
-			} else {
-				ret = break_cycle(g);
-			}
-
-			if (ret)
-				goto deadlock;
-			/*
-			 * If we didn't abort (instead telling another
-			 * transaction to abort), keep checking:
-			 */
-		}
-	}
+	for (i = g->g; i < g->g + g->nr; i++)
+		if (i->trans == trans)
+			return break_cycle(g, cycle);
 
 	if (g->nr == ARRAY_SIZE(g->g)) {
 		if (orig_trans->lock_may_not_fail)
 			return 0;
 
+		while (g->nr)
+			lock_graph_up(g);
 		trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
-		ret = btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
-		goto deadlock;
+		return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
 	}
 
-	closure_get(&trans->ref);
-
-	g->g[g->nr++] = (struct trans_waiting_for_lock) {
-		.trans		= trans,
-		.node_want	= trans->locking,
-		.lock_want	= trans->locking_wait.lock_want,
-	};
-
+	lock_graph_down(g, trans);
 	return 0;
-deadlock:
-	lock_graph_pop_above(g, g->g, cycle);
-	return ret;
-}
-
-static noinline void lock_graph_remove_non_waiters(struct lock_graph *g,
-						   struct printbuf *cycle)
-{
-	struct trans_waiting_for_lock *i;
-
-	for (i = g->g + 1; i < g->g + g->nr; i++)
-		if (i->trans->locking != i->node_want ||
-		    i->trans->locking_wait.start_time != i[-1].lock_start_time) {
-			lock_graph_pop_above(g, i - 1, cycle);
-			return;
-		}
-	BUG();
 }
 
 static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
@@ -266,8 +247,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
 	}
 
 	g.nr = 0;
-	ret = lock_graph_descend(&g, trans, cycle);
-	BUG_ON(ret);
+	lock_graph_down(&g, trans);
 next:
 	if (!g.nr)
 		return 0;
@@ -295,7 +275,7 @@ next:
 			b = &READ_ONCE(path->l[top->level].b)->c;
 
 			if (unlikely(IS_ERR_OR_NULL(b))) {
-				lock_graph_remove_non_waiters(&g, cycle);
+				BUG_ON(!lock_graph_remove_non_waiters(&g));
 				goto next;
 			}
 
@@ -321,7 +301,7 @@ next:
 			raw_spin_unlock(&b->lock.wait_lock);
 
 			if (ret)
-				return ret < 0 ? ret : 0;
+				return ret;
 			goto next;
 
 		}
@@ -331,7 +311,7 @@ next:
 
 	if (g.nr > 1 && cycle)
 		print_chain(cycle, &g);
-	lock_graph_pop(&g);
+	lock_graph_up(&g);
 	goto next;
 }
 
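The locking hunks above replace three open-coded scans in break_cycle() with a single preference score. A standalone sketch of that victim-selection policy, using hypothetical simplified types (struct waiter, abort_preference and pick_victim are illustrative, not from the source):

#include <stddef.h>

struct waiter {
	int lock_may_not_fail;
	int wants_write_lock;
	int in_traverse_all;
};

static int abort_preference(const struct waiter *w)
{
	if (w->lock_may_not_fail)
		return 0;	/* never abort */
	if (w->wants_write_lock)
		return 1;
	if (!w->in_traverse_all)
		return 2;
	return 3;		/* cheapest to abort */
}

static const struct waiter *pick_victim(const struct waiter *g, size_t nr)
{
	const struct waiter *abort = NULL;
	int best = 0, pref;
	size_t i;

	/* scan the whole cycle, keep the waiter with the highest score */
	for (i = 0; i < nr; i++) {
		pref = abort_preference(&g[i]);
		if (pref > best) {
			abort = &g[i];
			best = pref;
		}
	}
	return abort;	/* NULL means every waiter is nofail: a bug */
}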
@@ -88,7 +88,7 @@ static inline void mark_btree_node_locked(struct btree_trans *trans,
 {
 	mark_btree_node_locked_noreset(path, level, type);
 #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-	path->l[level].lock_taken_time = ktime_get_ns();
+	path->l[level].lock_taken_time = local_clock();
 #endif
 }
 
@@ -120,7 +120,7 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
 	if (s)
 		__bch2_time_stats_update(&s->lock_hold_times,
 					 path->l[level].lock_taken_time,
-					 ktime_get_ns());
+					 local_clock());
 #endif
 }
 
@@ -260,7 +260,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
 	    btree_node_lock_increment(trans, b, level, type) ||
 	    !(ret = btree_node_lock_nopath(trans, b, type))) {
 #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-		path->l[b->level].lock_taken_time = ktime_get_ns();
+		path->l[b->level].lock_taken_time = local_clock();
 #endif
 	}
 
@@ -872,7 +872,9 @@ retry:
 	while (1) {
 		bch2_trans_begin(trans);
 
-		ret = bch2_get_next_backpointer(trans, bucket_pos, bucket.gen, &bp_offset, &bp);
+		ret = bch2_get_next_backpointer(trans, bucket_pos, bucket.gen,
+						&bp_offset, &bp,
+						BTREE_ITER_CACHED);
 		if (ret)
 			break;
 		if (bp_offset == U64_MAX)
@@ -212,6 +212,11 @@ int bch2_link_trans(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
+	if (bch2_reinherit_attrs(inode_u, dir_u)) {
+		ret = -EXDEV;
+		goto err;
+	}
+
 	dir_u->bi_mtime = dir_u->bi_ctime = now;
 
 	dir_hash = bch2_hash_info_init(c, dir_u);
@@ -1527,7 +1527,7 @@ out:
 	if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate) {
 		ret = bch2_page_state_set(c, inode_inum(inode), &page, 1);
 		if (ret)
-			goto out;
+			goto err;
 	}
 
 	ret = bch2_page_reservation_get(c, inode, page, res,
@@ -3102,6 +3102,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
 	inode_dio_wait(&inode->v);
 	bch2_pagecache_block_get(&inode->ei_pagecache_lock);
 
+	ret = file_modified(file);
+	if (ret)
+		goto err;
+
 	if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
 		ret = bchfs_fallocate(inode, mode, offset, len);
 	else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
@@ -3112,8 +3116,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
 		ret = bchfs_fcollapse_finsert(inode, offset, len, false);
 	else
 		ret = -EOPNOTSUPP;
-
-
+err:
 	bch2_pagecache_block_put(&inode->ei_pagecache_lock);
 	inode_unlock(&inode->v);
 	percpu_ref_put(&c->writes);
@@ -26,6 +26,9 @@ struct flags_set {
 	unsigned		flags;
 
 	unsigned		projid;
+
+	bool			set_projinherit;
+	bool			projinherit;
 };
 
 static int bch2_inode_flags_set(struct bch_inode_info *inode,
@@ -50,6 +53,11 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode,
 	    (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
 		return -EINVAL;
 
+	if (s->set_projinherit) {
+		bi->bi_fields_set &= ~(1 << Inode_opt_project);
+		bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
+	}
+
 	bi->bi_flags &= ~s->mask;
 	bi->bi_flags |= newflags;
 
@@ -107,6 +115,10 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
 	struct fsxattr fa = { 0 };
 
 	fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+
+	if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+		fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
 	fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
 
 	return copy_to_user(arg, &fa, sizeof(fa));
@@ -138,6 +150,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
 	if (copy_from_user(&fa, arg, sizeof(fa)))
 		return -EFAULT;
 
+	s.set_projinherit = true;
+	s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
+	fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
+
 	s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
 	if (fa.fsx_xflags)
 		return -EOPNOTSUPP;
@@ -419,7 +419,7 @@ static int bch2_mknod(struct user_namespace *mnt_userns,
 			       (subvol_inum) { 0 }, 0);
 
 	if (IS_ERR(inode))
-		return PTR_ERR(inode);
+		return bch2_err_class(PTR_ERR(inode));
 
 	d_instantiate(dentry, &inode->v);
 	return 0;
@@ -529,7 +529,7 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
 	inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
 			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 	if (unlikely(IS_ERR(inode)))
-		return PTR_ERR(inode);
+		return bch2_err_class(PTR_ERR(inode));
 
 	inode_lock(&inode->v);
 	ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
@@ -838,7 +838,7 @@ static int bch2_tmpfile(struct user_namespace *mnt_userns,
 			       (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
 
 	if (IS_ERR(inode))
-		return PTR_ERR(inode);
+		return bch2_err_class(PTR_ERR(inode));
 
 	d_mark_tmpfile(dentry, &inode->v);
 	d_instantiate(dentry, &inode->v);
@@ -17,6 +17,23 @@
 
 #include <trace/events/bcachefs.h>
 
+static struct nonce journal_nonce(const struct jset *jset)
+{
+	return (struct nonce) {{
+		[0] = 0,
+		[1] = ((__le32 *) &jset->seq)[0],
+		[2] = ((__le32 *) &jset->seq)[1],
+		[3] = BCH_NONCE_JOURNAL,
+	}};
+}
+
+static bool jset_csum_good(struct bch_fs *c, struct jset *j)
+{
+	return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
+		!bch2_crc_cmp(j->csum,
+			      csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
+}
+
 static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
 {
 	return (seq - c->journal_entries_base_seq) & (~0U >> 1);
@@ -59,8 +76,7 @@ struct journal_list {
  */
 static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 			     struct journal_ptr entry_ptr,
-			     struct journal_list *jlist, struct jset *j,
-			     bool bad)
+			     struct journal_list *jlist, struct jset *j)
 {
 	struct genradix_iter iter;
 	struct journal_replay **_i, *i, *dup;
@@ -111,38 +127,53 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 	 */
 	dup = *_i;
 	if (dup) {
-		if (dup->bad) {
-			/* we'll replace @dup: */
-		} else if (bad) {
+		if (bytes == vstruct_bytes(&dup->j) &&
+		    !memcmp(j, &dup->j, bytes)) {
 			i = dup;
 			goto found;
-		} else {
-			fsck_err_on(bytes != vstruct_bytes(&dup->j) ||
-				    memcmp(j, &dup->j, bytes), c,
-				    "found duplicate but non identical journal entries (seq %llu)",
-				    le64_to_cpu(j->seq));
+		}
+
+		if (!entry_ptr.csum_good) {
 			i = dup;
 			goto found;
 		}
+
+		if (!dup->csum_good)
+			goto replace;
+
+		fsck_err(c, "found duplicate but non identical journal entries (seq %llu)",
+			 le64_to_cpu(j->seq));
+		i = dup;
+		goto found;
 	}
-
+replace:
 	i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
 	if (!i)
 		return -ENOMEM;
 
 	i->nr_ptrs	= 0;
-	i->bad		= bad;
+	i->csum_good	= entry_ptr.csum_good;
 	i->ignore	= false;
 	memcpy(&i->j, j, bytes);
+	i->ptrs[i->nr_ptrs++] = entry_ptr;
 
 	if (dup) {
-		i->nr_ptrs = dup->nr_ptrs;
-		memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
+		if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
+			bch_err(c, "found too many copies of journal entry %llu",
+				le64_to_cpu(i->j.seq));
+			dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
+		}
+
+		/* The first ptr should represent the jset we kept: */
+		memcpy(i->ptrs + i->nr_ptrs,
+		       dup->ptrs,
+		       sizeof(dup->ptrs[0]) * dup->nr_ptrs);
+		i->nr_ptrs += dup->nr_ptrs;
 		__journal_replay_free(c, dup);
 	}
 
 	*_i = i;
 	return 0;
 found:
 	for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
 		if (ptr->dev == ca->dev_idx) {
@@ -164,16 +195,6 @@ fsck_err:
 	return ret;
 }
 
-static struct nonce journal_nonce(const struct jset *jset)
-{
-	return (struct nonce) {{
-		[0] = 0,
-		[1] = ((__le32 *) &jset->seq)[0],
-		[2] = ((__le32 *) &jset->seq)[1],
-		[3] = BCH_NONCE_JOURNAL,
-	}};
-}
-
 /* this fills in a range with empty jset_entries: */
 static void journal_entry_null_range(void *start, void *end)
 {
@@ -715,12 +736,8 @@ fsck_err:
 static int jset_validate(struct bch_fs *c,
 			 struct bch_dev *ca,
 			 struct jset *jset, u64 sector,
-			 unsigned bucket_sectors_left,
-			 unsigned sectors_read,
 			 int write)
 {
-	size_t bytes = vstruct_bytes(jset);
 	struct bch_csum csum;
 	unsigned version;
 	int ret = 0;
@@ -737,21 +754,7 @@ static int jset_validate(struct bch_fs *c,
 				 sector, le64_to_cpu(jset->seq),
 				 version)) {
 		/* don't try to continue: */
-		return EINVAL;
+		return -EINVAL;
 	}
 
-	if (bytes > (sectors_read << 9) &&
-	    sectors_read < bucket_sectors_left)
-		return JOURNAL_ENTRY_REREAD;
-
-	if (journal_entry_err_on(bytes > bucket_sectors_left << 9,
-				 c, jset, NULL,
-				 "%s sector %llu seq %llu: journal entry too big (%zu bytes)",
-				 ca ? ca->name : c->name,
-				 sector, le64_to_cpu(jset->seq), bytes)) {
-		ret = JOURNAL_ENTRY_BAD;
-		le32_add_cpu(&jset->u64s,
-			     -((bytes - (bucket_sectors_left << 9)) / 8));
-	}
-
 	if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)),
@@ -759,28 +762,9 @@ static int jset_validate(struct bch_fs *c,
 				 "%s sector %llu seq %llu: journal entry with unknown csum type %llu",
 				 ca ? ca->name : c->name,
 				 sector, le64_to_cpu(jset->seq),
-				 JSET_CSUM_TYPE(jset))) {
+				 JSET_CSUM_TYPE(jset)))
 		ret = JOURNAL_ENTRY_BAD;
-		goto csum_done;
-	}
-
-	if (write)
-		goto csum_done;
-
-	csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
-	if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum),
-				 c, jset, NULL,
-				 "%s sector %llu seq %llu: journal checksum bad",
-				 ca ? ca->name : c->name,
-				 sector, le64_to_cpu(jset->seq)))
-		ret = JOURNAL_ENTRY_BAD;
-
-	ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
-			   jset->encrypted_start,
-			   vstruct_end(jset) - (void *) jset->encrypted_start);
-	bch2_fs_fatal_err_on(ret, c,
-			     "error decrypting journal entry: %i", ret);
-csum_done:
+
 	/* last_seq is ignored when JSET_NO_FLUSH is true */
 	if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
 				 le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq),
@@ -791,16 +775,52 @@ csum_done:
 		jset->last_seq = jset->seq;
 		return JOURNAL_ENTRY_BAD;
 	}
 
 	ret = jset_validate_entries(c, jset, write);
 fsck_err:
 	return ret;
 }
 
-static int jset_validate_for_write(struct bch_fs *c, struct jset *jset)
+static int jset_validate_early(struct bch_fs *c,
+			       struct bch_dev *ca,
+			       struct jset *jset, u64 sector,
+			       unsigned bucket_sectors_left,
+			       unsigned sectors_read)
 {
-	unsigned sectors = vstruct_sectors(jset, c->block_bits);
+	size_t bytes = vstruct_bytes(jset);
+	unsigned version;
+	int write = READ;
+	int ret = 0;
 
-	return jset_validate(c, NULL, jset, 0, sectors, sectors, WRITE) ?:
-		jset_validate_entries(c, jset, WRITE);
+	if (le64_to_cpu(jset->magic) != jset_magic(c))
+		return JOURNAL_ENTRY_NONE;
+
+	version = le32_to_cpu(jset->version);
+	if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
+				  version < bcachefs_metadata_version_min) ||
+				 version >= bcachefs_metadata_version_max,
+				 c, jset, NULL,
+				 "%s sector %llu seq %llu: unknown journal entry version %u",
+				 ca ? ca->name : c->name,
+				 sector, le64_to_cpu(jset->seq),
+				 version)) {
+		/* don't try to continue: */
+		return -EINVAL;
+	}
+
+	if (bytes > (sectors_read << 9) &&
+	    sectors_read < bucket_sectors_left)
+		return JOURNAL_ENTRY_REREAD;
+
+	if (journal_entry_err_on(bytes > bucket_sectors_left << 9,
+				 c, jset, NULL,
+				 "%s sector %llu seq %llu: journal entry too big (%zu bytes)",
+				 ca ? ca->name : c->name,
+				 sector, le64_to_cpu(jset->seq), bytes))
+		le32_add_cpu(&jset->u64s,
+			     -((bytes - (bucket_sectors_left << 9)) / 8));
+fsck_err:
+	return ret;
 }
 
 struct journal_read_buf {
@@ -839,7 +859,7 @@ static int journal_read_bucket(struct bch_dev *ca,
 	unsigned sectors, sectors_read = 0;
 	u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
 	    end = offset + ca->mi.bucket_size;
-	bool saw_bad = false;
+	bool saw_bad = false, csum_good;
 	int ret = 0;
 
 	pr_debug("reading %u", bucket);
@@ -878,9 +898,8 @@ reread:
 			j = buf->data;
 		}
 
-		ret = jset_validate(c, ca, j, offset,
-				    end - offset, sectors_read,
-				    READ);
+		ret = jset_validate_early(c, ca, j, offset,
+				    end - offset, sectors_read);
 		switch (ret) {
 		case 0:
 			sectors = vstruct_sectors(j, c->block_bits);
@@ -896,17 +915,13 @@ reread:
 		case JOURNAL_ENTRY_NONE:
 			if (!saw_bad)
 				return 0;
-			sectors = block_sectors(c);
-			goto next_block;
-		case JOURNAL_ENTRY_BAD:
-			saw_bad = true;
 			/*
 			 * On checksum error we don't really trust the size
 			 * field of the journal entry we read, so try reading
 			 * again at next block boundary:
 			 */
 			sectors = block_sectors(c);
-			break;
+			goto next_block;
 		default:
 			return ret;
 		}
@@ -922,14 +937,25 @@ reread:
 
 		ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
 
+		csum_good = jset_csum_good(c, j);
+		if (!csum_good)
+			saw_bad = true;
+
+		ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
+			     j->encrypted_start,
+			     vstruct_end(j) - (void *) j->encrypted_start);
+		bch2_fs_fatal_err_on(ret, c,
+				"error decrypting journal entry: %i", ret);
+
 		mutex_lock(&jlist->lock);
 		ret = journal_entry_add(c, ca, (struct journal_ptr) {
+					.csum_good	= csum_good,
 					.dev		= ca->dev_idx,
 					.bucket		= bucket,
 					.bucket_offset	= offset -
 						bucket_to_sector(ca, ja->buckets[bucket]),
 					.sector		= offset,
-					}, jlist, j, ret != 0);
+					}, jlist, j);
 		mutex_unlock(&jlist->lock);
 
 		switch (ret) {
@@ -1128,6 +1154,19 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 		*start_seq = le64_to_cpu(i->j.seq) + 1;
 
 		if (!JSET_NO_FLUSH(&i->j)) {
+			int write = READ;
+			if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq),
+						 c, &i->j, NULL,
+						 "invalid journal entry: last_seq > seq (%llu > %llu)",
+						 le64_to_cpu(i->j.last_seq),
+						 le64_to_cpu(i->j.seq)))
+				i->j.last_seq = i->j.seq;
+
+			pr_info("last flush %llu-%llu csum good %u",
+				le64_to_cpu(i->j.last_seq),
+				le64_to_cpu(i->j.seq),
+				i->csum_good);
+
 			last_seq	= le64_to_cpu(i->j.last_seq);
 			*blacklist_seq	= le64_to_cpu(i->j.seq) + 1;
 			break;
@@ -1231,7 +1270,21 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 		if (!i || i->ignore)
 			continue;
 
-		ret = jset_validate_entries(c, &i->j, READ);
+		for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
+			struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
+
+			if (!i->ptrs[ptr].csum_good)
+				printk(KERN_ERR "bcachefs (%s) sector %llu: invalid journal checksum, seq %llu%s\n",
+				       ca->name, i->ptrs[ptr].sector,
+				       le64_to_cpu(i->j.seq),
+				       i->csum_good ? " (had good copy on another device)" : "");
+		}
+
+		ret = jset_validate(c,
+				    bch_dev_bkey_exists(c, i->ptrs[0].dev),
+				    &i->j,
+				    i->ptrs[0].sector,
+				    READ);
 		if (ret)
 			goto err;
 
@@ -1667,7 +1720,7 @@ void bch2_journal_write(struct closure *cl)
 		validate_before_checksum = true;
 
 	if (validate_before_checksum &&
-	    jset_validate_for_write(c, jset))
+	    jset_validate(c, NULL, jset, 0, WRITE))
 		goto err;
 
 	ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
@@ -1681,7 +1734,7 @@ void bch2_journal_write(struct closure *cl)
 				  journal_nonce(jset), jset);
 
 	if (!validate_before_checksum &&
-	    jset_validate_for_write(c, jset))
+	    jset_validate(c, NULL, jset, 0, WRITE))
 		goto err;
 
 	sectors = vstruct_sectors(jset, c->block_bits);
@@ -8,6 +8,7 @@
  */
 struct journal_replay {
 	struct journal_ptr {
+		bool		csum_good;
 		u8		dev;
 		u32		bucket;
 		u32		bucket_offset;
@@ -15,8 +16,7 @@ struct journal_replay {
 	}			ptrs[BCH_REPLICAS_MAX];
 	unsigned		nr_ptrs;
 
-	/* checksum error, but we may want to try using it anyways: */
-	bool			bad;
+	bool			csum_good;
 	bool			ignore;
 	/* must be last: */
 	struct jset		j;
@@ -628,7 +628,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 		bch2_trans_begin(&trans);
 
 		ret = bch2_get_next_backpointer(&trans, bucket, gen,
-						&bp_offset, &bp);
+						&bp_offset, &bp,
+						BTREE_ITER_CACHED);
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			continue;
 		if (ret)
@@ -162,7 +162,7 @@ static int bch2_copygc(struct bch_fs *c)
 
 	bch2_moving_ctxt_exit(&ctxt);
 
-	if (ret < 0)
+	if (ret < 0 && ret != -EROFS)
 		bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
 
 	trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
@@ -95,6 +95,113 @@ void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
 #include <linux/fs.h>
 #include <linux/quota.h>
 
+static void qc_info_to_text(struct printbuf *out, struct qc_info *i)
+{
+	printbuf_tabstops_reset(out);
+	printbuf_tabstop_push(out, 20);
+
+	prt_str(out, "i_fieldmask");
+	prt_tab(out);
+	prt_printf(out, "%x", i->i_fieldmask);
+	prt_newline(out);
+
+	prt_str(out, "i_flags");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_flags);
+	prt_newline(out);
+
+	prt_str(out, "i_spc_timelimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_spc_timelimit);
+	prt_newline(out);
+
+	prt_str(out, "i_ino_timelimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_ino_timelimit);
+	prt_newline(out);
+
+	prt_str(out, "i_rt_spc_timelimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_rt_spc_timelimit);
+	prt_newline(out);
+
+	prt_str(out, "i_spc_warnlimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_spc_warnlimit);
+	prt_newline(out);
+
+	prt_str(out, "i_ino_warnlimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_ino_warnlimit);
+	prt_newline(out);
+
+	prt_str(out, "i_rt_spc_warnlimit");
+	prt_tab(out);
+	prt_printf(out, "%u", i->i_rt_spc_warnlimit);
+	prt_newline(out);
+}
+
+static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q)
+{
+	printbuf_tabstops_reset(out);
+	printbuf_tabstop_push(out, 20);
+
+	prt_str(out, "d_fieldmask");
+	prt_tab(out);
+	prt_printf(out, "%x", q->d_fieldmask);
+	prt_newline(out);
+
+	prt_str(out, "d_spc_hardlimit");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_spc_hardlimit);
+	prt_newline(out);
+
+	prt_str(out, "d_spc_softlimit");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_spc_softlimit);
+	prt_newline(out);
+
+	prt_str(out, "d_ino_hardlimit");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_ino_hardlimit);
+	prt_newline(out);
+
+	prt_str(out, "d_ino_softlimit");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_ino_softlimit);
+	prt_newline(out);
+
+	prt_str(out, "d_space");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_space);
+	prt_newline(out);
+
+	prt_str(out, "d_ino_count");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_ino_count);
+	prt_newline(out);
+
+	prt_str(out, "d_ino_timer");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_ino_timer);
+	prt_newline(out);
+
+	prt_str(out, "d_spc_timer");
+	prt_tab(out);
+	prt_printf(out, "%llu", q->d_spc_timer);
+	prt_newline(out);
+
+	prt_str(out, "d_ino_warns");
+	prt_tab(out);
+	prt_printf(out, "%i", q->d_ino_warns);
+	prt_newline(out);
+
+	prt_str(out, "d_spc_warns");
+	prt_tab(out);
+	prt_printf(out, "%i", q->d_spc_warns);
+	prt_newline(out);
+}
+
 static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
 {
 	qtypes >>= i;
@@ -413,6 +520,26 @@ void bch2_fs_quota_init(struct bch_fs *c)
 		mutex_init(&c->quotas[i].lock);
 }
 
+static struct bch_sb_field_quota *bch2_sb_get_or_create_quota(struct bch_sb_handle *sb)
+{
+	struct bch_sb_field_quota *sb_quota = bch2_sb_get_quota(sb->sb);
+
+	if (sb_quota)
+		return sb_quota;
+
+	sb_quota = bch2_sb_resize_quota(sb, sizeof(*sb_quota) / sizeof(u64));
+	if (sb_quota) {
+		unsigned qtype, qc;
+
+		for (qtype = 0; qtype < QTYP_NR; qtype++)
+			for (qc = 0; qc < Q_COUNTERS; qc++)
+				sb_quota->q[qtype].c[qc].timelimit =
+					cpu_to_le32(7 * 24 * 60 * 60);
+	}
+
+	return sb_quota;
+}
+
 static void bch2_sb_quota_read(struct bch_fs *c)
 {
 	struct bch_sb_field_quota *sb_quota;
@@ -471,12 +598,19 @@ advance:
 
 int bch2_fs_quota_read(struct bch_fs *c)
 {
+	struct bch_sb_field_quota *sb_quota;
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret;
 
 	mutex_lock(&c->sb_lock);
+	sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+	if (!sb_quota) {
+		mutex_unlock(&c->sb_lock);
+		return -BCH_ERR_ENOSPC_sb_quota;
+	}
+
 	bch2_sb_quota_read(c);
 	mutex_unlock(&c->sb_lock);
 
@@ -500,6 +634,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
 static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 {
 	struct bch_fs *c = sb->s_fs_info;
+	struct bch_sb_field_quota *sb_quota;
+	int ret = 0;
 
 	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
@@ -519,6 +655,12 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 		return -EINVAL;
 
 	mutex_lock(&c->sb_lock);
+	sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+	if (!sb_quota) {
+		ret = -BCH_ERR_ENOSPC_sb_quota;
+		goto unlock;
+	}
+
 	if (uflags & FS_QUOTA_UDQ_ENFD)
 		SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true);
 
@@ -529,9 +671,10 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 		SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);
 
 	bch2_write_super(c);
+unlock:
 	mutex_unlock(&c->sb_lock);
 
-	return 0;
+	return bch2_err_class(ret);
 }
 
 static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
@@ -643,6 +786,15 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
 	struct bch_fs *c = sb->s_fs_info;
 	struct bch_sb_field_quota *sb_quota;
 	struct bch_memquota_type *q;
+	int ret = 0;
+
+	if (0) {
+		struct printbuf buf = PRINTBUF;
+
+		qc_info_to_text(&buf, info);
+		pr_info("setting:\n%s", buf.buf);
+		printbuf_exit(&buf);
+	}
 
 	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
@@ -660,12 +812,10 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
 	q = &c->quotas[type];
 
 	mutex_lock(&c->sb_lock);
-	sb_quota = bch2_sb_get_quota(c->disk_sb.sb);
+	sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
 	if (!sb_quota) {
-		sb_quota = bch2_sb_resize_quota(&c->disk_sb,
-					sizeof(*sb_quota) / sizeof(u64));
-		if (!sb_quota)
-			return -BCH_ERR_ENOSPC_sb_quota;
+		ret = -BCH_ERR_ENOSPC_sb_quota;
+		goto unlock;
 	}
 
 	if (info->i_fieldmask & QC_SPC_TIMER)
@@ -687,9 +837,10 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
 	bch2_sb_quota_read(c);
 
 	bch2_write_super(c);
+unlock:
 	mutex_unlock(&c->sb_lock);
 
-	return 0;
+	return bch2_err_class(ret);
 }
 
 /* Get/set individual quotas: */
@@ -794,6 +945,14 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
 	struct bkey_i_quota new_quota;
 	int ret;
 
+	if (0) {
+		struct printbuf buf = PRINTBUF;
+
+		qc_dqblk_to_text(&buf, qdq);
+		pr_info("setting:\n%s", buf.buf);
+		printbuf_exit(&buf);
+	}
+
 	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
@@ -895,6 +895,12 @@ int bch2_fs_start(struct bch_fs *c)
 		bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
 
+	for (i = 0; i < BCH_TRANSACTIONS_NR; i++) {
+		mutex_lock(&c->btree_transaction_stats[i].lock);
+		bch2_time_stats_init(&c->btree_transaction_stats[i].lock_hold_times);
+		mutex_unlock(&c->btree_transaction_stats[i].lock);
+	}
+
 	ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
 		? bch2_fs_recovery(c)
 		: bch2_fs_initialize(c);
@@ -22,6 +22,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/sched/clock.h>
+#include <linux/mean_and_variance.h>
 
 #include "eytzinger.h"
 #include "util.h"
@@ -323,38 +324,44 @@ static void bch2_time_stats_update_one(struct time_stats *stats,
 {
 	u64 duration, freq;
 
-	duration	= time_after64(end, start)
-		? end - start : 0;
-	freq		= time_after64(end, stats->last_event)
-		? end - stats->last_event : 0;
-
-	stats->count++;
-
-	stats->average_duration = stats->average_duration
-		? ewma_add(stats->average_duration, duration, 6)
-		: duration;
-
-	stats->average_frequency = stats->average_frequency
-		? ewma_add(stats->average_frequency, freq, 6)
-		: freq;
-
-	stats->max_duration = max(stats->max_duration, duration);
-
-	stats->last_event = end;
-
-	bch2_quantiles_update(&stats->quantiles, duration);
+	if (time_after64(end, start)) {
+		duration = end - start;
+		stats->duration_stats = mean_and_variance_update(stats->duration_stats,
+								 duration);
+		stats->duration_stats_weighted = mean_and_variance_weighted_update(
+			stats->duration_stats_weighted,
+			duration);
+		stats->max_duration = max(stats->max_duration, duration);
+		stats->min_duration = min(stats->min_duration, duration);
+		bch2_quantiles_update(&stats->quantiles, duration);
+	}
+
+	if (time_after64(end, stats->last_event)) {
+		freq = end - stats->last_event;
+		stats->freq_stats = mean_and_variance_update(stats->freq_stats, freq);
+		stats->freq_stats_weighted = mean_and_variance_weighted_update(
+			stats->freq_stats_weighted,
+			freq);
+		stats->max_freq = max(stats->max_freq, freq);
+		stats->min_freq = min(stats->min_freq, freq);
+		stats->last_event = end;
+	}
 }
 
 void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
 {
 	unsigned long flags;
 
+	WARN_RATELIMIT(!stats->min_duration || !stats->min_freq,
+		       "time_stats: min_duration = %llu, min_freq = %llu",
+		       stats->min_duration, stats->min_freq);
+
 	if (!stats->buffer) {
 		spin_lock_irqsave(&stats->lock, flags);
 		bch2_time_stats_update_one(stats, start, end);
 
-		if (stats->average_frequency < 32 &&
-		    stats->count > 1024)
+		if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
+		    stats->duration_stats.n > 1024)
 			stats->buffer =
 				alloc_percpu_gfp(struct time_stat_buffer,
 						 GFP_ATOMIC);
@@ -389,12 +396,15 @@ void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
 
 static const struct time_unit {
 	const char	*name;
-	u32		nsecs;
+	u64		nsecs;
 } time_units[] = {
 	{ "ns",		1		 },
 	{ "us",		NSEC_PER_USEC	 },
 	{ "ms",		NSEC_PER_MSEC	 },
-	{ "sec",	NSEC_PER_SEC	 },
+	{ "s",		NSEC_PER_SEC	 },
+	{ "m",		NSEC_PER_SEC * 60},
+	{ "h",		NSEC_PER_SEC * 3600},
+	{ "eon",	U64_MAX		 },
 };
 
 static const struct time_unit *pick_time_units(u64 ns)
@@ -414,38 +424,117 @@ static void pr_time_units(struct printbuf *out, u64 ns)
 {
 	const struct time_unit *u = pick_time_units(ns);
 
-	prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
+	prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
+	prt_tab_rjust(out);
+	prt_printf(out, "%s", u->name);
 }
 
+#define TABSTOP_SIZE 12
+
+static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
+{
+	prt_printf(out, name);
+	prt_tab(out);
+	pr_time_units(out, ns);
+	prt_newline(out);
+}
+
 void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
 {
 	const struct time_unit *u;
-	u64 freq = READ_ONCE(stats->average_frequency);
-	u64 q, last_q = 0;
+	s64 f_mean = 0, d_mean = 0;
+	u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
 	int i;
+	/*
+	 * avoid divide by zero
+	 */
+	if (stats->freq_stats.n) {
+		f_mean = mean_and_variance_get_mean(stats->freq_stats);
+		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+		d_mean = mean_and_variance_get_mean(stats->duration_stats);
+		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+	}
 
-	prt_printf(out, "count:\t\t%llu",
-			 stats->count);
-	prt_newline(out);
-	prt_printf(out, "rate:\t\t%llu/sec",
-		   freq ? div64_u64(NSEC_PER_SEC, freq) : 0);
-	prt_newline(out);
+	printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE);
+	prt_printf(out, "count:");
+	prt_tab(out);
+	prt_printf(out, "%llu ",
+			 stats->duration_stats.n);
+	printbuf_tabstop_pop(out);
+	prt_newline(out);
 
-	prt_printf(out, "frequency:\t");
-	pr_time_units(out, freq);
+	printbuf_tabstops_reset(out);
 
-	prt_newline(out);
-	prt_printf(out, "avg duration:\t");
-	pr_time_units(out, stats->average_duration);
+	printbuf_tabstop_push(out, out->indent + 20);
+	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
+	printbuf_tabstop_push(out, 0);
+	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
 
-	prt_newline(out);
-	prt_printf(out, "max duration:\t");
-	pr_time_units(out, stats->max_duration);
+	prt_tab(out);
+	prt_printf(out, "since mount");
+	prt_tab_rjust(out);
+	prt_tab(out);
+	prt_printf(out, "recent");
+	prt_tab_rjust(out);
+	prt_newline(out);
+
+	printbuf_tabstops_reset(out);
+	printbuf_tabstop_push(out, out->indent + 20);
+	printbuf_tabstop_push(out, TABSTOP_SIZE);
+	printbuf_tabstop_push(out, 2);
+	printbuf_tabstop_push(out, TABSTOP_SIZE);
+
+	prt_printf(out, "duration of events");
+	prt_newline(out);
+	printbuf_indent_add(out, 2);
+
+	pr_name_and_units(out, "min:", stats->min_duration);
+	pr_name_and_units(out, "max:", stats->max_duration);
+
+	prt_printf(out, "mean:");
+	prt_tab(out);
+	pr_time_units(out, d_mean);
+	prt_tab(out);
+	pr_time_units(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
+	prt_newline(out);
+
+	prt_printf(out, "stddev:");
+	prt_tab(out);
+	pr_time_units(out, d_stddev);
+	prt_tab(out);
+	pr_time_units(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
+
+	printbuf_indent_sub(out, 2);
+	prt_newline(out);
+
+	prt_printf(out, "time between events");
+	prt_newline(out);
+	printbuf_indent_add(out, 2);
+
+	pr_name_and_units(out, "min:", stats->min_freq);
+	pr_name_and_units(out, "max:", stats->max_freq);
+
+	prt_printf(out, "mean:");
+	prt_tab(out);
+	pr_time_units(out, f_mean);
+	prt_tab(out);
+	pr_time_units(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
+	prt_newline(out);
+
+	prt_printf(out, "stddev:");
+	prt_tab(out);
+	pr_time_units(out, f_stddev);
+	prt_tab(out);
+	pr_time_units(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
+
+	printbuf_indent_sub(out, 2);
+	prt_newline(out);
+
+	printbuf_tabstops_reset(out);
 
 	i = eytzinger0_first(NR_QUANTILES);
 	u = pick_time_units(stats->quantiles.entries[i].m);
 
 	prt_newline(out);
 	prt_printf(out, "quantiles (%s):\t", u->name);
 	eytzinger0_for_each(i, NR_QUANTILES) {
 		bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
@@ -467,6 +556,10 @@ void bch2_time_stats_exit(struct time_stats *stats)
 void bch2_time_stats_init(struct time_stats *stats)
 {
 	memset(stats, 0, sizeof(*stats));
+	stats->duration_stats_weighted.w = 8;
+	stats->freq_stats_weighted.w = 8;
+	stats->min_duration = U64_MAX;
+	stats->min_freq = U64_MAX;
	spin_lock_init(&stats->lock);
 }
 
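For reference, the weighted statistics above are seeded with w = 8 in bch2_time_stats_init(); an exponentially weighted mean of that kind moves 1/2^w of the way toward each sample. A hedged, standalone sketch (ewm_update is illustrative, not the library's implementation):

static inline long long ewm_update(long long mean, long long sample, unsigned w)
{
	/* with w = 8, each sample shifts the mean by (sample - mean)/256 */
	return mean + ((sample - mean) >> w);
}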
@ -18,6 +18,7 @@
|
||||
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/mean_and_variance.h>

struct closure;

@ -380,14 +381,18 @@ struct time_stat_buffer {
struct time_stats {
	spinlock_t	lock;
	u64		count;
	/* all fields are in nanoseconds */
	u64		average_duration;
	u64		average_frequency;
	u64		max_duration;
	u64		min_duration;
	u64		max_freq;
	u64		min_freq;
	u64		last_event;
	struct quantiles quantiles;

	struct mean_and_variance	  duration_stats;
	struct mean_and_variance_weighted duration_stats_weighted;
	struct mean_and_variance	  freq_stats;
	struct mean_and_variance_weighted freq_stats_weighted;
	struct time_stat_buffer __percpu *buffer;
};

@ -371,8 +371,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	int ret;

	return bch2_xattr_get(c, inode, name, buffer, size, handler->flags);
	ret = bch2_xattr_get(c, inode, name, buffer, size, handler->flags);
	return bch2_err_class(ret);
}

static int bch2_xattr_set_handler(const struct xattr_handler *handler,
@ -384,11 +386,13 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
	int ret;

	return bch2_trans_do(c, NULL, NULL, 0,
	ret = bch2_trans_do(c, NULL, NULL, 0,
			bch2_xattr_set(&trans, inode_inum(inode), &hash,
				       name, value, size,
				       handler->flags, flags));
	return bch2_err_class(ret);
}

static const struct xattr_handler bch_xattr_user_handler = {
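Both xattr hunks apply the same conversion pattern: capture the return value, then map bcachefs' private error codes back to a standard errno class before they can leak out through the VFS. A minimal standalone sketch of that pattern (illustrative only; the error-code name and the mapping below are hypothetical stand-ins, not bcachefs' actual tables):

#include <errno.h>
#include <stdio.h>

/* Hypothetical private error code living above the standard errno range. */
#define DEMO_ERR_transaction_restart	2048

/* Illustrative mapping in the spirit of bch2_err_class(): collapse a
 * private code to the standard errno it belongs to, and pass through
 * values already in the errno range. */
static int demo_err_class(int err)
{
	if (err == -DEMO_ERR_transaction_restart)
		return -EINTR;
	return err;
}

int main(void)
{
	printf("%d\n", demo_err_class(-DEMO_ERR_transaction_restart));	/* -EINTR */
	printf("%d\n", demo_err_class(-ENOENT));			/* unchanged */
	return 0;
}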
71
linux/int_sqrt.c
Normal file
@ -0,0 +1,71 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
 *
 * Based on the shift-and-subtract algorithm for computing integer
 * square root from Guy L. Steele.
 */

#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/limits.h>
#include <linux/math.h>

/**
 * int_sqrt - computes the integer square root
 * @x: integer of which to calculate the sqrt
 *
 * Computes: floor(sqrt(x))
 */
unsigned long int_sqrt(unsigned long x)
{
	unsigned long b, m, y = 0;

	if (x <= 1)
		return x;

	m = 1UL << (__fls(x) & ~1UL);
	while (m != 0) {
		b = y + m;
		y >>= 1;

		if (x >= b) {
			x -= b;
			y += m;
		}
		m >>= 2;
	}

	return y;
}
EXPORT_SYMBOL(int_sqrt);

#if BITS_PER_LONG < 64
/**
 * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input
 * is expected.
 * @x: 64bit integer of which to calculate the sqrt
 */
u32 int_sqrt64(u64 x)
{
	u64 b, m, y = 0;

	if (x <= ULONG_MAX)
		return int_sqrt((unsigned long) x);

	m = 1ULL << ((fls64(x) - 1) & ~1ULL);
	while (m != 0) {
		b = y + m;
		y >>= 1;

		if (x >= b) {
			x -= b;
			y += m;
		}
		m >>= 2;
	}

	return y;
}
EXPORT_SYMBOL(int_sqrt64);
#endif
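The floor(sqrt(x)) contract documented above is easy to sanity-check from userspace. The sketch below re-implements the same shift-and-subtract loop; isqrt_demo is a hypothetical stand-in that replaces __fls() with a plain loop, since the kernel bitops helpers aren't available outside the kernel:

#include <stdio.h>

/* Illustrative only: mirrors int_sqrt()'s floor(sqrt(x)) contract using
 * the same shift-and-subtract loop. */
static unsigned long isqrt_demo(unsigned long x)
{
	unsigned long b, m, y = 0;

	if (x <= 1)
		return x;
	/* start m at the highest even bit position at or below the MSB of x,
	 * equivalent to 1UL << (__fls(x) & ~1UL) */
	for (m = 1UL << (sizeof(x) * 8 - 2); m > x; m >>= 2)
		;
	while (m != 0) {
		b = y + m;
		y >>= 1;
		if (x >= b) {
			x -= b;
			y += m;
		}
		m >>= 2;
	}
	return y;
}

int main(void)
{
	/* floor(sqrt(26)) == 5, floor(sqrt(25)) == 5, floor(sqrt(24)) == 4 */
	printf("%lu %lu %lu\n", isqrt_demo(26), isqrt_demo(25), isqrt_demo(24));
	return 0;
}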
178
linux/mean_and_variance.c
Normal file
@ -0,0 +1,178 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions for incremental mean and variance.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Copyright © 2022 Daniel B. Hill
 *
 * Author: Daniel B. Hill <daniel@gluo.nz>
 *
 * Description:
 *
 * This includes some incremental algorithms for mean and variance calculation.
 *
 * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
 *
 * Create a struct and, if it's the weighted variant, set the w field (weight = 2^k).
 *
 * Use mean_and_variance[_weighted]_update() on the struct to update its state.
 *
 * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean
 * and variance; some computation is deferred to these functions for
 * performance reasons.
 *
 * See lib/math/mean_and_variance_test.c for examples of usage.
 *
 * DO NOT access the mean and variance fields of the weighted variants directly.
 * DO NOT change the weight after calling update.
 */

#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/limits.h>
#include <linux/math.h>
#include <linux/math64.h>
#include <linux/mean_and_variance.h>
#include <linux/module.h>
#include <linux/printbuf.h>

/**
 * fast_divpow2() - fast approximation for n / (1 << d)
 * @n: numerator
 * @d: the power of 2 denominator.
 *
 * note: this rounds towards 0.
 */
inline s64 fast_divpow2(s64 n, u8 d)
{
	return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
}
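/*
 * Illustrative note (not part of the upstream file): for negative n this
 * matches C's truncating division rather than a plain arithmetic shift, e.g.
 * fast_divpow2(-7, 1) == (-7 + 1) >> 1 == -3 == -7 / 2,
 * whereas -7 >> 1 == -4 (rounding toward negative infinity).
 */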

/**
 * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1
 * and return it.
 * @s1: the mean_and_variance to update.
 * @v1: the new sample.
 *
 * see linked pdf equation 12.
 */
struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1)
{
	struct mean_and_variance s2;
	u64 v2 = abs(v1);

	s2.n = s1.n + 1;
	s2.sum = s1.sum + v1;
	s2.sum_squares = u128_add(s1.sum_squares, u128_square(v2));
	return s2;
}
EXPORT_SYMBOL_GPL(mean_and_variance_update);

/**
 * mean_and_variance_get_mean() - get mean from @s
 */
s64 mean_and_variance_get_mean(struct mean_and_variance s)
{
	return div64_u64(s.sum, s.n);
}
EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);

/**
 * mean_and_variance_get_variance() - get variance from @s1
 *
 * see linked pdf equation 12.
 */
u64 mean_and_variance_get_variance(struct mean_and_variance s1)
{
	u128 s2 = u128_div(s1.sum_squares, s1.n);
	u64 s3 = abs(mean_and_variance_get_mean(s1));

	return u128_to_u64(u128_sub(s2, u128_square(s3)));
}
EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);

/**
 * mean_and_variance_get_stddev() - get standard deviation from @s
 */
u32 mean_and_variance_get_stddev(struct mean_and_variance s)
{
	return int_sqrt64(mean_and_variance_get_variance(s));
}
EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);

/**
 * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
 * @s1: the mean_and_variance_weighted to update.
 * @x: the new sample.
 *
 * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
 * Values are stored bitshifted for performance and added precision.
 */
struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1,
								    s64 x)
{
	struct mean_and_variance_weighted s2;
	/* previous weighted variance. */
	u64 var_w0 = s1.variance;
	u8 w = s2.w = s1.w;
	/* new value weighted. */
	s64 x_w = x << w;
	s64 diff_w = x_w - s1.mean;
	s64 diff = fast_divpow2(diff_w, w);
	/* new mean weighted. */
	s64 u_w1 = s1.mean + diff;

	BUG_ON(w % 2 != 0);

	if (!s1.init) {
		s2.mean = x_w;
		s2.variance = 0;
	} else {
		s2.mean = u_w1;
		s2.variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
	}
	s2.init = true;

	return s2;
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);

/**
 * mean_and_variance_weighted_get_mean() - get mean from @s
 */
s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
{
	return fast_divpow2(s.mean, s.w);
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);

/**
 * mean_and_variance_weighted_get_variance() - get variance from @s
 */
u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
{
	/* always positive, so fast_divpow2() isn't needed */
	return s.variance >> s.w;
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);

/**
 * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
 */
u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s)
{
	return int_sqrt64(mean_and_variance_weighted_get_variance(s));
}
EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);

MODULE_AUTHOR("Daniel B. Hill");
MODULE_LICENSE("GPL");
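Taken together with the header comment, usage follows a value-semantics pattern: each update returns a new struct rather than mutating in place. A minimal sketch of the intended call sequence (illustrative only; modelled on the description above and on how the time-stats code consumes the API, with w = 8 matching bch2_time_stats_init()):

#include <linux/mean_and_variance.h>
#include <linux/printk.h>

static void stats_demo(void)
{
	struct mean_and_variance mv = {};
	/* w must be even, per the BUG_ON() in the weighted update */
	struct mean_and_variance_weighted ewm = { .w = 8 };
	s64 samples[] = { 10, 20, 30, 40 };
	int i;

	for (i = 0; i < 4; i++) {
		mv  = mean_and_variance_update(mv, samples[i]);
		ewm = mean_and_variance_weighted_update(ewm, samples[i]);
	}

	/* plain variant: mean 25, variance 125, stddev int_sqrt64(125) == 11 */
	pr_info("mean %lld stddev %u\n",
		mean_and_variance_get_mean(mv),
		mean_and_variance_get_stddev(mv));
	pr_info("weighted mean %lld\n",
		mean_and_variance_weighted_get_mean(ewm));
}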
11
linux/six.c
@ -148,6 +148,14 @@ static int __do_six_trylock_type(struct six_lock *lock,
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
		} else if (!(lock->state.waiters & (1 << SIX_LOCK_write))) {
			atomic64_add(__SIX_VAL(waiters, 1 << SIX_LOCK_write),
				     &lock->state.counter);
			/*
			 * pairs with barrier after unlock and before checking
			 * for readers in unlock path
			 */
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

@ -162,9 +170,6 @@ static int __do_six_trylock_type(struct six_lock *lock,
	if (ret || try)
		v -= __SIX_VAL(write_locking, 1);

	if (!ret && !try && !(lock->state.waiters & (1 << SIX_LOCK_write)))
		v += __SIX_VAL(waiters, 1 << SIX_LOCK_write);

	if (try && !ret) {
		old.v = atomic64_add_return(v, &lock->state.counter);
		if (old.waiters & (1 << SIX_LOCK_read))