Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-02-23 00:00:02 +03:00

Update bcachefs sources to d1fd471830 bcachefs: Add more debug checks

parent 3420d86959
commit 13f53aa228
@@ -1 +1 @@
-1d669389f79de8571732c13fdf4d23039e2308fd
+d1fd47183051729471bce1c9f84fa63cb84dc557
@@ -85,6 +85,17 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 	return (old & mask) != 0;
 }
 
+static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+	unsigned long old;
+
+	old = __atomic_fetch_and(p, ~mask, __ATOMIC_RELAXED);
+
+	return (old & mask) != 0;
+}
+
 static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
@@ -219,4 +219,6 @@ struct qstr {
 
 #define POISON_FREE 0x6b
 
+static inline void dump_stack(void) {}
+
 #endif
include/linux/srcu.h (new file, 31 lines)
@@ -0,0 +1,31 @@
+#ifndef __TOOLS_LINUX_SRCU_H
+#define __TOOLS_LINUX_SRCU_H
+
+struct srcu_struct {
+};
+
+static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) {}
+
+static inline int srcu_read_lock(struct srcu_struct *ssp)
+{
+	return 0;
+}
+
+static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
+{
+	return false;
+}
+
+static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
+{
+	return 0;
+}
+
+static inline void cleanup_srcu_struct(struct srcu_struct *ssp) {}
+
+static inline int init_srcu_struct(struct srcu_struct *ssp)
+{
+	return 0;
+}
+
+#endif /* __TOOLS_LINUX_SRCU_H */
@@ -31,6 +31,7 @@ typedef unsigned gfp_t;
 #define __GFP_IO 0
 #define __GFP_NOWARN 0
 #define __GFP_NORETRY 0
+#define __GFP_NOFAIL 0
 #define __GFP_ZERO 1
 
 #define PAGE_ALLOC_COSTLY_ORDER 6
@@ -513,7 +513,7 @@ TRACE_EVENT(transaction_restart_ip,
 		__entry->ip = ip;
 	),
 
-	TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip)
+	TP_printk("%ps %pS", (void *) __entry->caller, (void *) __entry->ip)
 );
 
 DECLARE_EVENT_CLASS(transaction_restart,
@@ -528,7 +528,7 @@ DECLARE_EVENT_CLASS(transaction_restart,
 		__entry->ip = ip;
 	),
 
-	TP_printk("%pf", (void *) __entry->ip)
+	TP_printk("%ps", (void *) __entry->ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
@@ -568,7 +568,7 @@ TRACE_EVENT(trans_restart_would_deadlock,
 		__entry->want_iter_type = want_iter_type;
 	),
 
-	TP_printk("%pF %pF because %u have %u:%u want %u:%u",
+	TP_printk("%ps %pS because %u have %u:%u want %u:%u",
 		  (void *) __entry->trans_ip,
 		  (void *) __entry->caller_ip,
 		  __entry->reason,
@@ -592,7 +592,7 @@ TRACE_EVENT(trans_restart_iters_realloced,
 		__entry->nr = nr;
 	),
 
-	TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr)
+	TP_printk("%ps nr %u", (void *) __entry->ip, __entry->nr)
 );
 
 TRACE_EVENT(trans_restart_mem_realloced,
@@ -609,7 +609,7 @@ TRACE_EVENT(trans_restart_mem_realloced,
 		__entry->bytes = bytes;
 	),
 
-	TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes)
+	TP_printk("%ps bytes %lu", (void *) __entry->ip, __entry->bytes)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get,
@@ -193,6 +193,7 @@
 #include <linux/semaphore.h>
 #include <linux/seqlock.h>
 #include <linux/shrinker.h>
+#include <linux/srcu.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/zstd.h>
@@ -642,6 +643,8 @@ struct bch_fs {
 	mempool_t		btree_iters_pool;
 	struct btree_iter_buf  __percpu	*btree_iters_bufs;
 
+	struct srcu_struct	btree_trans_barrier;
+
 	struct btree_key_cache	btree_key_cache;
 
 	struct workqueue_struct	*wq;
@@ -181,8 +181,12 @@ void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
 void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
 {
 	if (k) {
-		pr_buf(out, "u64s %u type %s ", k->u64s,
-		       bch2_bkey_types[k->type]);
+		pr_buf(out, "u64s %u type ", k->u64s);
+
+		if (k->type < KEY_TYPE_MAX)
+			pr_buf(out, "%s ", bch2_bkey_types[k->type]);
+		else
+			pr_buf(out, "%u ", k->type);
 
 		bch2_bpos_to_text(out, k->p);
 
@@ -196,10 +200,14 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
 void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
 		      struct bkey_s_c k)
 {
-	const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+	if (k.k->type < KEY_TYPE_MAX) {
+		const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
 
-	if (likely(ops->val_to_text))
-		ops->val_to_text(out, c, k);
+		if (likely(ops->val_to_text))
+			ops->val_to_text(out, c, k);
+	} else {
+		pr_buf(out, "(invalid type %u)", k.k->type);
+	}
 }
 
 void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
@@ -604,53 +604,23 @@ static inline unsigned bkey_mantissa(const struct bkey_packed *k,
 	return (u16) v;
 }
 
-static void make_bfloat(struct btree *b, struct bset_tree *t,
+__always_inline
+static inline void __make_bfloat(struct btree *b, struct bset_tree *t,
 			unsigned j,
 			struct bkey_packed *min_key,
 			struct bkey_packed *max_key)
 {
 	struct bkey_float *f = bkey_float(b, t, j);
 	struct bkey_packed *m = tree_to_bkey(b, t, j);
-	struct bkey_packed *l, *r;
+	struct bkey_packed *l = is_power_of_2(j)
+		? min_key
+		: tree_to_prev_bkey(b, t, j >> ffs(j));
+	struct bkey_packed *r = is_power_of_2(j + 1)
+		? max_key
+		: tree_to_bkey(b, t, j >> (ffz(j) + 1));
 	unsigned mantissa;
 	int shift, exponent, high_bit;
 
-	if (is_power_of_2(j)) {
-		l = min_key;
-
-		if (!l->u64s) {
-			if (!bkey_pack_pos(l, b->data->min_key, b)) {
-				struct bkey_i tmp;
-
-				bkey_init(&tmp.k);
-				tmp.k.p = b->data->min_key;
-				bkey_copy(l, &tmp);
-			}
-		}
-	} else {
-		l = tree_to_prev_bkey(b, t, j >> ffs(j));
-
-		EBUG_ON(m < l);
-	}
-
-	if (is_power_of_2(j + 1)) {
-		r = max_key;
-
-		if (!r->u64s) {
-			if (!bkey_pack_pos(r, t->max_key, b)) {
-				struct bkey_i tmp;
-
-				bkey_init(&tmp.k);
-				tmp.k.p = t->max_key;
-				bkey_copy(r, &tmp);
-			}
-		}
-	} else {
-		r = tree_to_bkey(b, t, j >> (ffz(j) + 1));
-
-		EBUG_ON(m > r);
-	}
-
 	/*
 	 * for failed bfloats, the lookup code falls back to comparing against
 	 * the original key.
@@ -707,6 +677,30 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
 	f->mantissa = mantissa;
 }
 
+static void make_bfloat(struct btree *b, struct bset_tree *t,
+			unsigned j,
+			struct bkey_packed *min_key,
+			struct bkey_packed *max_key)
+{
+	struct bkey_i *k;
+
+	if (is_power_of_2(j) &&
+	    !min_key->u64s) {
+		k = (void *) min_key;
+		bkey_init(&k->k);
+		k->k.p = b->data->min_key;
+	}
+
+	if (is_power_of_2(j + 1) &&
+	    !max_key->u64s) {
+		k = (void *) max_key;
+		bkey_init(&k->k);
+		k->k.p = t->max_key;
+	}
+
+	__make_bfloat(b, t, j, min_key, max_key);
+}
+
 /* bytes remaining - only valid for last bset: */
 static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t)
 {
@@ -726,7 +720,7 @@ static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_t
 	return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree);
 }
 
-static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
+static noinline void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
 {
 	struct bkey_packed *k;
 
@@ -745,15 +739,12 @@ static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
 	}
 }
 
-static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
+static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
 {
 	struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
-	struct bkey_packed min_key, max_key;
+	struct bkey_i min_key, max_key;
 	unsigned j, cacheline = 1;
 
-	/* signal to make_bfloat() that they're uninitialized: */
-	min_key.u64s = max_key.u64s = 0;
-
 	t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
 		      bset_ro_tree_capacity(b, t));
 retry:
@@ -789,9 +780,16 @@ retry:
 
 	t->max_key = bkey_unpack_pos(b, prev);
 
+	bkey_init(&min_key.k);
+	min_key.k.p = b->data->min_key;
+	bkey_init(&max_key.k);
+	max_key.k.p = t->max_key;
+
 	/* Then we build the tree */
 	eytzinger1_for_each(j, t->size)
-		make_bfloat(b, t, j, &min_key, &max_key);
+		__make_bfloat(b, t, j,
+			      bkey_to_packed(&min_key),
+			      bkey_to_packed(&max_key));
 }
 
 static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
@@ -328,9 +328,9 @@ restart:
 			clear_btree_node_accessed(b);
 	}
 
-	memalloc_nofs_restore(flags);
 	mutex_unlock(&bc->lock);
 out:
+	memalloc_nofs_restore(flags);
 	return (unsigned long) freed * btree_pages(c);
 }
 
@@ -381,11 +381,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
 		if (btree_node_dirty(b))
 			bch2_btree_complete_write(c, b, btree_current_write(b));
-		clear_btree_node_dirty(b);
+		clear_btree_node_dirty(c, b);
 
 		btree_node_data_free(c, b);
 	}
 
+	BUG_ON(atomic_read(&c->btree_cache.dirty));
+
 	while (!list_empty(&bc->freed)) {
 		b = list_first_entry(&bc->freed, struct btree, list);
 		list_del(&b->list);
@@ -445,7 +447,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 	bc->shrink.scan_objects	= bch2_btree_cache_scan;
 	bc->shrink.seeks	= 4;
 	bc->shrink.batch	= btree_pages(c) * 2;
-	register_shrinker(&bc->shrink);
+	ret = register_shrinker(&bc->shrink);
 out:
 	pr_verbose_init(c->opts, "ret %i", ret);
 	return ret;
@@ -1442,8 +1442,10 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 
 	ret = validate_bset(c, b, i, sectors, WRITE, false) ?:
 		validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
-	if (ret)
+	if (ret) {
 		bch2_inconsistent_error(c);
+		dump_stack();
+	}
 
 	return ret;
 }
@@ -1498,6 +1500,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		new ^= (1 << BTREE_NODE_write_idx);
 	} while (cmpxchg_acquire(&b->flags, old, new) != old);
 
+	atomic_dec(&c->btree_cache.dirty);
+
 	BUG_ON(btree_node_fake(b));
 	BUG_ON((b->will_make_reachable != 0) != !b->written);
 
@@ -1530,6 +1534,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		seq = max(seq, le64_to_cpu(i->journal_seq));
 	}
 
+	/* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */
+	bytes += 8;
+
 	data = btree_bounce_alloc(c, bytes, &used_mempool);
 
 	if (!b->written) {
@@ -14,6 +14,23 @@ struct btree_write;
 struct btree;
 struct btree_iter;
 
+static inline bool btree_node_dirty(struct btree *b)
+{
+	return test_bit(BTREE_NODE_dirty, &b->flags);
+}
+
+static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b)
+{
+	if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
+		atomic_inc(&c->btree_cache.dirty);
+}
+
+static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
+{
+	if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
+		atomic_dec(&c->btree_cache.dirty);
+}
+
 struct btree_read_bio {
 	struct bch_fs		*c;
 	u64			start_time;
@@ -2342,12 +2342,15 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
 	unsigned new_size = BTREE_ITER_MAX;
 	size_t iters_bytes	= sizeof(struct btree_iter) * new_size;
 	size_t updates_bytes	= sizeof(struct btree_insert_entry) * new_size;
-	void *p;
+	void *p = NULL;
 
 	BUG_ON(trans->used_mempool);
 
-	p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL) ?:
-		mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
+#ifdef __KERNEL__
+	p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL);
+#endif
+	if (!p)
+		p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
 
 	trans->iters		= p; p += iters_bytes;
 	trans->updates		= p; p += updates_bytes;
@@ -2369,8 +2372,12 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 	 */
 	bch2_trans_alloc_iters(trans, c);
 
-	if (expected_mem_bytes)
-		bch2_trans_preload_mem(trans, expected_mem_bytes);
+	if (expected_mem_bytes) {
+		trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes);
+		trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL);
+	}
+
+	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 	trans->pid = current->pid;
@@ -2392,12 +2399,19 @@ int bch2_trans_exit(struct btree_trans *trans)
 	mutex_unlock(&trans->c->btree_trans_lock);
 #endif
 
+	srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+
 	bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
 
 	kfree(trans->fs_usage_deltas);
 	kfree(trans->mem);
 
+#ifdef __KERNEL__
+	/*
+	 * Userspace doesn't have a real percpu implementation:
+	 */
 	trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
+#endif
 	if (trans->iters)
 		mempool_free(trans->iters, &trans->c->btree_iters_pool);
 
@@ -2474,6 +2488,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 void bch2_fs_btree_iter_exit(struct bch_fs *c)
 {
 	mempool_exit(&c->btree_iters_pool);
+	cleanup_srcu_struct(&c->btree_trans_barrier);
 }
 
 int bch2_fs_btree_iter_init(struct bch_fs *c)
@@ -2483,7 +2498,8 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
 	INIT_LIST_HEAD(&c->btree_trans_list);
 	mutex_init(&c->btree_trans_lock);
 
-	return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+	return init_srcu_struct(&c->btree_trans_barrier) ?:
+		mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
 			sizeof(struct btree_iter) * nr +
 			sizeof(struct btree_insert_entry) * nr +
 			sizeof(struct btree_insert_entry) * nr);
@@ -9,6 +9,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 
+#include <linux/sched/mm.h>
 #include <trace/events/bcachefs.h>
 
 static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -66,12 +67,19 @@ static void bkey_cached_evict(struct btree_key_cache *c,
 	BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
 				      bch2_btree_key_cache_params));
 	memset(&ck->key, ~0, sizeof(ck->key));
+
+	c->nr_keys--;
 }
 
-static void bkey_cached_free(struct btree_key_cache *c,
+static void bkey_cached_free(struct btree_key_cache *bc,
 			     struct bkey_cached *ck)
 {
-	list_move(&ck->list, &c->freed);
+	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+	ck->btree_trans_barrier_seq =
+		start_poll_synchronize_srcu(&c->btree_trans_barrier);
+
+	list_move(&ck->list, &bc->freed);
 
 	kfree(ck->k);
 	ck->k = NULL;
@@ -135,6 +143,8 @@ btree_key_cache_create(struct btree_key_cache *c,
 		return NULL;
 	}
 
+	c->nr_keys++;
+
 	list_move(&ck->list, &c->clean);
 	six_unlock_write(&ck->c.lock);
 
@@ -355,10 +365,14 @@ err:
 
 	bch2_journal_pin_drop(j, &ck->journal);
 	bch2_journal_preres_put(j, &ck->res);
-	clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
 
 	if (!evict) {
 		mutex_lock(&c->btree_key_cache.lock);
+		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+			clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+			c->btree_key_cache.nr_dirty--;
+		}
+
 		list_move_tail(&ck->list, &c->btree_key_cache.clean);
 		mutex_unlock(&c->btree_key_cache.lock);
 	} else {
@@ -371,6 +385,11 @@ evict:
 		six_lock_write(&ck->c.lock, NULL, NULL);
 
 		mutex_lock(&c->btree_key_cache.lock);
+		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+			clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+			c->btree_key_cache.nr_dirty--;
+		}
+
 		bkey_cached_evict(&c->btree_key_cache, ck);
 		bkey_cached_free(&c->btree_key_cache, ck);
 		mutex_unlock(&c->btree_key_cache.lock);
@@ -391,19 +410,23 @@ static void btree_key_cache_journal_flush(struct journal *j,
 	struct bkey_cached_key key;
 	struct btree_trans trans;
 
+	int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+
 	six_lock_read(&ck->c.lock, NULL, NULL);
 	key = ck->key;
 
 	if (ck->journal.seq != seq ||
 	    !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 		six_unlock_read(&ck->c.lock);
-		return;
+		goto unlock;
 	}
 	six_unlock_read(&ck->c.lock);
 
 	bch2_trans_init(&trans, c, 0, 0);
 	btree_key_cache_flush_pos(&trans, key, seq, false);
 	bch2_trans_exit(&trans);
+unlock:
+	srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
 }
 
 /*
@@ -448,9 +471,10 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 		mutex_lock(&c->btree_key_cache.lock);
-		list_del_init(&ck->list);
+		list_move(&ck->list, &c->btree_key_cache.dirty);
 
 		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
+		c->btree_key_cache.nr_dirty++;
 		mutex_unlock(&c->btree_key_cache.lock);
 	}
 
@@ -467,20 +491,97 @@ void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
 }
 #endif
 
-void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c)
+static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
+					       struct shrink_control *sc)
 {
+	struct bch_fs *c = container_of(shrink, struct bch_fs,
+					btree_key_cache.shrink);
+	struct btree_key_cache *bc = &c->btree_key_cache;
+	struct bkey_cached *ck, *t;
+	size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
+	unsigned flags;
+
+	/* Return -1 if we can't do anything right now */
+	if (sc->gfp_mask & __GFP_FS)
+		mutex_lock(&bc->lock);
+	else if (!mutex_trylock(&bc->lock))
+		return -1;
+
+	flags = memalloc_nofs_save();
+
+	list_for_each_entry_safe(ck, t, &bc->freed, list) {
+		scanned++;
+
+		if (poll_state_synchronize_srcu(&c->btree_trans_barrier,
+						ck->btree_trans_barrier_seq)) {
+			list_del(&ck->list);
+			kfree(ck);
+			freed++;
+		}
+
+		if (scanned >= nr)
+			goto out;
+	}
+
+	list_for_each_entry_safe(ck, t, &bc->clean, list) {
+		scanned++;
+
+		if (bkey_cached_lock_for_evict(ck)) {
+			bkey_cached_evict(bc, ck);
+			bkey_cached_free(bc, ck);
+		}
+
+		if (scanned >= nr) {
+			if (&t->list != &bc->clean)
+				list_move_tail(&bc->clean, &t->list);
+			goto out;
+		}
+	}
+out:
+	memalloc_nofs_restore(flags);
+	mutex_unlock(&bc->lock);
+
+	return freed;
+}
+
+static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
+						struct shrink_control *sc)
+{
+	struct bch_fs *c = container_of(shrink, struct bch_fs,
+					btree_key_cache.shrink);
+	struct btree_key_cache *bc = &c->btree_key_cache;
+
+	return bc->nr_keys;
+}
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
+{
+	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
 	struct bkey_cached *ck, *n;
 
-	mutex_lock(&c->lock);
-	list_for_each_entry_safe(ck, n, &c->clean, list) {
+	if (bc->shrink.list.next)
+		unregister_shrinker(&bc->shrink);
+
+	mutex_lock(&bc->lock);
+	list_splice(&bc->dirty, &bc->clean);
+
+	list_for_each_entry_safe(ck, n, &bc->clean, list) {
+		bch2_journal_pin_drop(&c->journal, &ck->journal);
+		bch2_journal_preres_put(&c->journal, &ck->res);
+
 		kfree(ck->k);
 		kfree(ck);
+		bc->nr_keys--;
 	}
-	list_for_each_entry_safe(ck, n, &c->freed, list)
-		kfree(ck);
-	mutex_unlock(&c->lock);
 
-	rhashtable_destroy(&c->table);
+	BUG_ON(bc->nr_dirty && !bch2_journal_error(&c->journal));
+	BUG_ON(bc->nr_keys);
+
+	list_for_each_entry_safe(ck, n, &bc->freed, list)
+		kfree(ck);
+	mutex_unlock(&bc->lock);
+
+	rhashtable_destroy(&bc->table);
 }
 
 void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
@@ -488,11 +589,16 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
 	mutex_init(&c->lock);
 	INIT_LIST_HEAD(&c->freed);
 	INIT_LIST_HEAD(&c->clean);
+	INIT_LIST_HEAD(&c->dirty);
 }
 
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
 {
-	return rhashtable_init(&c->table, &bch2_btree_key_cache_params);
+	c->shrink.count_objects	= bch2_btree_key_cache_count;
+	c->shrink.scan_objects	= bch2_btree_key_cache_scan;
+
+	return register_shrinker(&c->shrink) ?:
+		rhashtable_init(&c->table, &bch2_btree_key_cache_params);
 }
 
 void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
@@ -158,6 +158,7 @@ struct btree_cache {
 	/* Number of elements in live + freeable lists */
 	unsigned		used;
 	unsigned		reserve;
+	atomic_t		dirty;
 	struct shrinker		shrink;
 
 	/*
@@ -294,6 +295,11 @@ struct btree_key_cache {
 	struct rhashtable	table;
 	struct list_head	freed;
 	struct list_head	clean;
+	struct list_head	dirty;
+	struct shrinker		shrink;
+
+	size_t			nr_keys;
+	size_t			nr_dirty;
 };
 
 struct bkey_cached_key {
@@ -309,6 +315,7 @@ struct bkey_cached {
 	unsigned long		flags;
 	u8			u64s;
 	bool			valid;
+	u32			btree_trans_barrier_seq;
 	struct bkey_cached_key	key;
 
 	struct rhash_head	hash;
@@ -345,6 +352,7 @@ struct btree_trans {
 	pid_t			pid;
 #endif
 	unsigned long		ip;
+	int			srcu_idx;
 
 	u64			iters_linked;
 	u64			iters_live;
@@ -411,7 +419,6 @@ enum btree_flags {
 
 BTREE_FLAG(read_in_flight);
 BTREE_FLAG(read_error);
-BTREE_FLAG(dirty);
 BTREE_FLAG(need_write);
 BTREE_FLAG(noevict);
 BTREE_FLAG(write_idx);
@@ -11,6 +11,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "buckets.h"
+#include "error.h"
 #include "extents.h"
 #include "journal.h"
 #include "journal_reclaim.h"
@@ -149,7 +150,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 
 	b->ob.nr = 0;
 
-	clear_btree_node_dirty(b);
+	clear_btree_node_dirty(c, b);
 
 	btree_node_lock_type(c, b, SIX_LOCK_write);
 	__btree_node_free(c, b);
@@ -264,7 +265,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 	b = as->prealloc_nodes[--as->nr_prealloc_nodes];
 
 	set_btree_node_accessed(b);
-	set_btree_node_dirty(b);
+	set_btree_node_dirty(c, b);
 	set_btree_node_need_write(b);
 
 	bch2_bset_init_first(b, &b->data->keys);
@@ -523,6 +524,7 @@ static void btree_update_nodes_written(struct btree_update *as)
 {
 	struct bch_fs *c = as->c;
 	struct btree *b = as->b;
+	struct btree_trans trans;
 	u64 journal_seq = 0;
 	unsigned i;
 	int ret;
@@ -540,7 +542,8 @@ static void btree_update_nodes_written(struct btree_update *as)
 	 * journal reclaim does btree updates when flushing bkey_cached entries,
 	 * which may require allocations as well.
 	 */
-	ret = bch2_trans_do(c, &as->disk_res, &journal_seq,
+	bch2_trans_init(&trans, c, 0, 512);
+	ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq,
 			    BTREE_INSERT_NOFAIL|
 			    BTREE_INSERT_USE_RESERVE|
 			    BTREE_INSERT_USE_ALLOC_RESERVE|
@@ -548,6 +551,7 @@ static void btree_update_nodes_written(struct btree_update *as)
 			    BTREE_INSERT_JOURNAL_RECLAIM|
 			    BTREE_INSERT_JOURNAL_RESERVED,
 			    btree_update_nodes_written_trans(&trans, as));
+	bch2_trans_exit(&trans);
 	BUG_ON(ret && !bch2_journal_error(&c->journal));
 
 	if (b) {
@@ -827,7 +831,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
 		closure_wake_up(&c->btree_interior_update_wait);
 	}
 
-	clear_btree_node_dirty(b);
+	clear_btree_node_dirty(c, b);
 	clear_btree_node_need_write(b);
 
 	/*
@@ -1018,7 +1022,18 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b,
 					struct bkey_i *insert,
 					struct btree_node_iter *node_iter)
 {
+	struct bch_fs *c = as->c;
 	struct bkey_packed *k;
+	const char *invalid;
+
+	invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b));
+	if (invalid) {
+		char buf[160];
+
+		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert));
+		bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf, invalid);
+		dump_stack();
+	}
 
 	BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
 	       ARRAY_SIZE(as->journal_entries));
@@ -1034,7 +1049,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b,
 		bch2_btree_node_iter_advance(node_iter, b);
 
 	bch2_btree_bset_insert_key(iter, b, node_iter, insert);
-	set_btree_node_dirty(b);
+	set_btree_node_dirty(c, b);
 	set_btree_node_need_write(b);
 }
 
|
@ -237,6 +237,9 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
|
|||||||
b->whiteout_u64s;
|
b->whiteout_u64s;
|
||||||
ssize_t total = c->opts.btree_node_size << 6;
|
ssize_t total = c->opts.btree_node_size << 6;
|
||||||
|
|
||||||
|
/* Always leave one extra u64 for bch2_varint_decode: */
|
||||||
|
used++;
|
||||||
|
|
||||||
return total - used;
|
return total - used;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +191,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
|
|||||||
bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
|
bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
|
||||||
|
|
||||||
if (unlikely(!btree_node_dirty(b)))
|
if (unlikely(!btree_node_dirty(b)))
|
||||||
set_btree_node_dirty(b);
|
set_btree_node_dirty(c, b);
|
||||||
|
|
||||||
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
|
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
|
||||||
u64s_added = (int) bset_u64s(t) - old_u64s;
|
u64s_added = (int) bset_u64s(t) - old_u64s;
|
||||||
|
@@ -323,7 +323,7 @@ static u64 reserve_factor(u64 r)
 
 static u64 avail_factor(u64 r)
 {
-	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
+	return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
 }
 
 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
@@ -35,6 +35,22 @@
 #include <trace/events/bcachefs.h>
 #include <trace/events/writeback.h>
 
+static inline struct address_space *faults_disabled_mapping(void)
+{
+	return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
+}
+
+static inline void set_fdm_dropped_locks(void)
+{
+	current->faults_disabled_mapping =
+		(void *) (((unsigned long) current->faults_disabled_mapping)|1);
+}
+
+static inline bool fdm_dropped_locks(void)
+{
+	return ((unsigned long) current->faults_disabled_mapping) & 1;
+}
+
 struct quota_res {
 	u64				sectors;
 };
@@ -493,10 +509,35 @@ static void bch2_set_page_dirty(struct bch_fs *c,
 vm_fault_t bch2_page_fault(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
+	struct address_space *mapping = file->f_mapping;
+	struct address_space *fdm = faults_disabled_mapping();
 	struct bch_inode_info *inode = file_bch_inode(file);
 	int ret;
 
+	if (fdm == mapping)
+		return VM_FAULT_SIGBUS;
+
+	/* Lock ordering: */
+	if (fdm > mapping) {
+		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
+
+		if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock))
+			goto got_lock;
+
+		bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock);
+
 		bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+		bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+		bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock);
+
+		/* Signal that lock has been dropped: */
+		set_fdm_dropped_locks();
+		return VM_FAULT_SIGBUS;
+	}
+
+	bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+got_lock:
 	ret = filemap_fault(vmf);
 	bch2_pagecache_add_put(&inode->ei_pagecache_lock);
 
@@ -1742,14 +1783,16 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 	struct bio *bio = &dio->op.wbio.bio;
 	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned unaligned;
-	bool sync = dio->sync;
+	unsigned unaligned, iter_count;
+	bool sync = dio->sync, dropped_locks;
 	long ret;
 
 	if (dio->loop)
 		goto loop;
 
 	while (1) {
+		iter_count = dio->iter.count;
+
 		if (kthread)
 			kthread_use_mm(dio->mm);
 		BUG_ON(current->faults_disabled_mapping);
@@ -1757,13 +1800,34 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 
 		ret = bio_iov_iter_get_pages(bio, &dio->iter);
 
+		dropped_locks = fdm_dropped_locks();
+
 		current->faults_disabled_mapping = NULL;
 		if (kthread)
 			kthread_unuse_mm(dio->mm);
 
+		/*
+		 * If the fault handler returned an error but also signalled
+		 * that it dropped & retook ei_pagecache_lock, we just need to
+		 * re-shoot down the page cache and retry:
+		 */
+		if (dropped_locks && ret)
+			ret = 0;
+
 		if (unlikely(ret < 0))
 			goto err;
 
+		if (unlikely(dropped_locks)) {
+			ret = write_invalidate_inode_pages_range(mapping,
+					req->ki_pos,
+					req->ki_pos + iter_count - 1);
+			if (unlikely(ret))
+				goto err;
+
+			if (!bio->bi_iter.bi_size)
+				continue;
+		}
+
 		unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
 		bio->bi_iter.bi_size -= unaligned;
 		iov_iter_revert(&dio->iter, unaligned);
@@ -91,6 +91,11 @@ void bch2_pagecache_add_put(struct pagecache_lock *lock)
 	__pagecache_lock_put(lock, 1);
 }
 
+bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
+{
+	return __pagecache_lock_tryget(lock, 1);
+}
+
 void bch2_pagecache_add_get(struct pagecache_lock *lock)
 {
 	__pagecache_lock_get(lock, 1);
@@ -271,7 +276,8 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
 	if (!tmpfile)
 		mutex_lock(&dir->ei_update_lock);
 
-	bch2_trans_init(&trans, c, 8, 1024);
+	bch2_trans_init(&trans, c, 8,
+			2048 + (!tmpfile ? dentry->d_name.len : 0));
 retry:
 	bch2_trans_begin(&trans);
 
|
@ -26,6 +26,7 @@ static inline void pagecache_lock_init(struct pagecache_lock *lock)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void bch2_pagecache_add_put(struct pagecache_lock *);
|
void bch2_pagecache_add_put(struct pagecache_lock *);
|
||||||
|
bool bch2_pagecache_add_tryget(struct pagecache_lock *);
|
||||||
void bch2_pagecache_add_get(struct pagecache_lock *);
|
void bch2_pagecache_add_get(struct pagecache_lock *);
|
||||||
void bch2_pagecache_block_put(struct pagecache_lock *);
|
void bch2_pagecache_block_put(struct pagecache_lock *);
|
||||||
void bch2_pagecache_block_get(struct pagecache_lock *);
|
void bch2_pagecache_block_get(struct pagecache_lock *);
|
||||||
|
@ -537,7 +537,9 @@ found_slot:
|
|||||||
inode_u->bi_inum = k.k->p.offset;
|
inode_u->bi_inum = k.k->p.offset;
|
||||||
inode_u->bi_generation = bkey_generation(k);
|
inode_u->bi_generation = bkey_generation(k);
|
||||||
|
|
||||||
return bch2_inode_write(trans, iter, inode_u);
|
ret = bch2_inode_write(trans, iter, inode_u);
|
||||||
|
bch2_trans_iter_put(trans, iter);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
||||||
@ -574,16 +576,9 @@ retry:
|
|||||||
|
|
||||||
bi_generation = 0;
|
bi_generation = 0;
|
||||||
|
|
||||||
ret = bch2_btree_key_cache_flush(&trans, BTREE_ID_INODES, POS(0, inode_nr));
|
|
||||||
if (ret) {
|
|
||||||
if (ret != -EINTR)
|
|
||||||
bch_err(c, "error flushing btree key cache: %i", ret);
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
|
|
||||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
|
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
|
||||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
BTREE_ITER_CACHED|BTREE_ITER_INTENT);
|
||||||
k = bch2_btree_iter_peek_slot(iter);
|
k = bch2_btree_iter_peek_cached(iter);
|
||||||
|
|
||||||
ret = bkey_err(k);
|
ret = bkey_err(k);
|
||||||
if (ret)
|
if (ret)
|
||||||
|
@@ -18,7 +18,19 @@
 
 #include <trace/events/bcachefs.h>
 
-static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64);
+static u64 last_unwritten_seq(struct journal *j)
+{
+	union journal_res_state s = READ_ONCE(j->reservations);
+
+	lockdep_assert_held(&j->lock);
+
+	return journal_cur_seq(j) - s.prev_buf_unwritten;
+}
+
+static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
+{
+	return seq >= last_unwritten_seq(j);
+}
+
 static bool __journal_entry_is_open(union journal_res_state state)
 {
@@ -30,6 +42,22 @@ static bool journal_entry_is_open(struct journal *j)
 	return __journal_entry_is_open(j->reservations);
 }
 
+static inline struct journal_buf *
+journal_seq_to_buf(struct journal *j, u64 seq)
+{
+	struct journal_buf *buf = NULL;
+
+	EBUG_ON(seq > journal_cur_seq(j));
+	EBUG_ON(seq == journal_cur_seq(j) &&
+		j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
+
+	if (journal_seq_unwritten(j, seq)) {
+		buf = j->buf + (seq & 1);
+		EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
+	}
+	return buf;
+}
+
 static void journal_pin_new_entry(struct journal *j, int count)
 {
 	struct journal_entry_pin_list *p;
@@ -51,6 +79,8 @@ static void bch2_journal_buf_init(struct journal *j)
 {
 	struct journal_buf *buf = journal_cur_buf(j);
 
+	bkey_extent_init(&buf->key);
+
 	memset(buf->has_inode, 0, sizeof(buf->has_inode));
 
 	memset(buf->data, 0, sizeof(*buf->data));
@@ -72,6 +102,7 @@ void bch2_journal_halt(struct journal *j)
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v, new.v)) != old.v);
 
+	j->err_seq = journal_cur_seq(j);
 	journal_wake(j);
 	closure_wake_up(&journal_cur_buf(j)->wait);
 }
@@ -139,8 +170,6 @@ static bool __journal_entry_close(struct journal *j)
 	BUG_ON(sectors > buf->sectors);
 	buf->sectors = sectors;
 
-	bkey_extent_init(&buf->key);
-
 	/*
 	 * We have to set last_seq here, _before_ opening a new journal entry:
 	 *
@@ -162,11 +191,6 @@ static bool __journal_entry_close(struct journal *j)
 	 */
 	buf->data->last_seq	= cpu_to_le64(journal_last_seq(j));
 
-	if (journal_entry_empty(buf->data))
-		clear_bit(JOURNAL_NOT_EMPTY, &j->flags);
-	else
-		set_bit(JOURNAL_NOT_EMPTY, &j->flags);
-
 	journal_pin_new_entry(j, 1);
 
 	bch2_journal_buf_init(j);
@@ -391,8 +415,17 @@ unlock:
 		goto retry;
 
 	if (ret == -ENOSPC) {
-		WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED),
-			  "JOURNAL_RES_GET_RESERVED set but journal full");
+		if (WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED),
+			      "JOURNAL_RES_GET_RESERVED set but journal full")) {
+			char *buf;
+
+			buf = kmalloc(4096, GFP_NOFS);
+			if (buf) {
+				bch2_journal_debug_to_text(&PBUF(buf), j);
+				pr_err("\n%s", buf);
+				kfree(buf);
+			}
+		}
 
 		/*
 		 * Journal is full - can't rely on reclaim from work item due to
@ -503,146 +536,28 @@ out:
|
|||||||
|
|
||||||
/* journal flushing: */
|
/* journal flushing: */
|
||||||
|
|
||||||
u64 bch2_journal_last_unwritten_seq(struct journal *j)
|
|
||||||
{
|
|
||||||
u64 seq;
|
|
||||||
|
|
||||||
spin_lock(&j->lock);
|
|
||||||
seq = journal_cur_seq(j);
|
|
||||||
if (j->reservations.prev_buf_unwritten)
|
|
||||||
seq--;
|
|
||||||
spin_unlock(&j->lock);
|
|
||||||
|
|
||||||
return seq;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* bch2_journal_open_seq_async - try to open a new journal entry if @seq isn't
|
|
||||||
* open yet, or wait if we cannot
|
|
||||||
*
|
|
||||||
* used by the btree interior update machinery, when it needs to write a new
|
|
||||||
* btree root - every journal entry contains the roots of all the btrees, so it
|
|
||||||
* doesn't need to bother with getting a journal reservation
|
|
||||||
*/
|
|
||||||
int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
spin_lock(&j->lock);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Can't try to open more than one sequence number ahead:
|
|
||||||
*/
|
|
||||||
BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
|
|
||||||
|
|
||||||
if (journal_cur_seq(j) > seq ||
|
|
||||||
journal_entry_is_open(j)) {
|
|
||||||
spin_unlock(&j->lock);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (journal_cur_seq(j) < seq &&
|
|
||||||
!__journal_entry_close(j)) {
|
|
||||||
/* haven't finished writing out the previous one: */
|
|
||||||
trace_journal_entry_full(c);
|
|
||||||
ret = -EAGAIN;
|
|
||||||
} else {
|
|
||||||
BUG_ON(journal_cur_seq(j) != seq);
|
|
||||||
|
|
||||||
ret = journal_entry_open(j);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((ret == -EAGAIN || ret == -ENOSPC) &&
|
|
||||||
!j->res_get_blocked_start)
|
|
||||||
j->res_get_blocked_start = local_clock() ?: 1;
|
|
||||||
|
|
||||||
if (ret == -EAGAIN || ret == -ENOSPC)
|
|
||||||
closure_wait(&j->async_wait, cl);
|
|
||||||
|
|
||||||
spin_unlock(&j->lock);
|
|
||||||
|
|
||||||
if (ret == -ENOSPC) {
|
|
||||||
trace_journal_full(c);
|
|
||||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
|
||||||
ret = -EAGAIN;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int journal_seq_error(struct journal *j, u64 seq)
|
|
||||||
{
|
|
||||||
union journal_res_state state = READ_ONCE(j->reservations);
|
|
||||||
|
|
||||||
if (seq == journal_cur_seq(j))
|
|
||||||
return bch2_journal_error(j);
|
|
||||||
|
|
||||||
if (seq + 1 == journal_cur_seq(j) &&
|
|
||||||
!state.prev_buf_unwritten &&
|
|
||||||
seq > j->seq_ondisk)
|
|
||||||
return -EIO;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct journal_buf *
|
|
||||||
journal_seq_to_buf(struct journal *j, u64 seq)
|
|
||||||
{
|
|
||||||
/* seq should be for a journal entry that has been opened: */
|
|
||||||
BUG_ON(seq > journal_cur_seq(j));
|
|
||||||
BUG_ON(seq == journal_cur_seq(j) &&
|
|
||||||
j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
|
|
||||||
|
|
||||||
if (seq == journal_cur_seq(j))
|
|
||||||
return journal_cur_buf(j);
|
|
||||||
if (seq + 1 == journal_cur_seq(j) &&
|
|
||||||
j->reservations.prev_buf_unwritten)
|
|
||||||
return journal_prev_buf(j);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* bch2_journal_wait_on_seq - wait for a journal entry to be written
|
|
||||||
*
|
|
||||||
* does _not_ cause @seq to be written immediately - if there is no other
|
|
||||||
* activity to cause the relevant journal entry to be filled up or flushed it
|
|
||||||
* can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is
|
|
||||||
* configurable).
|
|
||||||
*/
|
|
||||||
void bch2_journal_wait_on_seq(struct journal *j, u64 seq,
|
|
||||||
struct closure *parent)
|
|
||||||
{
|
|
||||||
struct journal_buf *buf;
|
|
||||||
|
|
||||||
spin_lock(&j->lock);
|
|
||||||
|
|
||||||
if ((buf = journal_seq_to_buf(j, seq))) {
|
|
||||||
if (!closure_wait(&buf->wait, parent))
|
|
||||||
BUG();
|
|
||||||
|
|
||||||
if (seq == journal_cur_seq(j)) {
|
|
||||||
smp_mb();
|
|
||||||
if (bch2_journal_error(j))
|
|
||||||
closure_wake_up(&buf->wait);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock(&j->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
 /**
  * bch2_journal_flush_seq_async - wait for a journal entry to be written
  *
  * like bch2_journal_wait_on_seq, except that it triggers a write immediately if
  * necessary
  */
-void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
+int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 				  struct closure *parent)
 {
 	struct journal_buf *buf;
+	int ret = 0;

 	spin_lock(&j->lock);
+	if (seq <= j->err_seq) {
+		ret = -EIO;
+		goto out;
+	}

+	if (seq <= j->seq_ondisk) {
+		ret = 1;
+		goto out;
+	}
+
 	if (parent &&
 	    (buf = journal_seq_to_buf(j, seq)))
@@ -651,20 +566,8 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,

 	if (seq == journal_cur_seq(j))
 		__journal_entry_close(j);
+out:
 	spin_unlock(&j->lock);
-}
-
-static int journal_seq_flushed(struct journal *j, u64 seq)
-{
-	int ret;
-
-	spin_lock(&j->lock);
-	ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq);
-
-	if (seq == journal_cur_seq(j))
-		__journal_entry_close(j);
-	spin_unlock(&j->lock);

 	return ret;
 }

@@ -673,28 +576,13 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
 	u64 start_time = local_clock();
 	int ret, ret2;

-	ret = wait_event_killable(j->wait, (ret2 = journal_seq_flushed(j, seq)));
+	ret = wait_event_killable(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));

 	bch2_time_stats_update(j->flush_seq_time, start_time);

 	return ret ?: ret2 < 0 ? ret2 : 0;
 }

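bch2_journal_flush_seq_async() now reports status directly instead of relying on the old journal_seq_flushed() polling helper. A minimal caller-side sketch of the new convention (illustrative only, not code from this commit; the helper name is made up and the surrounding bcachefs headers are assumed):

/*
 * Illustrative caller of the reworked bch2_journal_flush_seq_async():
 * negative means @seq is at or below a journal write that failed, 1 means
 * @seq is already on disk, 0 means a write was kicked off.
 */
static int flush_seq_or_report(struct journal *j, u64 seq, struct closure *cl)
{
	int ret = bch2_journal_flush_seq_async(j, seq, cl);

	if (ret < 0)
		return ret;	/* journal error, typically -EIO */
	if (ret == 1)
		return 0;	/* already durable, nothing to wait for */

	/* ret == 0: a write is in flight; wait on @cl before relying on it */
	return 0;
}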
-/**
- * bch2_journal_meta_async - force a journal entry to be written
- */
-void bch2_journal_meta_async(struct journal *j, struct closure *parent)
-{
-	struct journal_res res;
-
-	memset(&res, 0, sizeof(res));
-
-	bch2_journal_res_get(j, &res, jset_u64s(0), 0);
-	bch2_journal_res_put(j, &res);
-
-	bch2_journal_flush_seq_async(j, res.seq, parent);
-}
-
 int bch2_journal_meta(struct journal *j)
 {
 	struct journal_res res;
@@ -989,7 +877,8 @@ void bch2_fs_journal_stop(struct journal *j)
 	journal_quiesce(j);

 	BUG_ON(!bch2_journal_error(j) &&
-	       test_bit(JOURNAL_NOT_EMPTY, &j->flags));
+	       (journal_entry_is_open(j) ||
+		j->last_empty_seq + 1 != journal_cur_seq(j)));

 	cancel_delayed_work_sync(&j->write_work);
 	cancel_delayed_work_sync(&j->reclaim_work);
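The tightened BUG_ON() spells out the clean-shutdown invariant more directly than the old JOURNAL_NOT_EMPTY flag did: with no journal error there must be no open entry, and the most recently written entry must have been empty. A hypothetical helper restating that condition (not part of this commit, assumes the usual journal accessors):

/*
 * Hypothetical helper: the journal stopped cleanly iff nothing is still
 * open and last_empty_seq trails the current sequence number by exactly one,
 * i.e. the last entry written out contained no keys.
 */
static inline bool journal_stopped_clean(struct journal *j)
{
	return !journal_entry_is_open(j) &&
	       j->last_empty_seq + 1 == journal_cur_seq(j);
}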
@@ -1047,6 +936,9 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	set_bit(JOURNAL_STARTED, &j->flags);

 	journal_pin_new_entry(j, 1);
+
+	j->reservations.idx = journal_cur_seq(j);
+
 	bch2_journal_buf_init(j);

 	c->last_bucket_seq_cleanup = journal_cur_seq(j);

@@ -464,13 +464,8 @@ void bch2_journal_entry_res_resize(struct journal *,
 				   struct journal_entry_res *,
 				   unsigned);

-u64 bch2_journal_last_unwritten_seq(struct journal *);
-int bch2_journal_open_seq_async(struct journal *, u64, struct closure *);
-
-void bch2_journal_wait_on_seq(struct journal *, u64, struct closure *);
-void bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
+int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
 void bch2_journal_flush_async(struct journal *, struct closure *);
-void bch2_journal_meta_async(struct journal *, struct closure *);

 int bch2_journal_flush_seq(struct journal *, u64);
 int bch2_journal_flush(struct journal *);

@@ -161,6 +161,8 @@ static void journal_entry_null_range(void *start, void *end)
 #define journal_entry_err_on(cond, c, msg, ...)				\
 	((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)

+#define FSCK_DELETED_KEY	5
+
 static int journal_validate_key(struct bch_fs *c, struct jset *jset,
 				struct jset_entry *entry,
 				unsigned level, enum btree_id btree_id,
@@ -173,28 +175,42 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
 	int ret = 0;

 	if (journal_entry_err_on(!k->k.u64s, c,
-			"invalid %s in journal: k->u64s 0", type)) {
+			"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: k->u64s 0",
+			type, le64_to_cpu(jset->seq),
+			(u64 *) entry - jset->_data,
+			le32_to_cpu(jset->u64s),
+			(u64 *) k - entry->_data,
+			le16_to_cpu(entry->u64s))) {
 		entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
 		journal_entry_null_range(vstruct_next(entry), next);
-		return 0;
+		return FSCK_DELETED_KEY;
 	}

 	if (journal_entry_err_on((void *) bkey_next(k) >
 				(void *) vstruct_next(entry), c,
-			"invalid %s in journal: extends past end of journal entry",
-			type)) {
+			"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: extends past end of journal entry",
+			type, le64_to_cpu(jset->seq),
+			(u64 *) entry - jset->_data,
+			le32_to_cpu(jset->u64s),
+			(u64 *) k - entry->_data,
+			le16_to_cpu(entry->u64s))) {
 		entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
 		journal_entry_null_range(vstruct_next(entry), next);
-		return 0;
+		return FSCK_DELETED_KEY;
 	}

 	if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
-			"invalid %s in journal: bad format %u",
-			type, k->k.format)) {
-		le16_add_cpu(&entry->u64s, -k->k.u64s);
+			"invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: bad format %u",
+			type, le64_to_cpu(jset->seq),
+			(u64 *) entry - jset->_data,
+			le32_to_cpu(jset->u64s),
+			(u64 *) k - entry->_data,
+			le16_to_cpu(entry->u64s),
+			k->k.format)) {
+		le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
 		memmove(k, bkey_next(k), next - (void *) bkey_next(k));
 		journal_entry_null_range(vstruct_next(entry), next);
-		return 0;
+		return FSCK_DELETED_KEY;
 	}

 	if (!write)
@@ -208,13 +224,18 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
 		char buf[160];

 		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
-		mustfix_fsck_err(c, "invalid %s in journal: %s\n%s",
-				 type, invalid, buf);
+		mustfix_fsck_err(c, "invalid %s in jset %llu offset %zi/%u entry offset %zi/%u: %s\n%s",
+				 type, le64_to_cpu(jset->seq),
+				 (u64 *) entry - jset->_data,
+				 le32_to_cpu(jset->u64s),
+				 (u64 *) k - entry->_data,
+				 le16_to_cpu(entry->u64s),
+				 invalid, buf);

-		le16_add_cpu(&entry->u64s, -k->k.u64s);
+		le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
 		memmove(k, bkey_next(k), next - (void *) bkey_next(k));
 		journal_entry_null_range(vstruct_next(entry), next);
-		return 0;
+		return FSCK_DELETED_KEY;
 	}

 	if (write)
@@ -230,15 +251,17 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
 					     struct jset_entry *entry,
 					     int write)
 {
-	struct bkey_i *k;
+	struct bkey_i *k = entry->start;

-	vstruct_for_each(entry, k) {
+	while (k != vstruct_last(entry)) {
 		int ret = journal_validate_key(c, jset, entry,
 					       entry->level,
 					       entry->btree_id,
 					       k, "key", write);
-		if (ret)
-			return ret;
+		if (ret == FSCK_DELETED_KEY)
+			continue;
+
+		k = bkey_next(k);
 	}

 	return 0;

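journal_entry_validate_btree_keys() switches from vstruct_for_each() to an explicit cursor because a key deleted by journal_validate_key() is overwritten by the keys that follow it; the cursor therefore must not advance on that iteration. A self-contained toy program (not bcachefs code) showing the same delete-in-place iteration pattern:

#include <stdio.h>
#include <string.h>

#define DELETED 5

/* pretend negative values are invalid and get deleted */
static int validate(int v)
{
	return v < 0 ? DELETED : 0;
}

int main(void)
{
	int vals[] = { 1, -2, 3, -4, 5 };
	size_t nr = sizeof(vals) / sizeof(vals[0]);
	size_t i = 0;

	while (i < nr) {
		if (validate(vals[i]) == DELETED) {
			/* shift the tail down over the bad element */
			memmove(&vals[i], &vals[i + 1],
				(nr - i - 1) * sizeof(vals[0]));
			nr--;
			continue;	/* do not advance: vals[i] is already the next element */
		}
		i++;
	}

	for (i = 0; i < nr; i++)
		printf("%d ", vals[i]);	/* prints: 1 3 5 */
	printf("\n");
	return 0;
}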
@@ -432,46 +455,45 @@ static int jset_validate(struct bch_fs *c,
 			"%s sector %llu seq %llu: unknown journal entry version %u",
 			ca->name, sector, le64_to_cpu(jset->seq),
 			version)) {
-		/* XXX: note we might have missing journal entries */
-		return JOURNAL_ENTRY_BAD;
+		/* don't try to continue: */
+		return EINVAL;
 	}

+	if (bytes > (sectors_read << 9) &&
+	    sectors_read < bucket_sectors_left)
+		return JOURNAL_ENTRY_REREAD;
+
 	if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
 			"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
 			ca->name, sector, le64_to_cpu(jset->seq), bytes)) {
-		/* XXX: note we might have missing journal entries */
-		return JOURNAL_ENTRY_BAD;
+		ret = JOURNAL_ENTRY_BAD;
+		le32_add_cpu(&jset->u64s,
+			     -((bytes - (bucket_sectors_left << 9)) / 8));
 	}

-	if (bytes > sectors_read << 9)
-		return JOURNAL_ENTRY_REREAD;
-
 	if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
 			"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
 			ca->name, sector, le64_to_cpu(jset->seq),
-			JSET_CSUM_TYPE(jset)))
-		return JOURNAL_ENTRY_BAD;
+			JSET_CSUM_TYPE(jset))) {
+		ret = JOURNAL_ENTRY_BAD;
+		goto bad_csum_type;
+	}

 	csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
 	if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
 			"%s sector %llu seq %llu: journal checksum bad",
-			ca->name, sector, le64_to_cpu(jset->seq))) {
-		/* XXX: retry IO, when we start retrying checksum errors */
-		/* XXX: note we might have missing journal entries */
-		return JOURNAL_ENTRY_BAD;
-	}
+			ca->name, sector, le64_to_cpu(jset->seq)))
+		ret = JOURNAL_ENTRY_BAD;

 	bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
 		     jset->encrypted_start,
 		     vstruct_end(jset) - (void *) jset->encrypted_start);
+bad_csum_type:
 	if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
 			"invalid journal entry: last_seq > seq")) {
 		jset->last_seq = jset->seq;
 		return JOURNAL_ENTRY_BAD;
 	}

-	return 0;
 fsck_err:
 	return ret;
 }

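Instead of rejecting an oversized journal entry outright, jset_validate() now trims jset->u64s so the keys that did fit in the bucket can still be validated and replayed. A small worked example of that arithmetic with made-up numbers (not bcachefs code; each u64 is 8 bytes):

#include <stdio.h>

int main(void)
{
	unsigned bucket_sectors_left = 8;			/* 8 x 512-byte sectors left */
	size_t   capacity = (size_t) bucket_sectors_left << 9;	/* 4096 bytes                */
	size_t   bytes    = 4608;				/* what the entry claims     */

	if (bytes > capacity) {
		size_t excess_u64s = (bytes - capacity) / 8;

		/* prints: trim jset->u64s by 64 (512 excess bytes) */
		printf("trim jset->u64s by %zu (%zu excess bytes)\n",
		       excess_u64s, bytes - capacity);
	}
	return 0;
}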
@@ -939,24 +961,29 @@ static void journal_write_done(struct closure *cl)
 	struct bch_replicas_padded replicas;
 	u64 seq = le64_to_cpu(w->data->seq);
 	u64 last_seq = le64_to_cpu(w->data->last_seq);
+	int err = 0;

 	bch2_time_stats_update(j->write_time, j->write_start_time);

 	if (!devs.nr) {
 		bch_err(c, "unable to write journal to sufficient devices");
-		goto err;
+		err = -EIO;
+	} else {
+		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);
+		if (bch2_mark_replicas(c, &replicas.e))
+			err = -EIO;
 	}

-	bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);
-	if (bch2_mark_replicas(c, &replicas.e))
-		goto err;
+	if (err)
+		bch2_fatal_error(c);

 	spin_lock(&j->lock);
 	if (seq >= j->pin.front)
 		journal_seq_pin(j, seq)->devs = devs;

 	j->seq_ondisk		= seq;
+	if (err && (!j->err_seq || seq < j->err_seq))
+		j->err_seq	= seq;
 	j->last_seq_ondisk	= last_seq;
 	bch2_journal_space_available(j);

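journal_write_done() now folds both failure cases (no devices written, replicas marking failed) into a single error flag and records where things went wrong rather than jumping to a fatal-error label. A simplified sketch of the err_seq bookkeeping, assuming the field semantics shown in the hunk above (not the actual bcachefs code):

/*
 * j->err_seq keeps the lowest sequence number whose write failed; the
 * reworked bch2_journal_flush_seq_async() earlier in this diff reports -EIO
 * for any sequence at or below that mark.
 */
static void journal_record_write_error(struct journal *j, u64 seq)
{
	if (!j->err_seq || seq < j->err_seq)
		j->err_seq = seq;	/* remember the first failed write */
}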
@@ -968,7 +995,7 @@ static void journal_write_done(struct closure *cl)
 	 * bch2_fs_journal_stop():
 	 */
 	mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
-out:
 	/* also must come before signalling write completion: */
 	closure_debug_destroy(cl);

@@ -982,11 +1009,6 @@ out:
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		mod_delayed_work(system_freezable_wq, &j->write_work, 0);
 	spin_unlock(&j->lock);
-	return;
-err:
-	bch2_fatal_error(c);
-	spin_lock(&j->lock);
-	goto out;
 }

 static void journal_write_endio(struct bio *bio)
@@ -1067,6 +1089,9 @@ void bch2_journal_write(struct closure *cl)
 	SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
 	SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));

+	if (journal_entry_empty(jset))
+		j->last_empty_seq = le64_to_cpu(jset->seq);
+
 	if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
 		validate_before_checksum = true;

@@ -263,6 +263,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
 	while (!fifo_empty(&j->pin) &&
 	       !atomic_read(&fifo_peek_front(&j->pin).count)) {
 		BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
+		BUG_ON(!list_empty(&fifo_peek_front(&j->pin).flushed));
 		BUG_ON(!fifo_pop(&j->pin, temp));
 		popped = true;
 	}
@@ -547,6 +548,12 @@ void bch2_journal_reclaim(struct journal *j)

 		if (j->prereserved.reserved * 2 > j->prereserved.remaining)
 			min_nr = 1;

+		if ((atomic_read(&c->btree_cache.dirty) * 4 >
+		     c->btree_cache.used * 3) ||
+		    (c->btree_key_cache.nr_dirty * 4 >
+		     c->btree_key_cache.nr_keys))
+			min_nr = 1;
+
 	} while (journal_flush_pins(j, seq_to_flush, min_nr));

 	if (!bch2_journal_error(j))
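bch2_journal_reclaim() gains two extra triggers based on how dirty the btree caches are. A toy illustration of the thresholds implied by the new conditions (not bcachefs code): the node cache forces reclaim once more than 3/4 of it is dirty, the key cache once more than 1/4 of its keys are dirty:

#include <stdbool.h>
#include <stdio.h>

static bool node_cache_needs_reclaim(unsigned long dirty, unsigned long used)
{
	return dirty * 4 > used * 3;		/* > 75% dirty */
}

static bool key_cache_needs_reclaim(unsigned long nr_dirty, unsigned long nr_keys)
{
	return nr_dirty * 4 > nr_keys;		/* > 25% dirty */
}

int main(void)
{
	printf("%d %d\n",
	       node_cache_needs_reclaim(80, 100),	/* 1 */
	       node_cache_needs_reclaim(70, 100));	/* 0 */
	printf("%d %d\n",
	       key_cache_needs_reclaim(30, 100),	/* 1 */
	       key_cache_needs_reclaim(20, 100));	/* 0 */
	return 0;
}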
@@ -127,7 +127,6 @@ enum {
 	JOURNAL_STARTED,
 	JOURNAL_RECLAIM_STARTED,
 	JOURNAL_NEED_WRITE,
-	JOURNAL_NOT_EMPTY,
 	JOURNAL_MAY_GET_UNRESERVED,
 };

@@ -181,6 +180,8 @@ struct journal {
 	/* seq, last_seq from the most recent journal entry successfully written */
 	u64			seq_ondisk;
 	u64			last_seq_ondisk;
+	u64			err_seq;
+	u64			last_empty_seq;

 	/*
 	 * FIFO of journal entries whose btree updates have not yet been
@@ -456,6 +456,7 @@ retry:
 		__bch2_btree_iter_set_pos(split_iter, split->k.p, false);
 		bch2_trans_update(&trans, split_iter, split,
 				  BTREE_TRIGGER_NORUN);
+		bch2_trans_iter_put(&trans, split_iter);

 		bch2_btree_iter_set_pos(iter, split->k.p);

@@ -481,6 +482,8 @@ retry:
 				    BTREE_INSERT_LAZY_RW|
 				    BTREE_INSERT_JOURNAL_REPLAY);
 err:
+	bch2_trans_iter_put(&trans, iter);
+
 	if (ret == -EINTR)
 		goto retry;

@@ -458,7 +458,7 @@ STORE(bch2_fs)
 	/* Debugging: */

 	if (attr == &sysfs_trigger_journal_flush)
-		bch2_journal_meta_async(&c->journal, NULL);
+		bch2_journal_meta(&c->journal);

 	if (attr == &sysfs_trigger_btree_coalesce)
 		bch2_coalesce(c);