Update bcachefs sources to 04036b4910 bcachefs: Fix a memory leak

Kent Overstreet 2022-02-27 12:01:32 -05:00
parent b1a2ab6eef
commit 03498f9464
19 changed files with 369 additions and 209 deletions

View File

@ -1 +1 @@
31718a290491ef933e0bfc5fb666a197b08a4d10
04036b491089aeb4bac5d796ae1716d019564f7a

View File

@ -918,6 +918,14 @@ TRACE_EVENT(trans_restart_mem_realloced,
__entry->bytes)
);
DEFINE_EVENT(transaction_restart_iter, trans_restart_key_cache_key_realloced,
TP_PROTO(const char *trans_fn,
unsigned long caller_ip,
enum btree_id btree_id,
struct bpos *pos),
TP_ARGS(trans_fn, caller_ip, btree_id, pos)
);
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */

View File

@ -534,14 +534,10 @@ enum {
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};
struct btree_debug {
unsigned id;
struct dentry *btree;
struct dentry *btree_format;
struct dentry *failed;
};
struct bch_fs_pcpu {
@ -886,7 +882,8 @@ struct bch_fs {
struct bch_memquota_type quotas[QTYP_NR];
/* DEBUG JUNK */
struct dentry *debug;
struct dentry *fs_debug_dir;
struct dentry *btree_debug_dir;
struct btree_debug btree_debug[BTREE_ID_NR];
struct btree *verify_data;
struct btree_node *verify_ondisk;

View File

@ -15,6 +15,13 @@
struct lock_class_key bch2_btree_node_lock_key;
const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
BTREE_FLAGS()
#undef x
NULL
};
void bch2_recalc_btree_reserve(struct bch_fs *c)
{
unsigned i, reserve = 16;
@ -217,15 +224,13 @@ wait_on_io:
goto wait_on_io;
}
if (btree_node_noevict(b))
goto out_unlock;
if (!btree_node_may_write(b))
if (btree_node_noevict(b) ||
btree_node_write_blocked(b) ||
btree_node_will_make_reachable(b))
goto out_unlock;
if (btree_node_dirty(b)) {
if (!flush ||
test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
if (!flush)
goto out_unlock;
/*
* Using the underscore version because we don't want to compact
@ -234,9 +239,9 @@ wait_on_io:
* the post write cleanup:
*/
if (bch2_verify_btree_ondisk)
bch2_btree_node_write(c, b, SIX_LOCK_intent);
bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
else
__bch2_btree_node_write(c, b, false);
__bch2_btree_node_write(c, b, 0);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@ -415,7 +420,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
if (btree_node_dirty(b))
bch2_btree_complete_write(c, b, btree_current_write(b));
clear_btree_node_dirty(c, b);
clear_btree_node_dirty_acct(c, b);
btree_node_data_free(c, b);
}
@ -1059,7 +1064,7 @@ wait_on_io:
six_lock_write(&b->c.lock, NULL, NULL);
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, false);
__bch2_btree_node_write(c, b, 0);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
goto wait_on_io;

View File

@ -7,6 +7,8 @@
extern struct lock_class_key bch2_btree_node_lock_key;
extern const char * const bch2_btree_node_flags[];
struct btree_iter;
void bch2_recalc_btree_reserve(struct bch_fs *);

View File

@ -1059,6 +1059,9 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
bch2_trans_init(&trans, c, 0, 0);
if (initial)
trans.is_initial_gc = true;
for (i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);

View File

@ -477,7 +477,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
};
if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
bch2_btree_node_write(c, b, SIX_LOCK_write);
bch2_btree_node_write(c, b, SIX_LOCK_write, 0);
reinit_iter = true;
}
}
@ -1596,29 +1596,13 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
bch2_journal_pin_drop(&c->journal, &w->journal);
}
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
{
struct btree_write *w = btree_prev_write(b);
unsigned long old, new, v;
bch2_btree_complete_write(c, b, w);
v = READ_ONCE(b->flags);
do {
old = new = v;
if (old & (1U << BTREE_NODE_need_write))
goto do_write;
new &= ~(1U << BTREE_NODE_write_in_flight);
new &= ~(1U << BTREE_NODE_write_in_flight_inner);
} while ((v = cmpxchg(&b->flags, old, new)) != old);
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
return;
do_write:
six_lock_read(&b->c.lock, NULL, NULL);
v = READ_ONCE(b->flags);
do {
old = new = v;
@ -1626,7 +1610,8 @@ do_write:
if ((old & (1U << BTREE_NODE_dirty)) &&
(old & (1U << BTREE_NODE_need_write)) &&
!(old & (1U << BTREE_NODE_never_write)) &&
btree_node_may_write(b)) {
!(old & (1U << BTREE_NODE_write_blocked)) &&
!(old & (1U << BTREE_NODE_will_make_reachable))) {
new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight);
@ -1640,8 +1625,13 @@ do_write:
} while ((v = cmpxchg(&b->flags, old, new)) != old);
if (new & (1U << BTREE_NODE_write_in_flight))
__bch2_btree_node_write(c, b, true);
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
}
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
six_lock_read(&b->c.lock, NULL, NULL);
__btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
}
@ -1756,7 +1746,7 @@ static void btree_write_submit(struct work_struct *work)
bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k);
}
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started)
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
{
struct btree_write_bio *wbio;
struct bset_tree *t;
@ -1773,12 +1763,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta
void *data;
int ret;
if (already_started)
if (flags & BTREE_WRITE_ALREADY_STARTED)
goto do_write;
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
/*
* We may only have a read lock on the btree node - the dirty bit is our
* "lock" against racing with other threads that may be trying to start
@ -1792,13 +1779,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta
if (!(old & (1 << BTREE_NODE_dirty)))
return;
if (!btree_node_may_write(b))
if ((flags & BTREE_WRITE_ONLY_IF_NEED) &&
!(old & (1 << BTREE_NODE_need_write)))
return;
if (old & (1 << BTREE_NODE_never_write))
if (old &
((1 << BTREE_NODE_never_write)|
(1 << BTREE_NODE_write_blocked)))
return;
BUG_ON(old & (1 << BTREE_NODE_write_in_flight));
if (b->written &&
(old & (1 << BTREE_NODE_will_make_reachable)))
return;
if (old & (1 << BTREE_NODE_write_in_flight))
return;
new &= ~(1 << BTREE_NODE_dirty);
new &= ~(1 << BTREE_NODE_need_write);
@ -1998,7 +1993,7 @@ err:
b->written += sectors_to_write;
nowrite:
btree_bounce_free(c, bytes, used_mempool, data);
btree_node_write_done(c, b);
__btree_node_write_done(c, b);
}
/*
@ -2061,12 +2056,13 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
* Use this one if the node is intent locked:
*/
void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
enum six_lock_type lock_type_held)
enum six_lock_type lock_type_held,
unsigned flags)
{
if (lock_type_held == SIX_LOCK_intent ||
(lock_type_held == SIX_LOCK_read &&
six_lock_tryupgrade(&b->c.lock))) {
__bch2_btree_node_write(c, b, false);
__bch2_btree_node_write(c, b, flags);
/* don't cycle lock unnecessarily: */
if (btree_node_just_written(b) &&
@ -2078,7 +2074,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
if (lock_type_held == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
} else {
__bch2_btree_node_write(c, b, false);
__bch2_btree_node_write(c, b, flags);
if (lock_type_held == SIX_LOCK_write &&
btree_node_just_written(b))
bch2_btree_post_write_cleanup(c, b);
@ -2112,30 +2108,3 @@ void bch2_btree_flush_all_writes(struct bch_fs *c)
{
__bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
}
void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
unsigned i;
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos) {
unsigned long flags = READ_ONCE(b->flags);
if (!(flags & (1 << BTREE_NODE_dirty)))
continue;
pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
b,
(flags & (1 << BTREE_NODE_dirty)) != 0,
(flags & (1 << BTREE_NODE_need_write)) != 0,
b->c.level,
b->written,
!list_empty_careful(&b->write_blocked),
b->will_make_reachable != 0,
b->will_make_reachable & 1);
}
rcu_read_unlock();
}
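The write-completion and flush paths above (__btree_node_write_done(), __bch2_btree_node_write()) all update b->flags with the same lock-free pattern: take a snapshot of the flags word, compute the new value, and retry the cmpxchg until no other CPU raced with the update. A minimal, self-contained sketch of that pattern, using C11 atomics in place of the kernel's cmpxchg() and illustrative flag names (not the bcachefs identifiers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { NODE_dirty, NODE_need_write, NODE_write_in_flight };

/*
 * Claim a node for writeback: clear dirty/need_write and set
 * write_in_flight in one atomic step, retrying if another thread
 * changed the flags word between our load and the compare-exchange.
 */
static bool claim_write(_Atomic unsigned long *flags)
{
	unsigned long old = atomic_load(flags), new;

	do {
		if (!(old & (1UL << NODE_dirty)) ||
		    (old & (1UL << NODE_write_in_flight)))
			return false;	/* nothing to do, or already claimed */

		new = old;
		new &= ~(1UL << NODE_dirty);
		new &= ~(1UL << NODE_need_write);
		new |= 1UL << NODE_write_in_flight;
		/* on failure, 'old' is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return true;
}

int main(void)
{
	_Atomic unsigned long flags = (1UL << NODE_dirty) | (1UL << NODE_need_write);

	printf("claimed: %d, flags now: %#lx\n",
	       claim_write(&flags), atomic_load(&flags));
	return 0;
}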

View File

@ -15,18 +15,13 @@ struct btree;
struct btree_iter;
struct btree_node_read_all;
static inline bool btree_node_dirty(struct btree *b)
{
return test_bit(BTREE_NODE_dirty, &b->flags);
}
static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b)
static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
{
if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
atomic_inc(&c->btree_cache.dirty);
}
static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
{
if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
atomic_dec(&c->btree_cache.dirty);
@ -67,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
static inline bool btree_node_may_write(struct btree *b)
{
return list_empty_careful(&b->write_blocked) &&
(!b->written || !b->will_make_reachable);
}
enum compact_mode {
COMPACT_LAZY,
COMPACT_ALL,
@ -148,41 +137,23 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id,
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
#define BTREE_WRITE_ONLY_IF_NEED (1U << 0)
#define BTREE_WRITE_ALREADY_STARTED (1U << 1)
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
enum six_lock_type);
enum six_lock_type, unsigned);
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
enum six_lock_type lock_held)
{
if (b->written &&
btree_node_need_write(b) &&
btree_node_may_write(b) &&
!btree_node_write_in_flight(b))
bch2_btree_node_write(c, b, lock_held);
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
}
#define bch2_btree_node_write_cond(_c, _b, cond) \
do { \
unsigned long old, new, v = READ_ONCE((_b)->flags); \
\
do { \
old = new = v; \
\
if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \
break; \
\
new |= (1 << BTREE_NODE_need_write); \
} while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \
\
btree_node_write_if_need(_c, _b, SIX_LOCK_read); \
} while (0)
void bch2_btree_flush_all_reads(struct bch_fs *);
void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
unsigned version, unsigned big_endian,

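In the header above, the bool already_started parameter becomes an unsigned flags word (BTREE_WRITE_ONLY_IF_NEED, BTREE_WRITE_ALREADY_STARTED), and btree_node_write_if_need() now simply forwards BTREE_WRITE_ONLY_IF_NEED instead of testing the node's flags itself. A small sketch of that calling convention, with stand-in names that are illustrative only, not the bcachefs API:

#include <stdbool.h>
#include <stdio.h>

#define WRITE_ONLY_IF_NEED	(1U << 0)	/* only write if need_write is set */
#define WRITE_ALREADY_STARTED	(1U << 1)	/* skip the claiming checks */

struct node {
	bool	dirty;
	bool	need_write;
};

static void node_write(struct node *n, unsigned flags)
{
	if (!(flags & WRITE_ALREADY_STARTED)) {
		if (!n->dirty)
			return;
		/* the write path, not the caller, applies "only if needed" */
		if ((flags & WRITE_ONLY_IF_NEED) && !n->need_write)
			return;
	}

	n->dirty = n->need_write = false;
	printf("writing node\n");
}

/* the caller's open-coded flag checks collapse into a single flag argument */
static void node_write_if_need(struct node *n)
{
	node_write(n, WRITE_ONLY_IF_NEED);
}

int main(void)
{
	struct node n = { .dirty = true, .need_write = false };

	node_write_if_need(&n);		/* no output: write not needed yet */
	n.need_write = true;
	node_write_if_need(&n);		/* prints "writing node" */
	return 0;
}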
View File

@ -558,7 +558,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
trans_for_each_path(trans, path)
__bch2_btree_path_unlock(path);
BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
/*
* bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
* btree nodes, it implements its own walking:
*/
BUG_ON(!trans->is_initial_gc &&
lock_class_is_held(&bch2_btree_node_lock_key));
}
/* Btree iterator: */

View File

@ -392,6 +392,7 @@ struct btree_trans {
bool restarted:1;
bool memory_allocation_failure:1;
bool journal_transaction_names:1;
bool is_initial_gc:1;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
@ -424,7 +425,31 @@ struct btree_trans {
struct replicas_delta_list *fs_usage_deltas;
};
#define BTREE_FLAG(flag) \
#define BTREE_FLAGS() \
x(read_in_flight) \
x(read_error) \
x(dirty) \
x(need_write) \
x(write_blocked) \
x(will_make_reachable) \
x(noevict) \
x(write_idx) \
x(accessed) \
x(write_in_flight) \
x(write_in_flight_inner) \
x(just_written) \
x(dying) \
x(fake) \
x(need_rewrite) \
x(never_write)
enum btree_flags {
#define x(flag) BTREE_NODE_##flag,
BTREE_FLAGS()
#undef x
};
#define x(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
@ -434,36 +459,8 @@ static inline void set_btree_node_ ## flag(struct btree *b) \
static inline void clear_btree_node_ ## flag(struct btree *b) \
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
enum btree_flags {
BTREE_NODE_read_in_flight,
BTREE_NODE_read_error,
BTREE_NODE_dirty,
BTREE_NODE_need_write,
BTREE_NODE_noevict,
BTREE_NODE_write_idx,
BTREE_NODE_accessed,
BTREE_NODE_write_in_flight,
BTREE_NODE_write_in_flight_inner,
BTREE_NODE_just_written,
BTREE_NODE_dying,
BTREE_NODE_fake,
BTREE_NODE_need_rewrite,
BTREE_NODE_never_write,
};
BTREE_FLAG(read_in_flight);
BTREE_FLAG(read_error);
BTREE_FLAG(need_write);
BTREE_FLAG(noevict);
BTREE_FLAG(write_idx);
BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
BTREE_FLAG(write_in_flight_inner);
BTREE_FLAG(just_written);
BTREE_FLAG(dying);
BTREE_FLAG(fake);
BTREE_FLAG(need_rewrite);
BTREE_FLAG(never_write);
BTREE_FLAGS()
#undef x
static inline struct btree_write *btree_current_write(struct btree *b)
{

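The per-flag helpers and the new bch2_btree_node_flags[] name array are both generated from the single BTREE_FLAGS() x-macro introduced above, so the enum, the strings, and the accessors can no longer drift out of sync. A minimal standalone sketch of that expansion pattern, with illustrative names rather than the bcachefs identifiers:

#include <stdio.h>

/* one list of flag names ... */
#define FLAGS()			\
	x(dirty)		\
	x(need_write)		\
	x(write_blocked)

/* ... expands into an enum of bit numbers ... */
enum flag {
#define x(f)	FLAG_##f,
	FLAGS()
#undef x
};

/* ... a NULL-terminated array of printable names ... */
static const char * const flag_names[] = {
#define x(f)	#f,
	FLAGS()
#undef x
	NULL
};

/* ... and a test helper per flag, all kept in sync automatically */
#define x(f)						\
static inline int node_##f(unsigned long flags)		\
{ return (flags >> FLAG_##f) & 1; }
FLAGS()
#undef x

int main(void)
{
	unsigned long flags = 1UL << FLAG_write_blocked;
	const char * const *n;

	for (n = flag_names; *n; n++)
		printf("%s%s", n == flag_names ? "" : ",", *n);
	printf("\nwrite_blocked? %d\n", node_write_blocked(flags));
	return 0;
}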
View File

@ -271,7 +271,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
six_lock_write(&b->c.lock, NULL, NULL);
set_btree_node_accessed(b);
set_btree_node_dirty(c, b);
set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
bch2_bset_init_first(b, &b->data->keys);
@ -619,6 +619,8 @@ err:
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
if (list_empty(&b->write_blocked))
clear_btree_node_write_blocked(b);
/*
* Node might have been freed, recheck under
@ -663,6 +665,7 @@ err:
BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(b);
}
mutex_unlock(&c->btree_interior_update_lock);
@ -729,6 +732,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->b = b;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock);
@ -794,6 +799,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = 1UL|(unsigned long) as;
set_btree_node_will_make_reachable(b);
mutex_unlock(&c->btree_interior_update_lock);
@ -816,6 +822,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
* xchg() is for synchronization with bch2_btree_complete_write:
*/
v = xchg(&b->will_make_reachable, 0);
clear_btree_node_will_make_reachable(b);
as = (struct btree_update *) (v & ~1UL);
if (!as) {
@ -881,7 +888,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
closure_wake_up(&c->btree_interior_update_wait);
}
clear_btree_node_dirty(c, b);
clear_btree_node_dirty_acct(c, b);
clear_btree_node_need_write(b);
/*
@ -1096,8 +1103,7 @@ static void bch2_btree_set_root(struct btree_update *as,
struct btree *old;
trace_btree_set_root(c, b);
BUG_ON(!b->written &&
!test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags));
BUG_ON(!b->written);
old = btree_node_root(c, b);
@ -1165,7 +1171,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
bch2_btree_node_iter_advance(node_iter, b);
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
set_btree_node_dirty(c, b);
set_btree_node_dirty_acct(c, b);
set_btree_node_need_write(b);
}
@ -1386,8 +1392,8 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock);
bch2_btree_node_write(c, n1, SIX_LOCK_intent);
bch2_btree_node_write(c, n2, SIX_LOCK_intent);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
/*
* Note that on recursive parent_keys == keys, so we
@ -1406,7 +1412,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
bch2_btree_node_write(c, n3, SIX_LOCK_intent);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
}
} else {
trace_btree_compact(c, b);
@ -1414,7 +1420,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
bch2_btree_node_write(c, n1, SIX_LOCK_intent);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
if (parent)
bch2_keylist_add(&as->parent_keys, &n1->key);
@ -1702,7 +1708,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
bch2_btree_node_write(c, n, SIX_LOCK_intent);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bkey_init(&delete.k);
delete.k.p = prev->key.k.p;
@ -1776,7 +1782,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
trace_btree_gc_rewrite_node(c, b);
bch2_btree_node_write(c, n, SIX_LOCK_intent);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);

View File

@ -167,10 +167,24 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
unsigned long old, new, v;
unsigned idx = w - b->writes;
six_lock_read(&b->c.lock, NULL, NULL);
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
v = READ_ONCE(b->flags);
do {
old = new = v;
if (!(old & (1 << BTREE_NODE_dirty)) ||
!!(old & (1 << BTREE_NODE_write_idx)) != idx ||
w->journal.seq != seq)
break;
new |= 1 << BTREE_NODE_need_write;
} while ((v = cmpxchg(&b->flags, old, new)) != old);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
return 0;
}
@ -220,7 +234,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
if (unlikely(!btree_node_dirty(b)))
set_btree_node_dirty(c, b);
set_btree_node_dirty_acct(c, b);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) bset_u64s(t) - old_u64s;
@ -367,7 +381,13 @@ btree_key_can_insert_cached(struct btree_trans *trans,
ck->u64s = new_u64s;
ck->k = new_k;
return BTREE_INSERT_OK;
/*
* Keys returned by peek() are no longer valid pointers, so we need a
* transaction restart:
*/
trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_,
path->btree_id, &path->pos);
return btree_trans_restart(trans);
}
static inline void do_btree_insert_one(struct btree_trans *trans,

View File

@ -185,9 +185,10 @@ out:
/* XXX: bch_fs refcounting */
struct dump_iter {
struct bpos from;
struct bch_fs *c;
struct bch_fs *c;
enum btree_id id;
struct bpos from;
u64 iter;
struct printbuf buf;
@ -226,6 +227,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file)
file->private_data = i;
i->from = POS_MIN;
i->iter = 0;
i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]);
i->id = bd->id;
i->buf = PRINTBUF;
@ -420,10 +422,148 @@ static const struct file_operations bfloat_failed_debug_ops = {
.read = bch2_read_bfloat_failed,
};
static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
struct btree *b)
{
out->tabstops[0] = 32;
pr_buf(out, "%px btree=%s l=%u ",
b,
bch2_btree_ids[b->c.btree_id],
b->c.level);
pr_newline(out);
pr_indent_push(out, 2);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
pr_newline(out);
pr_buf(out, "flags: ");
pr_tab(out);
bch2_flags_to_text(out, bch2_btree_node_flags, b->flags);
pr_newline(out);
pr_buf(out, "written:");
pr_tab(out);
pr_buf(out, "%u", b->written);
pr_newline(out);
pr_buf(out, "writes blocked:");
pr_tab(out);
pr_buf(out, "%u", !list_empty_careful(&b->write_blocked));
pr_newline(out);
pr_buf(out, "will make reachable:");
pr_tab(out);
pr_buf(out, "%lx", b->will_make_reachable);
pr_newline(out);
pr_buf(out, "journal pin %px:", &b->writes[0].journal);
pr_tab(out);
pr_buf(out, "%llu", b->writes[0].journal.seq);
pr_newline(out);
pr_buf(out, "journal pin %px:", &b->writes[1].journal);
pr_tab(out);
pr_buf(out, "%llu", b->writes[1].journal.seq);
pr_newline(out);
pr_indent_pop(out, 2);
}
static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
bool done = false;
int err;
i->ubuf = buf;
i->size = size;
i->ret = 0;
do {
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
err = flush_buf(i);
if (err)
return err;
if (!i->size)
break;
rcu_read_lock();
i->buf.atomic++;
tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
&c->btree_cache.table);
if (i->iter < tbl->size) {
rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
bch2_cached_btree_node_to_text(&i->buf, c, b);
i->iter++;
} else {
done = true;
}
--i->buf.atomic;
rcu_read_unlock();
} while (!done);
if (i->buf.allocation_failure)
return -ENOMEM;
return i->ret;
}
static const struct file_operations cached_btree_nodes_ops = {
.owner = THIS_MODULE,
.open = bch2_dump_open,
.release = bch2_dump_release,
.read = bch2_cached_btree_nodes_read,
};
static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
bool done = false;
int err;
i->ubuf = buf;
i->size = size;
i->ret = 0;
do {
err = flush_buf(i);
if (err)
return err;
if (!i->size)
break;
done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
i->iter++;
} while (!done);
if (i->buf.allocation_failure)
return -ENOMEM;
return i->ret;
}
static const struct file_operations journal_pins_ops = {
.owner = THIS_MODULE,
.open = bch2_dump_open,
.release = bch2_dump_release,
.read = bch2_journal_pins_read,
};
void bch2_fs_debug_exit(struct bch_fs *c)
{
if (!IS_ERR_OR_NULL(c->debug))
debugfs_remove_recursive(c->debug);
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
debugfs_remove_recursive(c->fs_debug_dir);
}
void bch2_fs_debug_init(struct bch_fs *c)
@ -435,29 +575,39 @@ void bch2_fs_debug_init(struct bch_fs *c)
return;
snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
c->debug = debugfs_create_dir(name, bch_debug);
if (IS_ERR_OR_NULL(c->debug))
c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
if (IS_ERR_OR_NULL(c->fs_debug_dir))
return;
debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
c->btree_debug, &cached_btree_nodes_ops);
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;
for (bd = c->btree_debug;
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
bd->btree = debugfs_create_file(bch2_btree_ids[bd->id],
0400, c->debug, bd,
&btree_debug_ops);
debugfs_create_file(bch2_btree_ids[bd->id],
0400, c->btree_debug_dir, bd,
&btree_debug_ops);
snprintf(name, sizeof(name), "%s-formats",
bch2_btree_ids[bd->id]);
bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
&btree_format_debug_ops);
debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
&btree_format_debug_ops);
snprintf(name, sizeof(name), "%s-bfloat-failed",
bch2_btree_ids[bd->id]);
bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
&bfloat_failed_debug_ops);
debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
&bfloat_failed_debug_ops);
}
}

View File

@ -2041,7 +2041,14 @@ retry_pick:
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
if (!pick.ptr.cached &&
/*
* Stale dirty pointers are treated as IO errors, but @failed isn't
* allocated unless we're in the retry path - so if we're not in the
* retry path, don't check here, it'll be caught in bch2_read_endio()
* and we'll end up in the retry path:
*/
if ((flags & BCH_READ_IN_RETRY) &&
!pick.ptr.cached &&
unlikely(ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, k, pick.ptr);
bch2_mark_io_failure(failed, &pick);

View File

@ -1281,35 +1281,59 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
spin_unlock(&j->lock);
}
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
u64 i;
spin_lock(&j->lock);
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
spin_unlock(&j->lock);
return true;
}
out->atomic++;
fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
pr_buf(out, "%llu: count %u\n",
i, atomic_read(&pin_list->count));
pin_list = journal_seq_pin(j, *seq);
list_for_each_entry(pin, &pin_list->key_cache_list, list)
pr_buf(out, "\t%px %ps\n",
pin, pin->flush);
pr_buf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count));
pr_newline(out);
pr_indent_push(out, 2);
list_for_each_entry(pin, &pin_list->list, list)
pr_buf(out, "\t%px %ps\n",
pin, pin->flush);
if (!list_empty(&pin_list->flushed))
pr_buf(out, "flushed:\n");
list_for_each_entry(pin, &pin_list->flushed, list)
pr_buf(out, "\t%px %ps\n",
pin, pin->flush);
list_for_each_entry(pin, &pin_list->list, list) {
pr_buf(out, "\t%px %ps", pin, pin->flush);
pr_newline(out);
}
list_for_each_entry(pin, &pin_list->key_cache_list, list) {
pr_buf(out, "\t%px %ps", pin, pin->flush);
pr_newline(out);
}
if (!list_empty(&pin_list->flushed)) {
pr_buf(out, "flushed:");
pr_newline(out);
}
list_for_each_entry(pin, &pin_list->flushed, list) {
pr_buf(out, "\t%px %ps", pin, pin->flush);
pr_newline(out);
}
pr_indent_pop(out, 2);
--out->atomic;
spin_unlock(&j->lock);
return false;
}
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
{
u64 seq = 0;
while (!bch2_journal_seq_pins_to_text(out, j, &seq))
seq++;
}

View File

@ -501,6 +501,7 @@ void bch2_journal_block(struct journal *);
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);

View File

@ -1420,24 +1420,25 @@ static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
};
static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *orig_err)
struct printbuf *err)
{
unsigned type = le32_to_cpu(f->type);
struct printbuf err = *orig_err;
struct printbuf field_err = PRINTBUF;
int ret;
if (type >= BCH_SB_FIELD_NR)
return 0;
pr_buf(&err, "Invalid superblock section %s: ", bch2_sb_fields[type]);
ret = bch2_sb_field_ops[type]->validate(sb, f, &err);
ret = bch2_sb_field_ops[type]->validate(sb, f, &field_err);
if (ret) {
pr_newline(&err);
bch2_sb_field_to_text(&err, sb, f);
*orig_err = err;
pr_buf(err, "Invalid superblock section %s: %s",
bch2_sb_fields[type],
field_err.buf);
pr_newline(err);
bch2_sb_field_to_text(err, sb, f);
}
printbuf_exit(&field_err);
return ret;
}

View File

@ -174,9 +174,7 @@ read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
read_attribute(journal_pins);
read_attribute(btree_updates);
read_attribute(dirty_btree_nodes);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(btree_transactions);
@ -402,15 +400,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
if (attr == &sysfs_journal_pins)
bch2_journal_pins_to_text(out, &c->journal);
if (attr == &sysfs_btree_updates)
bch2_btree_updates_to_text(out, c);
if (attr == &sysfs_dirty_btree_nodes)
bch2_dirty_btree_nodes_to_text(out, c);
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
@ -564,9 +556,7 @@ SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_journal_debug,
&sysfs_journal_pins,
&sysfs_btree_updates,
&sysfs_dirty_btree_nodes,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_btree_transactions,

View File

@ -300,6 +300,10 @@ static inline void pr_indent_push(struct printbuf *buf, unsigned spaces)
static inline void pr_indent_pop(struct printbuf *buf, unsigned spaces)
{
if (buf->last_newline + buf->indent == buf->pos) {
buf->pos -= spaces;
buf->buf[buf->pos] = 0;
}
buf->indent -= spaces;
}