Mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-22 00:00:03 +03:00

Update bcachefs sources to 176718966e bcachefs: Re-enable hash_redo_key()

This commit is contained in:
parent d2c2c5954c
commit d01f633041
@@ -1 +1 @@
a7694865a3008d6752370caee2ed3c64c1b0f973
176718966e14c5f832ead8cea2e0e45aba51f5ef
@@ -10,6 +10,7 @@
#define list_add(n, h) cds_list_add(n, h)
#define list_add_tail(n, h) cds_list_add_tail(n, h)
#define __list_del_entry(l) cds_list_del(l)
#define __list_del(p, n) __cds_list_del(p, n)
#define list_del(l) cds_list_del(l)
#define list_del_init(l) cds_list_del_init(l)
#define list_replace(o, n) cds_list_replace(o, n)
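These wrappers map the kernel-style list API onto liburcu's cds_list implementation for the userspace build. A minimal usage sketch, assuming urcu/list.h provides struct cds_list_head and CDS_INIT_LIST_HEAD (the example type and function are illustrative, not part of the patch):

#include <urcu/list.h>

struct item {
	int val;
	struct cds_list_head list;	/* embedded list node */
};

static void list_shim_example(void)
{
	struct cds_list_head head;
	struct item a = { .val = 1 };

	CDS_INIT_LIST_HEAD(&head);
	list_add(&a.list, &head);	/* expands to cds_list_add() */
	list_del_init(&a.list);		/* expands to cds_list_del_init() */
}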
@@ -59,7 +59,6 @@
 */

#include <linux/lockdep.h>
#include <linux/osq_lock.h>
#include <linux/sched.h>
#include <linux/types.h>

@@ -105,18 +104,23 @@ enum six_lock_type {

struct six_lock {
	union six_lock_state state;
	unsigned intent_lock_recurse;
	struct task_struct *owner;
	struct optimistic_spin_queue osq;
	unsigned __percpu *readers;

	unsigned intent_lock_recurse;
	raw_spinlock_t wait_lock;
	struct list_head wait_list[2];
	struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
};

struct six_lock_waiter {
	struct list_head list;
	struct task_struct *task;
	enum six_lock_type lock_want;
	bool lock_acquired;
};

typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);

static __always_inline void __six_lock_init(struct six_lock *lock,
@@ -125,8 +129,7 @@ static __always_inline void __six_lock_init(struct six_lock *lock,
{
	atomic64_set(&lock->state.counter, 0);
	raw_spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]);
	INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]);
	INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
@@ -146,6 +149,8 @@ do { \
bool six_trylock_##type(struct six_lock *); \
bool six_relock_##type(struct six_lock *, u32); \
int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\
int six_lock_waiter_##type(struct six_lock *, struct six_lock_waiter *, \
	six_lock_should_sleep_fn, void *); \
void six_unlock_##type(struct six_lock *);

__SIX_LOCK(read)
@@ -182,6 +187,13 @@ static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
	SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p);
}

static inline int six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
	struct six_lock_waiter *wait,
	six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	SIX_LOCK_DISPATCH(type, six_lock_waiter, lock, wait, should_sleep_fn, p);
}

static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	SIX_LOCK_DISPATCH(type, six_unlock, lock);
@@ -196,13 +208,11 @@ void six_lock_increment(struct six_lock *, enum six_lock_type);

void six_lock_wakeup_all(struct six_lock *);

void six_lock_pcpu_free_rcu(struct six_lock *);
void six_lock_pcpu_free(struct six_lock *);
void six_lock_pcpu_alloc(struct six_lock *);

struct six_lock_count {
	unsigned read;
	unsigned intent;
	unsigned n[3];
};

struct six_lock_count six_lock_counts(struct six_lock *);
@@ -18,7 +18,7 @@
	__entry->dst##_snapshot = (src).snapshot

DECLARE_EVENT_CLASS(bpos,
	TP_PROTO(struct bpos *p),
	TP_PROTO(const struct bpos *p),
	TP_ARGS(p),

	TP_STRUCT__entry(
@@ -52,6 +52,31 @@ DECLARE_EVENT_CLASS(bkey,
		__entry->offset, __entry->size)
);

DECLARE_EVENT_CLASS(btree_node,
	TP_PROTO(struct bch_fs *c, struct btree *b),
	TP_ARGS(c, b),

	TP_STRUCT__entry(
		__field(dev_t, dev )
		__field(u8, level )
		__field(u8, btree_id )
		TRACE_BPOS_entries(pos)
	),

	TP_fast_assign(
		__entry->dev = c->dev;
		__entry->level = b->c.level;
		__entry->btree_id = b->c.btree_id;
		TRACE_BPOS_assign(pos, b->key.k.p);
	),

	TP_printk("%d,%d %u %s %llu:%llu:%u",
		MAJOR(__entry->dev), MINOR(__entry->dev),
		__entry->level,
		bch2_btree_ids[__entry->btree_id],
		__entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
);

DECLARE_EVENT_CLASS(bch_fs,
	TP_PROTO(struct bch_fs *c),
	TP_ARGS(c),
@@ -112,7 +137,7 @@ TRACE_EVENT(write_super,

/* io.c: */

DEFINE_EVENT(bio, read_split,
DEFINE_EVENT(bio, read_promote,
	TP_PROTO(struct bio *bio),
	TP_ARGS(bio)
);
@@ -122,12 +147,17 @@ DEFINE_EVENT(bio, read_bounce,
	TP_ARGS(bio)
);

DEFINE_EVENT(bio, read_split,
	TP_PROTO(struct bio *bio),
	TP_ARGS(bio)
);

DEFINE_EVENT(bio, read_retry,
	TP_PROTO(struct bio *bio),
	TP_ARGS(bio)
);

DEFINE_EVENT(bio, promote,
DEFINE_EVENT(bio, read_reuse_race,
	TP_PROTO(struct bio *bio),
	TP_ARGS(bio)
);
@ -220,48 +250,68 @@ TRACE_EVENT(journal_reclaim_finish,
|
||||
__entry->nr_flushed)
|
||||
);
|
||||
|
||||
/* allocator: */
|
||||
|
||||
/* bset.c: */
|
||||
|
||||
DEFINE_EVENT(bpos, bkey_pack_pos_fail,
|
||||
TP_PROTO(struct bpos *p),
|
||||
TP_PROTO(const struct bpos *p),
|
||||
TP_ARGS(p)
|
||||
);
|
||||
|
||||
/* Btree */
|
||||
/* Btree cache: */
|
||||
|
||||
DECLARE_EVENT_CLASS(btree_node,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b),
|
||||
TRACE_EVENT(btree_cache_scan,
|
||||
TP_PROTO(long nr_to_scan, long can_free, long ret),
|
||||
TP_ARGS(nr_to_scan, can_free, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev )
|
||||
__field(u8, level )
|
||||
__field(u8, btree_id )
|
||||
TRACE_BPOS_entries(pos)
|
||||
__field(long, nr_to_scan )
|
||||
__field(long, can_free )
|
||||
__field(long, ret )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = c->dev;
|
||||
__entry->level = b->c.level;
|
||||
__entry->btree_id = b->c.btree_id;
|
||||
TRACE_BPOS_assign(pos, b->key.k.p);
|
||||
__entry->nr_to_scan = nr_to_scan;
|
||||
__entry->can_free = can_free;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk("%d,%d %u %s %llu:%llu:%u",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->level,
|
||||
bch2_btree_ids[__entry->btree_id],
|
||||
__entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
|
||||
TP_printk("scanned for %li nodes, can free %li, ret %li",
|
||||
__entry->nr_to_scan, __entry->can_free, __entry->ret)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_read,
|
||||
DEFINE_EVENT(btree_node, btree_cache_reap,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_write,
|
||||
DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_cache_cannibalize,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
/* Btree */
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_node_read,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_node_write,
|
||||
TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
|
||||
TP_ARGS(b, bytes, sectors),
|
||||
|
||||
@ -291,31 +341,6 @@ DEFINE_EVENT(btree_node, btree_node_free,
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_node_reap,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock_fail,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_node_cannibalize,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_reserve_get_fail,
|
||||
TP_PROTO(const char *trans_fn,
|
||||
unsigned long caller_ip,
|
||||
@ -323,7 +348,7 @@ TRACE_EVENT(btree_reserve_get_fail,
|
||||
TP_ARGS(trans_fn, caller_ip, required),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(size_t, required )
|
||||
),
|
||||
@ -340,52 +365,32 @@ TRACE_EVENT(btree_reserve_get_fail,
|
||||
__entry->required)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_split,
|
||||
DEFINE_EVENT(btree_node, btree_node_compact,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_compact,
|
||||
DEFINE_EVENT(btree_node, btree_node_merge,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_merge,
|
||||
DEFINE_EVENT(btree_node, btree_node_split,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_rewrite,
|
||||
DEFINE_EVENT(btree_node, btree_node_rewrite,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btree_node, btree_set_root,
|
||||
DEFINE_EVENT(btree_node, btree_node_set_root,
|
||||
TP_PROTO(struct bch_fs *c, struct btree *b),
|
||||
TP_ARGS(c, b)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_cache_scan,
|
||||
TP_PROTO(long nr_to_scan, long can_free, long ret),
|
||||
TP_ARGS(nr_to_scan, can_free, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(long, nr_to_scan )
|
||||
__field(long, can_free )
|
||||
__field(long, ret )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nr_to_scan = nr_to_scan;
|
||||
__entry->can_free = can_free;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk("scanned for %li nodes, can free %li, ret %li",
|
||||
__entry->nr_to_scan, __entry->can_free, __entry->ret)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_node_relock_fail,
|
||||
TRACE_EVENT(btree_path_relock_fail,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path,
|
||||
@ -393,26 +398,31 @@ TRACE_EVENT(btree_node_relock_fail,
|
||||
TP_ARGS(trans, caller_ip, path, level),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, btree_id )
|
||||
TRACE_BPOS_entries(pos)
|
||||
__field(unsigned long, node )
|
||||
__array(char, node, 24 )
|
||||
__field(u32, iter_lock_seq )
|
||||
__field(u32, node_lock_seq )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
struct btree *b = btree_path_node(path, level);
|
||||
|
||||
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
__entry->btree_id = path->btree_id;
|
||||
TRACE_BPOS_assign(pos, path->pos);
|
||||
__entry->node = (unsigned long) btree_path_node(path, level);
|
||||
if (IS_ERR(b))
|
||||
strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node));
|
||||
else
|
||||
scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
|
||||
__entry->iter_lock_seq = path->l[level].lock_seq;
|
||||
__entry->node_lock_seq = is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
|
||||
),
|
||||
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %s iter seq %u lock seq %u",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
bch2_btree_ids[__entry->btree_id],
|
||||
@ -424,7 +434,7 @@ TRACE_EVENT(btree_node_relock_fail,
|
||||
__entry->node_lock_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btree_node_upgrade_fail,
|
||||
TRACE_EVENT(btree_path_upgrade_fail,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path,
|
||||
@ -432,7 +442,7 @@ TRACE_EVENT(btree_node_upgrade_fail,
|
||||
TP_ARGS(trans, caller_ip, path, level),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, btree_id )
|
||||
TRACE_BPOS_entries(pos)
|
||||
@ -452,12 +462,12 @@ TRACE_EVENT(btree_node_upgrade_fail,
|
||||
TRACE_BPOS_assign(pos, path->pos);
|
||||
__entry->locked = btree_node_locked(path, level);
|
||||
|
||||
c = bch2_btree_node_lock_counts(trans, NULL, path->l[level].b, level),
|
||||
__entry->self_read_count = c.read;
|
||||
__entry->self_intent_count = c.intent;
|
||||
c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level),
|
||||
__entry->self_read_count = c.n[SIX_LOCK_read];
|
||||
__entry->self_intent_count = c.n[SIX_LOCK_intent];
|
||||
c = six_lock_counts(&path->l[level].b->c.lock);
|
||||
__entry->read_count = c.read;
|
||||
__entry->intent_count = c.intent;
|
||||
__entry->read_count = c.n[SIX_LOCK_read];
|
||||
__entry->intent_count = c.n[SIX_LOCK_read];
|
||||
),
|
||||
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u, locked %u held %u:%u lock count %u:%u",
|
||||
@ -599,7 +609,7 @@ TRACE_EVENT(discard_buckets,
|
||||
__entry->err)
|
||||
);
|
||||
|
||||
TRACE_EVENT(invalidate_bucket,
|
||||
TRACE_EVENT(bucket_invalidate,
|
||||
TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
|
||||
TP_ARGS(c, dev, bucket, sectors),
|
||||
|
||||
@ -625,17 +635,27 @@ TRACE_EVENT(invalidate_bucket,
|
||||
|
||||
/* Moving IO */
|
||||
|
||||
DEFINE_EVENT(bkey, move_extent,
|
||||
DEFINE_EVENT(bkey, move_extent_read,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bkey, move_alloc_mem_fail,
|
||||
DEFINE_EVENT(bkey, move_extent_write,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bkey, move_race,
|
||||
DEFINE_EVENT(bkey, move_extent_finish,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bkey, move_extent_race,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
|
||||
TP_PROTO(const struct bkey *k),
|
||||
TP_ARGS(k)
|
||||
);
|
||||
@ -714,13 +734,15 @@ TRACE_EVENT(copygc_wait,
|
||||
__entry->wait_amount, __entry->until)
|
||||
);
|
||||
|
||||
/* btree transactions: */
|
||||
|
||||
DECLARE_EVENT_CLASS(transaction_event,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
),
|
||||
|
||||
@ -738,7 +760,7 @@ DEFINE_EVENT(transaction_event, transaction_commit,
|
||||
TP_ARGS(trans, caller_ip)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_event, transaction_restart_injected,
|
||||
DEFINE_EVENT(transaction_event, trans_restart_injected,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip)
|
||||
@ -756,10 +778,28 @@ DEFINE_EVENT(transaction_event, trans_restart_journal_res_get,
|
||||
TP_ARGS(trans, caller_ip)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_event, trans_restart_journal_preres_get,
|
||||
|
||||
TRACE_EVENT(trans_restart_journal_preres_get,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip)
|
||||
unsigned long caller_ip,
|
||||
unsigned flags),
|
||||
TP_ARGS(trans, caller_ip, flags),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(unsigned, flags )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
__entry->flags = flags;
|
||||
),
|
||||
|
||||
TP_printk("%s %pS %x", __entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
__entry->flags)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim,
|
||||
@ -805,7 +845,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
|
||||
TP_ARGS(trans, caller_ip, path),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, btree_id )
|
||||
TRACE_BPOS_entries(pos)
|
||||
@ -883,7 +923,7 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_event, transaction_restart_key_cache_upgrade,
|
||||
DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip)
|
||||
@ -935,7 +975,7 @@ TRACE_EVENT(trans_restart_would_deadlock,
|
||||
have, want, want_pos),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, in_traverse_all )
|
||||
__field(u8, reason )
|
||||
@ -982,7 +1022,7 @@ TRACE_EVENT(trans_restart_would_deadlock_write,
|
||||
TP_ARGS(trans),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -999,7 +1039,7 @@ TRACE_EVENT(trans_restart_mem_realloced,
|
||||
TP_ARGS(trans, caller_ip, bytes),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(unsigned long, bytes )
|
||||
),
|
||||
@ -1025,7 +1065,7 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
|
||||
TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 24 )
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(enum btree_id, btree_id )
|
||||
TRACE_BPOS_entries(pos)
|
||||
|
@@ -1217,8 +1217,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	if (ret)
		goto out;

	trace_invalidate_bucket(c, bucket.inode, bucket.offset, cached_sectors);
	this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]);
	trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
	--*nr_to_invalidate;
out:
	bch2_trans_iter_exit(trans, &alloc_iter);
@@ -268,7 +268,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *

	spin_unlock(&c->freelist_lock);

	trace_bucket_alloc(ca, bch2_alloc_reserves[reserve]);
	trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve]);
	return ob;
}

@@ -575,20 +575,19 @@ err:
	if (!ob)
		ob = ERR_PTR(-BCH_ERR_no_buckets_found);

	if (IS_ERR(ob)) {
		trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
			usage.d[BCH_DATA_free].buckets,
			avail,
			bch2_copygc_wait_amount(c),
			c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
			buckets_seen,
			skipped_open,
			skipped_need_journal_commit,
			skipped_nouse,
			cl == NULL,
			bch2_err_str(PTR_ERR(ob)));
		atomic_long_inc(&c->bucket_alloc_fail);
	}
	if (IS_ERR(ob))
		trace_and_count(c, bucket_alloc_fail,
			ca, bch2_alloc_reserves[reserve],
			usage.d[BCH_DATA_free].buckets,
			avail,
			bch2_copygc_wait_amount(c),
			c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
			buckets_seen,
			skipped_open,
			skipped_need_journal_commit,
			skipped_nouse,
			cl == NULL,
			bch2_err_str(PTR_ERR(ob)));

	return ob;
}
@@ -212,6 +212,12 @@
#define dynamic_fault(...) 0
#define race_fault(...) 0

#define trace_and_count(_c, _name, ...) \
do { \
	this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \
	trace_##_name(__VA_ARGS__); \
} while (0)

#define bch2_fs_init_fault(name) \
	dynamic_fault("bcachefs:bch_fs_init:" name)
#define bch2_meta_read_fault(name) \
@@ -329,9 +335,6 @@ BCH_DEBUG_PARAMS_DEBUG()
	x(btree_interior_update_foreground) \
	x(btree_interior_update_total) \
	x(btree_gc) \
	x(btree_lock_contended_read) \
	x(btree_lock_contended_intent) \
	x(btree_lock_contended_write) \
	x(data_write) \
	x(data_read) \
	x(data_promote) \
@@ -535,6 +538,7 @@ struct btree_transaction_stats {
	struct mutex lock;
	struct time_stats lock_hold_times;
	unsigned nr_max_paths;
	unsigned max_mem;
	char *max_paths_text;
};

@@ -917,12 +921,6 @@ struct bch_fs {

	u64 last_bucket_seq_cleanup;

	/* TODO rewrite as counters - The rest of this all shows up in sysfs */
	atomic_long_t read_realloc_races;
	atomic_long_t extent_migrate_done;
	atomic_long_t extent_migrate_raced;
	atomic_long_t bucket_alloc_fail;

	u64 counters_on_mount[BCH_COUNTER_NR];
	u64 __percpu *counters;
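The trace_and_count() macro added above bumps the matching persistent counter and emits the tracepoint in one call, which is the pattern the remaining hunks in this commit convert call sites to. A minimal sketch of a call site, using the bucket_invalidate counter and tracepoint that already exist in this commit (the wrapper function itself is only illustrative):

static void note_bucket_invalidate(struct bch_fs *c, struct bpos bucket, u32 cached_sectors)
{
	/* increments c->counters[BCH_COUNTER_bucket_invalidate], then calls
	   trace_bucket_invalidate() with the remaining arguments */
	trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
}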
@@ -1337,12 +1337,81 @@ struct bch_sb_field_disk_groups {

/* BCH_SB_FIELD_counters */

#define BCH_PERSISTENT_COUNTERS() \
	x(io_read, 0) \
	x(io_write, 1) \
	x(io_move, 2) \
	x(bucket_invalidate, 3) \
	x(bucket_discard, 4)
#define BCH_PERSISTENT_COUNTERS() \
	x(io_read, 0) \
	x(io_write, 1) \
	x(io_move, 2) \
	x(bucket_invalidate, 3) \
	x(bucket_discard, 4) \
	x(bucket_alloc, 5) \
	x(bucket_alloc_fail, 6) \
	x(btree_cache_scan, 7) \
	x(btree_cache_reap, 8) \
	x(btree_cache_cannibalize, 9) \
	x(btree_cache_cannibalize_lock, 10) \
	x(btree_cache_cannibalize_lock_fail, 11) \
	x(btree_cache_cannibalize_unlock, 12) \
	x(btree_node_write, 13) \
	x(btree_node_read, 14) \
	x(btree_node_compact, 15) \
	x(btree_node_merge, 16) \
	x(btree_node_split, 17) \
	x(btree_node_rewrite, 18) \
	x(btree_node_alloc, 19) \
	x(btree_node_free, 20) \
	x(btree_node_set_root, 21) \
	x(btree_path_relock_fail, 22) \
	x(btree_path_upgrade_fail, 23) \
	x(btree_reserve_get_fail, 24) \
	x(journal_entry_full, 25) \
	x(journal_full, 26) \
	x(journal_reclaim_finish, 27) \
	x(journal_reclaim_start, 28) \
	x(journal_write, 29) \
	x(read_promote, 30) \
	x(read_bounce, 31) \
	x(read_split, 33) \
	x(read_retry, 32) \
	x(read_reuse_race, 34) \
	x(move_extent_read, 35) \
	x(move_extent_write, 36) \
	x(move_extent_finish, 37) \
	x(move_extent_race, 38) \
	x(move_extent_alloc_mem_fail, 39) \
	x(copygc, 40) \
	x(copygc_wait, 41) \
	x(gc_gens_end, 42) \
	x(gc_gens_start, 43) \
	x(trans_blocked_journal_reclaim, 44) \
	x(trans_restart_btree_node_reused, 45) \
	x(trans_restart_btree_node_split, 46) \
	x(trans_restart_fault_inject, 47) \
	x(trans_restart_iter_upgrade, 48) \
	x(trans_restart_journal_preres_get, 49) \
	x(trans_restart_journal_reclaim, 50) \
	x(trans_restart_journal_res_get, 51) \
	x(trans_restart_key_cache_key_realloced, 52) \
	x(trans_restart_key_cache_raced, 53) \
	x(trans_restart_mark_replicas, 54) \
	x(trans_restart_mem_realloced, 55) \
	x(trans_restart_memory_allocation_failure, 56) \
	x(trans_restart_relock, 57) \
	x(trans_restart_relock_after_fill, 58) \
	x(trans_restart_relock_key_cache_fill, 59) \
	x(trans_restart_relock_next_node, 60) \
	x(trans_restart_relock_parent_for_fill, 61) \
	x(trans_restart_relock_path, 62) \
	x(trans_restart_relock_path_intent, 63) \
	x(trans_restart_too_many_iters, 64) \
	x(trans_restart_traverse, 65) \
	x(trans_restart_upgrade, 66) \
	x(trans_restart_would_deadlock, 67) \
	x(trans_restart_would_deadlock_write, 68) \
	x(trans_restart_injected, 69) \
	x(trans_restart_key_cache_upgrade, 70) \
	x(trans_traverse_all, 71) \
	x(transaction_commit, 72) \
	x(write_super, 73)

enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
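BCH_PERSISTENT_COUNTERS() is an x-macro list: each consumer defines x() and expands the list to generate a matching enum, string table, and so on. A sketch of how the enum begun above is typically completed; the #undef and BCH_COUNTER_NR lines are an assumption inferred from the BCH_COUNTER_NR reference earlier in this diff, not text shown in the hunk:

enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
	BCH_PERSISTENT_COUNTERS()
#undef x
	BCH_COUNTER_NR		/* used to size c->counters[] and counters_on_mount[] */
};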
@ -14,8 +14,6 @@
|
||||
#include <linux/sched/mm.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
struct lock_class_key bch2_btree_node_lock_key;
|
||||
|
||||
const char * const bch2_btree_node_flags[] = {
|
||||
#define x(f) #f,
|
||||
BTREE_FLAGS()
|
||||
@ -254,7 +252,7 @@ wait_on_io:
|
||||
}
|
||||
out:
|
||||
if (b->hash_val && !ret)
|
||||
trace_btree_node_reap(c, b);
|
||||
trace_and_count(c, btree_cache_reap, c, b);
|
||||
return ret;
|
||||
out_unlock:
|
||||
six_unlock_write(&b->c.lock);
|
||||
@ -378,7 +376,7 @@ out:
|
||||
ret = freed;
|
||||
memalloc_nofs_restore(flags);
|
||||
out_norestore:
|
||||
trace_btree_cache_scan(sc->nr_to_scan, can_free, ret);
|
||||
trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -514,7 +512,7 @@ void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
|
||||
if (bc->alloc_lock == current) {
|
||||
trace_btree_node_cannibalize_unlock(c);
|
||||
trace_and_count(c, btree_cache_cannibalize_unlock, c);
|
||||
bc->alloc_lock = NULL;
|
||||
closure_wake_up(&bc->alloc_wait);
|
||||
}
|
||||
@ -530,7 +528,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
|
||||
goto success;
|
||||
|
||||
if (!cl) {
|
||||
trace_btree_node_cannibalize_lock_fail(c);
|
||||
trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -544,11 +542,11 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
|
||||
goto success;
|
||||
}
|
||||
|
||||
trace_btree_node_cannibalize_lock_fail(c);
|
||||
trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
|
||||
return -EAGAIN;
|
||||
|
||||
success:
|
||||
trace_btree_node_cannibalize_lock(c);
|
||||
trace_and_count(c, btree_cache_cannibalize_lock, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -672,7 +670,7 @@ err_locked:
|
||||
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
trace_btree_node_cannibalize(c);
|
||||
trace_and_count(c, btree_cache_cannibalize, c);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -701,7 +699,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
* been freed:
|
||||
*/
|
||||
if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
|
||||
trace_trans_restart_relock_parent_for_fill(trans, _THIS_IP_, path);
|
||||
trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
|
||||
}
|
||||
|
||||
@ -709,7 +707,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
|
||||
if (trans && b == ERR_PTR(-ENOMEM)) {
|
||||
trans->memory_allocation_failure = true;
|
||||
trace_trans_restart_memory_allocation_failure(trans, _THIS_IP_, path);
|
||||
trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
|
||||
}
|
||||
|
||||
@ -758,7 +756,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
|
||||
if (!six_relock_type(&b->c.lock, lock_type, seq)) {
|
||||
if (trans)
|
||||
trace_trans_restart_relock_after_fill(trans, _THIS_IP_, path);
|
||||
trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
|
||||
}
|
||||
|
||||
@ -896,7 +894,7 @@ lock_node:
|
||||
if (btree_node_read_locked(path, level + 1))
|
||||
btree_node_unlock(trans, path, level + 1);
|
||||
|
||||
ret = btree_node_lock(trans, path, b, k->k.p, level, lock_type,
|
||||
ret = btree_node_lock(trans, path, &b->c, k->k.p, level, lock_type,
|
||||
lock_node_check_fn, (void *) k, trace_ip);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
@ -913,7 +911,7 @@ lock_node:
|
||||
if (bch2_btree_node_relock(trans, path, level + 1))
|
||||
goto retry;
|
||||
|
||||
trace_trans_restart_btree_node_reused(trans, trace_ip, path);
|
||||
trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
|
||||
}
|
||||
}
|
||||
@ -969,12 +967,13 @@ lock_node:
|
||||
return b;
|
||||
}
|
||||
|
||||
struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
|
||||
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
|
||||
const struct bkey_i *k,
|
||||
enum btree_id btree_id,
|
||||
unsigned level,
|
||||
bool nofill)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
struct bset_tree *t;
|
||||
@ -1008,9 +1007,14 @@ retry:
|
||||
goto out;
|
||||
} else {
|
||||
lock_node:
|
||||
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
|
||||
if (ret)
|
||||
goto retry;
|
||||
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
goto retry;
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.btree_id != btree_id ||
|
||||
@ -1072,8 +1076,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c,
|
||||
return PTR_ERR_OR_ZERO(b);
|
||||
}
|
||||
|
||||
void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
|
||||
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
|
||||
@ -1089,8 +1094,8 @@ wait_on_io:
|
||||
__bch2_btree_node_wait_on_read(b);
|
||||
__bch2_btree_node_wait_on_write(b);
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
if (btree_node_dirty(b)) {
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
|
@@ -5,8 +5,6 @@
#include "bcachefs.h"
#include "btree_types.h"

extern struct lock_class_key bch2_btree_node_lock_key;

extern const char * const bch2_btree_node_flags[];

struct btree_iter;
@@ -28,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
	const struct bkey_i *, unsigned,
	enum six_lock_type, unsigned long);

struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
	enum btree_id, unsigned, bool);

int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
	const struct bkey_i *, enum btree_id, unsigned);

void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);

void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_btree_node_update_key_early(struct bch_fs *c,
|
||||
static void bch2_btree_node_update_key_early(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_i *new)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b;
|
||||
struct bkey_buf tmp;
|
||||
int ret;
|
||||
@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c,
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
bch2_bkey_buf_reassemble(&tmp, c, old);
|
||||
|
||||
b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
|
||||
b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
|
||||
if (!IS_ERR_OR_NULL(b)) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
|
||||
@ -352,8 +353,9 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
|
||||
static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_and_journal_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf prev_k, cur_k;
|
||||
@ -378,7 +380,7 @@ again:
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
bch2_bkey_buf_reassemble(&cur_k, c, k);
|
||||
|
||||
cur = bch2_btree_node_get_noiter(c, cur_k.k,
|
||||
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(cur);
|
||||
@ -392,7 +394,7 @@ again:
|
||||
bch2_btree_ids[b->c.btree_id],
|
||||
b->c.level - 1,
|
||||
buf.buf)) {
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
cur = NULL;
|
||||
@ -411,7 +413,7 @@ again:
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
six_unlock_read(&cur->c.lock);
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
cur = NULL;
|
||||
@ -425,7 +427,7 @@ again:
|
||||
prev = NULL;
|
||||
|
||||
if (ret == DROP_PREV_NODE) {
|
||||
bch2_btree_node_evict(c, prev_k.k);
|
||||
bch2_btree_node_evict(trans, prev_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, prev_k.k->k.p);
|
||||
if (ret)
|
||||
@ -465,7 +467,7 @@ again:
|
||||
bch2_bkey_buf_reassemble(&cur_k, c, k);
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
|
||||
cur = bch2_btree_node_get_noiter(c, cur_k.k,
|
||||
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(cur);
|
||||
@ -476,12 +478,12 @@ again:
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_repair_topology_recurse(c, cur);
|
||||
ret = bch2_btree_repair_topology_recurse(trans, cur);
|
||||
six_unlock_read(&cur->c.lock);
|
||||
cur = NULL;
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
bch2_btree_node_evict(c, cur_k.k);
|
||||
bch2_btree_node_evict(trans, cur_k.k);
|
||||
ret = bch2_journal_key_delete(c, b->c.btree_id,
|
||||
b->c.level, cur_k.k->k.p);
|
||||
dropped_children = true;
|
||||
@ -522,18 +524,21 @@ fsck_err:
|
||||
|
||||
static int bch2_repair_topology(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree *b;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR && !ret; i++) {
|
||||
b = c->btree_roots[i].b;
|
||||
if (btree_node_fake(b))
|
||||
continue;
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
ret = bch2_btree_repair_topology_recurse(c, b);
|
||||
six_unlock_read(&b->c.lock);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
ret = bch2_btree_repair_topology_recurse(&trans, b);
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
bch_err(c, "empty btree root - repair unimplemented");
|
||||
@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
|
||||
unsigned level, bool is_root,
|
||||
struct bkey_s_c *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p = { 0 };
|
||||
@ -747,7 +755,7 @@ found:
|
||||
}
|
||||
|
||||
if (level)
|
||||
bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
|
||||
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
|
||||
|
||||
if (c->opts.verbose) {
|
||||
printbuf_reset(&buf);
|
||||
@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
|
||||
BUG_ON(bch2_journal_seq_verify &&
|
||||
k->k->version.lo > atomic64_read(&c->journal.seq));
|
||||
|
||||
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
|
||||
ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
|
||||
bch2_bkey_buf_reassemble(&cur, c, k);
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
|
||||
child = bch2_btree_node_get_noiter(c, cur.k,
|
||||
child = bch2_btree_node_get_noiter(trans, cur.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
ret = PTR_ERR_OR_ZERO(child);
|
||||
@ -1934,7 +1942,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
if (!mutex_trylock(&c->gc_gens_lock))
|
||||
return 0;
|
||||
|
||||
trace_gc_gens_start(c);
|
||||
trace_and_count(c, gc_gens_start, c);
|
||||
down_read(&c->gc_lock);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
@ -1995,7 +2003,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
c->gc_count++;
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
|
||||
trace_gc_gens_end(c);
|
||||
trace_and_count(c, gc_gens_end, c);
|
||||
err:
|
||||
for_each_member_device(ca, c, i) {
|
||||
kvfree(ca->oldest_gen);
|
||||
|
@@ -1490,7 +1490,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
	struct bio *bio;
	int ret;

	trace_btree_read(c, b);
	trace_and_count(c, btree_node_read, c, b);

	if (bch2_verify_all_btree_replicas &&
	    !btree_node_read_all_replicas(c, b, sync))
@@ -1657,9 +1657,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)

static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
	six_lock_read(&b->c.lock, NULL, NULL);
	struct btree_trans trans;

	bch2_trans_init(&trans, c, 0, 0);

	btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
	__btree_node_write_done(c, b);
	six_unlock_read(&b->c.lock);

	bch2_trans_exit(&trans);
}

static void btree_node_write_work(struct work_struct *work)
@@ -1979,7 +1985,7 @@ do_write:
	    c->opts.nochanges)
		goto err;

	trace_btree_write(b, bytes_to_write, sectors_to_write);
	trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write);

	wbio = container_of(bio_alloc_bioset(NULL,
		buf_pages(data, sectors_to_write << 9),
@ -130,443 +130,6 @@ static inline bool btree_path_pos_in_node(struct btree_path *path,
|
||||
!btree_path_pos_after_node(path, b);
|
||||
}
|
||||
|
||||
/* Btree node locking: */
|
||||
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *trans,
|
||||
struct btree_path *path, struct btree *b)
|
||||
{
|
||||
bch2_btree_node_unlock_write_inlined(trans, path, b);
|
||||
}
|
||||
|
||||
struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
|
||||
struct btree_path *skip,
|
||||
struct btree *b,
|
||||
unsigned level)
|
||||
{
|
||||
struct btree_path *path;
|
||||
struct six_lock_count ret = { 0, 0 };
|
||||
|
||||
if (IS_ERR_OR_NULL(b))
|
||||
return ret;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (path != skip && path->l[level].b == b) {
|
||||
ret.read += btree_node_read_locked(path, level);
|
||||
ret.intent += btree_node_intent_locked(path, level);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void six_lock_readers_add(struct six_lock *lock, int nr)
|
||||
{
|
||||
if (!lock->readers)
|
||||
atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
|
||||
else
|
||||
this_cpu_add(*lock->readers, nr);
|
||||
}
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
|
||||
{
|
||||
int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read;
|
||||
|
||||
/*
|
||||
* Must drop our read locks before calling six_lock_write() -
|
||||
* six_unlock() won't do wakeups until the reader count
|
||||
* goes to 0, and it's safe because we have the node intent
|
||||
* locked:
|
||||
*/
|
||||
six_lock_readers_add(&b->c.lock, -readers);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
six_lock_readers_add(&b->c.lock, readers);
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = btree_path_node(path, level);
|
||||
int want = __btree_lock_want(path, level);
|
||||
|
||||
if (!is_btree_node(path, level))
|
||||
goto fail;
|
||||
|
||||
if (race_fault())
|
||||
goto fail;
|
||||
|
||||
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
|
||||
(btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, b, level, want))) {
|
||||
mark_btree_node_locked(trans, path, level, want);
|
||||
return true;
|
||||
}
|
||||
fail:
|
||||
if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) &&
|
||||
b != ERR_PTR(-BCH_ERR_no_btree_node_init))
|
||||
trace_btree_node_relock_fail(trans, _RET_IP_, path, level);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = path->l[level].b;
|
||||
|
||||
if (!is_btree_node(path, level))
|
||||
return false;
|
||||
|
||||
switch (btree_lock_want(path, level)) {
|
||||
case BTREE_NODE_UNLOCKED:
|
||||
BUG_ON(btree_node_locked(path, level));
|
||||
return true;
|
||||
case BTREE_NODE_READ_LOCKED:
|
||||
BUG_ON(btree_node_intent_locked(path, level));
|
||||
return bch2_btree_node_relock(trans, path, level);
|
||||
case BTREE_NODE_INTENT_LOCKED:
|
||||
break;
|
||||
}
|
||||
|
||||
if (btree_node_intent_locked(path, level))
|
||||
return true;
|
||||
|
||||
if (race_fault())
|
||||
return false;
|
||||
|
||||
if (btree_node_locked(path, level)
|
||||
? six_lock_tryupgrade(&b->c.lock)
|
||||
: six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
|
||||
if (btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
goto success;
|
||||
}
|
||||
|
||||
trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level);
|
||||
return false;
|
||||
success:
|
||||
mark_btree_node_intent_locked(trans, path, level);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
bool upgrade)
|
||||
{
|
||||
unsigned l = path->level;
|
||||
int fail_idx = -1;
|
||||
|
||||
do {
|
||||
if (!btree_path_node(path, l))
|
||||
break;
|
||||
|
||||
if (!(upgrade
|
||||
? bch2_btree_node_upgrade(trans, path, l)
|
||||
: bch2_btree_node_relock(trans, path, l)))
|
||||
fail_idx = l;
|
||||
|
||||
l++;
|
||||
} while (l < path->locks_want);
|
||||
|
||||
/*
|
||||
* When we fail to get a lock, we have to ensure that any child nodes
|
||||
* can't be relocked so bch2_btree_path_traverse has to walk back up to
|
||||
* the node that we failed to relock:
|
||||
*/
|
||||
if (fail_idx >= 0) {
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
|
||||
do {
|
||||
path->l[fail_idx].b = upgrade
|
||||
? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
|
||||
: ERR_PTR(-BCH_ERR_no_btree_node_relock);
|
||||
--fail_idx;
|
||||
} while (fail_idx >= 0);
|
||||
}
|
||||
|
||||
if (path->uptodate == BTREE_ITER_NEED_RELOCK)
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
|
||||
return path->uptodate < BTREE_ITER_NEED_RELOCK;
|
||||
}
|
||||
|
||||
static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
|
||||
bool cached)
|
||||
{
|
||||
return !cached
|
||||
? container_of(_b, struct btree, c)->key.k.p
|
||||
: container_of(_b, struct bkey_cached, c)->key.pos;
|
||||
}
|
||||
|
||||
/* Slowpath: */
|
||||
int __bch2_btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
unsigned reason;
|
||||
|
||||
/* Check if it's safe to block: */
|
||||
trans_for_each_path(trans, linked) {
|
||||
if (!linked->nodes_locked)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't block taking an intent lock if we have _any_ nodes read
|
||||
* locked:
|
||||
*
|
||||
* - Our read lock blocks another thread with an intent lock on
|
||||
* the same node from getting a write lock, and thus from
|
||||
* dropping its intent lock
|
||||
*
|
||||
* - And the other thread may have multiple nodes intent locked:
|
||||
* both the node we want to intent lock, and the node we
|
||||
* already have read locked - deadlock:
|
||||
*/
|
||||
if (type == SIX_LOCK_intent &&
|
||||
linked->nodes_locked != linked->nodes_intent_locked) {
|
||||
reason = 1;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
if (linked->btree_id != path->btree_id) {
|
||||
if (linked->btree_id < path->btree_id)
|
||||
continue;
|
||||
|
||||
reason = 3;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Within the same btree, non-cached paths come before cached
|
||||
* paths:
|
||||
*/
|
||||
if (linked->cached != path->cached) {
|
||||
if (!linked->cached)
|
||||
continue;
|
||||
|
||||
reason = 4;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interior nodes must be locked before their descendants: if
|
||||
* another path has possible descendants locked of the node
|
||||
* we're about to lock, it must have the ancestors locked too:
|
||||
*/
|
||||
if (level > __fls(linked->nodes_locked)) {
|
||||
reason = 5;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/* Must lock btree nodes in key order: */
|
||||
if (btree_node_locked(linked, level) &&
|
||||
bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
|
||||
linked->cached)) <= 0) {
|
||||
reason = 7;
|
||||
goto deadlock;
|
||||
}
|
||||
}
|
||||
|
||||
return btree_node_lock_type(trans, path, b, pos, level,
|
||||
type, should_sleep_fn, p);
|
||||
deadlock:
|
||||
trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
}
|
||||
|
||||
/* Btree iterator locking: */
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
|
||||
static void bch2_btree_path_verify_locks(struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
if (!path->nodes_locked) {
|
||||
BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
|
||||
btree_path_node(path, path->level));
|
||||
return;
|
||||
}
|
||||
|
||||
for (l = 0; btree_path_node(path, l); l++)
|
||||
BUG_ON(btree_lock_want(path, l) !=
|
||||
btree_node_locked_type(path, l));
|
||||
}
|
||||
|
||||
void bch2_trans_verify_locks(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
bch2_btree_path_verify_locks(path);
|
||||
}
|
||||
#else
|
||||
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
|
||||
#endif
|
||||
|
||||
/* Btree path locking: */
|
||||
|
||||
/*
|
||||
* Only for btree_cache.c - only relocks intent locks
|
||||
*/
|
||||
int bch2_btree_path_relock_intent(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
for (l = path->level;
|
||||
l < path->locks_want && btree_path_node(path, l);
|
||||
l++) {
|
||||
if (!bch2_btree_node_relock(trans, path, l)) {
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
trace_trans_restart_relock_path_intent(trans, _RET_IP_, path);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__flatten
|
||||
static bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
return btree_path_get_locks(trans, path, false);
|
||||
}
|
||||
|
||||
static int bch2_btree_path_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
|
||||
trace_trans_restart_relock_path(trans, trace_ip, path);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
|
||||
EBUG_ON(path->locks_want >= new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
|
||||
if (btree_path_get_locks(trans, path, true))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* XXX: this is ugly - we'd prefer to not be mucking with other
|
||||
* iterators in the btree_trans here.
|
||||
*
|
||||
* On failure to upgrade the iterator, setting iter->locks_want and
|
||||
* calling get_locks() is sufficient to make bch2_btree_path_traverse()
|
||||
* get the locks we want on transaction restart.
|
||||
*
|
||||
* But if this iterator was a clone, on transaction restart what we did
|
||||
* to this iterator isn't going to be preserved.
|
||||
*
|
||||
* Possibly we could add an iterator field for the parent iterator when
|
||||
* an iterator is a copy - for now, we'll just upgrade any other
|
||||
* iterators with the same btree id.
|
||||
*
|
||||
* The code below used to be needed to ensure ancestor nodes get locked
|
||||
* before interior nodes - now that's handled by
|
||||
* bch2_btree_path_traverse_all().
|
||||
*/
|
||||
if (!path->cached && !trans->in_traverse_all)
|
||||
trans_for_each_path(trans, linked)
|
||||
if (linked != path &&
|
||||
linked->cached == path->cached &&
|
||||
linked->btree_id == path->btree_id &&
|
||||
linked->locks_want < new_locks_want) {
|
||||
linked->locks_want = new_locks_want;
|
||||
btree_path_get_locks(trans, linked, true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void __bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
EBUG_ON(path->locks_want < new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
|
||||
while (path->nodes_locked &&
|
||||
(l = __fls(path->nodes_locked)) >= path->locks_want) {
|
||||
if (l > path->level) {
|
||||
btree_node_unlock(trans, path, l);
|
||||
} else {
|
||||
if (btree_node_intent_locked(path, l)) {
|
||||
six_lock_downgrade(&path->l[l].b->c.lock);
|
||||
path->nodes_intent_locked ^= 1 << l;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_btree_path_verify_locks(path);
|
||||
}
|
||||
|
||||
void bch2_trans_downgrade(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
bch2_btree_path_downgrade(trans, path);
|
||||
}
|
||||
|
||||
/* Btree transaction locking: */
|
||||
|
||||
int bch2_trans_relock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
if (unlikely(trans->restarted))
|
||||
return -BCH_ERR_transaction_restart_relock;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (path->should_be_locked &&
|
||||
bch2_btree_path_relock(trans, path, _RET_IP_)) {
|
||||
trace_trans_restart_relock(trans, _RET_IP_, path);
|
||||
BUG_ON(!trans->restarted);
|
||||
return -BCH_ERR_transaction_restart_relock;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_trans_unlock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
|
||||
/*
|
||||
* bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
|
||||
* btree nodes, it implements its own walking:
|
||||
*/
|
||||
BUG_ON(!trans->is_initial_gc &&
|
||||
lock_class_is_held(&bch2_btree_node_lock_key));
|
||||
}
|
||||
|
||||
/* Btree iterator: */
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
@ -795,7 +358,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
		if (cmp < 0)
			continue;

		if (!(path->nodes_locked & 1) ||
		if (!btree_node_locked(path, 0) ||
		    !path->should_be_locked)
			continue;

@ -1161,13 +724,13 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (!path->cached &&
		if (path->uptodate == BTREE_ITER_UPTODATE &&
		    !path->cached &&
		    btree_path_pos_in_node(path, b)) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (path->nodes_locked &&
			    t != BTREE_NODE_UNLOCKED) {
			if (t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(trans, path, b->c.level);
				six_lock_increment(&b->c.lock, t);
				mark_btree_node_locked(trans, path, b->c.level, t);
@ -1232,7 +795,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
		}

		lock_type = __btree_lock_want(path, path->level);
		ret = btree_node_lock(trans, path, b, SPOS_MAX,
		ret = btree_node_lock(trans, path, &b->c, SPOS_MAX,
				      path->level, lock_type,
				      lock_root_check_fn, rootp,
				      trace_ip);
@ -1517,7 +1080,7 @@ err:

	trans->in_traverse_all = false;

	trace_trans_traverse_all(trans, trace_ip);
	trace_and_count(c, trans_traverse_all, trans, trace_ip);
	return ret;
}

@ -1654,7 +1217,7 @@ int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
		u64 mask = ~(~0ULL << restart_probability_bits);

		if ((prandom_u32() & mask) == mask) {
			trace_transaction_restart_injected(trans, _RET_IP_);
			trace_and_count(trans->c, trans_restart_injected, trans, _RET_IP_);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
		}
	}
@ -1956,7 +1519,6 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans,
	path->ref = 0;
	path->intent_ref = 0;
	path->nodes_locked = 0;
	path->nodes_intent_locked = 0;

	btree_path_list_add(trans, pos, path);
	return path;
@ -2006,7 +1568,6 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
	path->level = level;
	path->locks_want = locks_want;
	path->nodes_locked = 0;
	path->nodes_intent_locked = 0;
	for (i = 0; i < ARRAY_SIZE(path->l); i++)
		path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
#ifdef CONFIG_BCACHEFS_DEBUG
@ -2030,10 +1591,8 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
	 */

	locks_want = min(locks_want, BTREE_MAX_DEPTH);
	if (locks_want > path->locks_want) {
		path->locks_want = locks_want;
		btree_path_get_locks(trans, path, true);
	}
	if (locks_want > path->locks_want)
		bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);

	return path;
}
@ -2166,7 +1725,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
		path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
		path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		trace_trans_restart_relock_next_node(trans, _THIS_IP_, path);
		trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
		goto err;
	}
@ -3185,9 +2744,11 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)

void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	size_t new_top = trans->mem_top + size;
	unsigned new_top = trans->mem_top + size;
	void *p;

	trans->mem_max = max(trans->mem_max, new_top);

	if (new_top > trans->mem_bytes) {
		size_t old_bytes = trans->mem_bytes;
		size_t new_bytes = roundup_pow_of_two(new_top);
@ -3209,7 +2770,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
		trans->mem_bytes = new_bytes;

		if (old_bytes) {
			trace_trans_restart_mem_realloced(trans, _RET_IP_, new_bytes);
			trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
			return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
		}
	}
@ -3325,12 +2886,10 @@ static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct b
	return i;
}

void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
		       unsigned expected_nr_iters,
		       size_t expected_mem_bytes,
		       const char *fn)
void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn)
	__acquires(&c->btree_trans_barrier)
{
	struct btree_transaction_stats *s;
	struct btree_trans *pos;

	BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
@ -3344,7 +2903,10 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,

	bch2_trans_alloc_paths(trans, c);

	if (expected_mem_bytes) {
	s = btree_trans_stats(trans);
	if (s) {
		unsigned expected_mem_bytes = s->max_mem;

		trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes);
		trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL);

@ -3395,9 +2957,13 @@ void bch2_trans_exit(struct btree_trans *trans)
{
	struct btree_insert_entry *i;
	struct bch_fs *c = trans->c;
	struct btree_transaction_stats *s = btree_trans_stats(trans);

	bch2_trans_unlock(trans);

	if (s)
		s->max_mem = max(s->max_mem, trans->mem_max);

	trans_for_each_update(trans, i)
		__btree_path_put(i->path, true);
	trans->nr_updates = 0;
@ -3444,12 +3010,23 @@ void bch2_trans_exit(struct btree_trans *trans)

static void __maybe_unused
bch2_btree_path_node_to_text(struct printbuf *out,
			     struct btree_bkey_cached_common *b,
			     bool cached)
			     struct btree_bkey_cached_common *b)
{
	struct six_lock_count c = six_lock_counts(&b->lock);
	struct task_struct *owner;
	pid_t pid;

	rcu_read_lock();
	owner = READ_ONCE(b->lock.owner);
	pid = owner ? owner->pid : 0;;
	rcu_read_unlock();

	prt_printf(out, " l=%u %s:",
		   b->level, bch2_btree_ids[b->btree_id]);
	bch2_bpos_to_text(out, btree_node_pos(b, cached));
	bch2_bpos_to_text(out, btree_node_pos(b));

	prt_printf(out, " locks %u:%u:%u held by pid %u",
		   c.n[0], c.n[1], c.n[2], pid);
}

void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
@ -3476,9 +3053,9 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
		for (l = 0; l < BTREE_MAX_DEPTH; l++) {
			if (btree_node_locked(path, l) &&
			    !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
				prt_printf(out, " %s l=%u ",
					   btree_node_intent_locked(path, l) ? "i" : "r", l);
				bch2_btree_path_node_to_text(out, b, path->cached);
				prt_printf(out, " %c l=%u ",
					   lock_types[btree_node_locked_type(path, l)], l);
				bch2_btree_path_node_to_text(out, b);
				prt_printf(out, "\n");
			}
		}
@ -3496,7 +3073,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
		bch2_bpos_to_text(out, trans->locking_pos);

		prt_printf(out, " node ");
		bch2_btree_path_node_to_text(out, b, path->cached);
		bch2_btree_path_node_to_text(out, b);
		prt_printf(out, "\n");
	}
}

@ -145,12 +145,10 @@ struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,

#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_verify_paths(struct btree_trans *);
void bch2_trans_verify_locks(struct btree_trans *);
void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
			    struct bpos, bool);
#else
static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
					  struct bpos pos, bool key_cache) {}
#endif
@ -195,20 +193,6 @@ static inline int btree_trans_restart(struct btree_trans *trans, int err)
bool bch2_btree_node_upgrade(struct btree_trans *,
			     struct btree_path *, unsigned);

bool __bch2_btree_path_upgrade(struct btree_trans *,
			       struct btree_path *, unsigned);

static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned new_locks_want)
{
	new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);

	return path->locks_want < new_locks_want
		? __bch2_btree_path_upgrade(trans, path, new_locks_want)
		: path->uptodate == BTREE_ITER_UPTODATE;
}

void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);

static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
@ -367,8 +351,8 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *

static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
	if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) {
		trace_trans_restart_too_many_iters(trans, _THIS_IP_);
	if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) {
		trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
	}

@ -544,11 +528,10 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
		       unsigned, size_t, const char *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *);
void bch2_trans_exit(struct btree_trans *);

#define bch2_trans_init(...) __bch2_trans_init(__VA_ARGS__, __func__)
#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__)

void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);

@ -13,6 +13,11 @@
|
||||
#include <linux/sched/mm.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static inline bool btree_uses_pcpu_readers(enum btree_id id)
|
||||
{
|
||||
return id == BTREE_ID_subvolumes;
|
||||
}
|
||||
|
||||
static struct kmem_cache *bch2_key_cache;
|
||||
|
||||
static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
|
||||
@ -84,7 +89,10 @@ static void bkey_cached_free(struct btree_key_cache *bc,
|
||||
ck->btree_trans_barrier_seq =
|
||||
start_poll_synchronize_srcu(&c->btree_trans_barrier);
|
||||
|
||||
list_move_tail(&ck->list, &bc->freed);
|
||||
if (ck->c.lock.readers)
|
||||
list_move_tail(&ck->list, &bc->freed_pcpu);
|
||||
else
|
||||
list_move_tail(&ck->list, &bc->freed_nonpcpu);
|
||||
atomic_long_inc(&bc->nr_freed);
|
||||
|
||||
kfree(ck->k);
|
||||
@ -95,15 +103,51 @@ static void bkey_cached_free(struct btree_key_cache *bc,
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
}
|
||||
|
||||
static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
struct btree_key_cache_freelist *f;
|
||||
bool freed = false;
|
||||
|
||||
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
|
||||
|
||||
if (!ck->c.lock.readers) {
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
if (f->nr < ARRAY_SIZE(f->objs)) {
|
||||
f->objs[f->nr++] = ck;
|
||||
freed = true;
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
if (!freed) {
|
||||
mutex_lock(&bc->lock);
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
while (f->nr > ARRAY_SIZE(f->objs) / 2) {
|
||||
struct bkey_cached *ck2 = f->objs[--f->nr];
|
||||
|
||||
list_move_tail(&ck2->list, &bc->freed_nonpcpu);
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
list_move_tail(&ck->list, &bc->freed_nonpcpu);
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&bc->lock);
|
||||
list_move_tail(&ck->list, &bc->freed_pcpu);
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
|
||||
ck->btree_trans_barrier_seq =
|
||||
start_poll_synchronize_srcu(&c->btree_trans_barrier);
|
||||
|
||||
@ -114,74 +158,84 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
|
||||
ck->k = NULL;
|
||||
ck->u64s = 0;
|
||||
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
if (f->nr < ARRAY_SIZE(f->objs)) {
|
||||
f->objs[f->nr++] = ck;
|
||||
freed = true;
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
if (!freed) {
|
||||
mutex_lock(&bc->lock);
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
while (f->nr > ARRAY_SIZE(f->objs) / 2) {
|
||||
struct bkey_cached *ck2 = f->objs[--f->nr];
|
||||
|
||||
list_move_tail(&ck2->list, &bc->freed);
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
list_move_tail(&ck->list, &bc->freed);
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
bkey_cached_move_to_freelist(bc, ck);
|
||||
|
||||
six_unlock_write(&ck->c.lock);
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
bkey_cached_alloc(struct btree_key_cache *c)
|
||||
bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bkey_cached *ck = NULL;
|
||||
struct btree_key_cache_freelist *f;
|
||||
bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
|
||||
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(c->pcpu_freed);
|
||||
if (f->nr)
|
||||
ck = f->objs[--f->nr];
|
||||
preempt_enable();
|
||||
|
||||
if (!ck) {
|
||||
mutex_lock(&c->lock);
|
||||
if (!pcpu_readers) {
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(c->pcpu_freed);
|
||||
|
||||
while (!list_empty(&c->freed) &&
|
||||
f->nr < ARRAY_SIZE(f->objs) / 2) {
|
||||
ck = list_last_entry(&c->freed, struct bkey_cached, list);
|
||||
list_del_init(&ck->list);
|
||||
f->objs[f->nr++] = ck;
|
||||
}
|
||||
|
||||
ck = f->nr ? f->objs[--f->nr] : NULL;
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
if (f->nr)
|
||||
ck = f->objs[--f->nr];
|
||||
preempt_enable();
|
||||
mutex_unlock(&c->lock);
|
||||
|
||||
if (!ck) {
|
||||
mutex_lock(&bc->lock);
|
||||
preempt_disable();
|
||||
f = this_cpu_ptr(bc->pcpu_freed);
|
||||
|
||||
while (!list_empty(&bc->freed_nonpcpu) &&
|
||||
f->nr < ARRAY_SIZE(f->objs) / 2) {
|
||||
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
|
||||
list_del_init(&ck->list);
|
||||
f->objs[f->nr++] = ck;
|
||||
}
|
||||
|
||||
ck = f->nr ? f->objs[--f->nr] : NULL;
|
||||
preempt_enable();
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&bc->lock);
|
||||
if (!list_empty(&bc->freed_pcpu)) {
|
||||
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
|
||||
list_del_init(&ck->list);
|
||||
}
|
||||
mutex_unlock(&bc->lock);
|
||||
}
|
||||
|
||||
if (ck) {
|
||||
six_lock_intent(&ck->c.lock, NULL, NULL);
|
||||
six_lock_write(&ck->c.lock, NULL, NULL);
|
||||
int ret;
|
||||
|
||||
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
|
||||
if (unlikely(ret)) {
|
||||
bkey_cached_move_to_freelist(bc, ck);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
path->l[0].b = (void *) ck;
|
||||
path->l[0].lock_seq = ck->c.lock.state.seq;
|
||||
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &ck->c);
|
||||
if (unlikely(ret)) {
|
||||
btree_node_unlock(trans, path, 0);
|
||||
bkey_cached_move_to_freelist(bc, ck);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return ck;
|
||||
}
|
||||
|
||||
ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
|
||||
if (likely(ck)) {
|
||||
INIT_LIST_HEAD(&ck->list);
|
||||
six_lock_init(&ck->c.lock);
|
||||
__six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
|
||||
if (pcpu_readers)
|
||||
six_lock_pcpu_alloc(&ck->c.lock);
|
||||
|
||||
ck->c.cached = true;
|
||||
BUG_ON(!six_trylock_intent(&ck->c.lock));
|
||||
BUG_ON(!six_trylock_write(&ck->c.lock));
|
||||
return ck;
|
||||
@ -215,36 +269,36 @@ bkey_cached_reuse(struct btree_key_cache *c)
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
btree_key_cache_create(struct bch_fs *c,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bkey_cached *ck;
|
||||
bool was_new = true;
|
||||
|
||||
ck = bkey_cached_alloc(bc);
|
||||
ck = bkey_cached_alloc(trans, path);
|
||||
if (unlikely(IS_ERR(ck)))
|
||||
return ck;
|
||||
|
||||
if (unlikely(!ck)) {
|
||||
ck = bkey_cached_reuse(bc);
|
||||
if (unlikely(!ck)) {
|
||||
bch_err(c, "error allocating memory for key cache item, btree %s",
|
||||
bch2_btree_ids[btree_id]);
|
||||
bch2_btree_ids[path->btree_id]);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
|
||||
was_new = false;
|
||||
} else {
|
||||
if (btree_id == BTREE_ID_subvolumes)
|
||||
if (path->btree_id == BTREE_ID_subvolumes)
|
||||
six_lock_pcpu_alloc(&ck->c.lock);
|
||||
else
|
||||
six_lock_pcpu_free(&ck->c.lock);
|
||||
}
|
||||
|
||||
ck->c.level = 0;
|
||||
ck->c.btree_id = btree_id;
|
||||
ck->key.btree_id = btree_id;
|
||||
ck->key.pos = pos;
|
||||
ck->c.btree_id = path->btree_id;
|
||||
ck->key.btree_id = path->btree_id;
|
||||
ck->key.pos = path->pos;
|
||||
ck->valid = false;
|
||||
ck->flags = 1U << BKEY_CACHED_ACCESSED;
|
||||
|
||||
@ -256,6 +310,7 @@ btree_key_cache_create(struct bch_fs *c,
|
||||
if (likely(was_new)) {
|
||||
six_unlock_write(&ck->c.lock);
|
||||
six_unlock_intent(&ck->c.lock);
|
||||
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
|
||||
kfree(ck);
|
||||
} else {
|
||||
bkey_cached_free_fast(bc, ck);
|
||||
@ -291,7 +346,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
|
||||
k = bch2_btree_path_peek_slot(path, &u);
|
||||
|
||||
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
|
||||
trace_trans_restart_relock_key_cache_fill(trans, _THIS_IP_, ck_path);
|
||||
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
|
||||
goto err;
|
||||
}
|
||||
@ -320,11 +375,12 @@ static int btree_key_cache_fill(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: not allowed to be holding read locks when we take a write lock,
|
||||
* currently
|
||||
*/
|
||||
bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b);
|
||||
ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
|
||||
if (ret) {
|
||||
kfree(new_k);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (new_k) {
|
||||
kfree(ck->k);
|
||||
ck->u64s = new_u64s;
|
||||
@ -372,7 +428,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
|
||||
retry:
|
||||
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
|
||||
if (!ck) {
|
||||
ck = btree_key_cache_create(c, path->btree_id, path->pos);
|
||||
ck = btree_key_cache_create(trans, path);
|
||||
ret = PTR_ERR_OR_ZERO(ck);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -414,7 +470,7 @@ fill:
|
||||
*/
|
||||
if (!path->locks_want &&
|
||||
!__bch2_btree_path_upgrade(trans, path, 1)) {
|
||||
trace_transaction_restart_key_cache_upgrade(trans, _THIS_IP_);
|
||||
trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
|
||||
goto err;
|
||||
}
|
||||
@ -518,21 +574,21 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
atomic_long_dec(&c->btree_key_cache.nr_dirty);
|
||||
}
|
||||
} else {
|
||||
struct btree_path *path2;
|
||||
evict:
|
||||
BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
|
||||
trans_for_each_path(trans, path2)
|
||||
if (path2 != c_iter.path)
|
||||
__bch2_btree_path_unlock(trans, path2);
|
||||
|
||||
mark_btree_node_unlocked(c_iter.path, 0);
|
||||
c_iter.path->l[0].b = NULL;
|
||||
|
||||
six_lock_write(&ck->c.lock, NULL, NULL);
|
||||
bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c);
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
|
||||
atomic_long_dec(&c->btree_key_cache.nr_dirty);
|
||||
}
|
||||
|
||||
mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED);
|
||||
bkey_cached_evict(&c->btree_key_cache, ck);
|
||||
|
||||
bkey_cached_free_fast(&c->btree_key_cache, ck);
|
||||
}
|
||||
out:
|
||||
@ -548,11 +604,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
struct bkey_cached *ck =
|
||||
container_of(pin, struct bkey_cached, journal);
|
||||
struct bkey_cached_key key;
|
||||
struct btree_trans trans;
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
int ret = 0;
|
||||
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
six_lock_read(&ck->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
|
||||
key = ck->key;
|
||||
|
||||
if (ck->journal.seq != seq ||
|
||||
@ -562,12 +620,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
}
|
||||
six_unlock_read(&ck->c.lock);
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
ret = commit_do(&trans, NULL, NULL, 0,
|
||||
btree_key_cache_flush_pos(&trans, key, seq,
|
||||
BTREE_INSERT_JOURNAL_RECLAIM, false));
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -674,12 +733,29 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
* Newest freed entries are at the end of the list - once we hit one
|
||||
* that's too new to be freed, we can bail out:
|
||||
*/
|
||||
list_for_each_entry_safe(ck, t, &bc->freed, list) {
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
break;
|
||||
|
||||
list_del(&ck->list);
|
||||
six_lock_pcpu_free(&ck->c.lock);
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
atomic_long_dec(&bc->nr_freed);
|
||||
scanned++;
|
||||
freed++;
|
||||
}
|
||||
|
||||
if (scanned >= nr)
|
||||
goto out;
|
||||
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
break;
|
||||
|
||||
list_del(&ck->list);
|
||||
six_lock_pcpu_free(&ck->c.lock);
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
atomic_long_dec(&bc->nr_freed);
|
||||
scanned++;
|
||||
@ -767,7 +843,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
for (i = 0; i < tbl->size; i++)
|
||||
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
|
||||
bkey_cached_evict(bc, ck);
|
||||
list_add(&ck->list, &bc->freed);
|
||||
list_add(&ck->list, &bc->freed_nonpcpu);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@ -777,11 +853,13 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
|
||||
for (i = 0; i < f->nr; i++) {
|
||||
ck = f->objs[i];
|
||||
list_add(&ck->list, &bc->freed);
|
||||
list_add(&ck->list, &bc->freed_nonpcpu);
|
||||
}
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(ck, n, &bc->freed, list) {
|
||||
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
|
||||
|
||||
list_for_each_entry_safe(ck, n, &bc->freed_nonpcpu, list) {
|
||||
cond_resched();
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
||||
@ -789,6 +867,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
|
||||
list_del(&ck->list);
|
||||
kfree(ck->k);
|
||||
six_lock_pcpu_free(&ck->c.lock);
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
}
|
||||
|
||||
@ -808,7 +887,8 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
|
||||
{
|
||||
mutex_init(&c->lock);
|
||||
INIT_LIST_HEAD(&c->freed);
|
||||
INIT_LIST_HEAD(&c->freed_pcpu);
|
||||
INIT_LIST_HEAD(&c->freed_nonpcpu);
|
||||
}
|
||||
|
||||
static void bch2_btree_key_cache_shrinker_to_text(struct printbuf *out, struct shrinker *shrink)
|
||||
466 libbcachefs/btree_locking.c Normal file

@ -0,0 +1,466 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_locking.h"
#include "btree_types.h"

struct lock_class_key bch2_btree_node_lock_key;

/* Btree node locking: */

static inline void six_lock_readers_add(struct six_lock *lock, int nr)
{
	if (lock->readers)
		this_cpu_add(*lock->readers, nr);
	else if (nr > 0)
		atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
	else
		atomic64_sub(__SIX_VAL(read_lock, -nr), &lock->state.counter);
}

struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
						  struct btree_path *skip,
						  struct btree_bkey_cached_common *b,
						  unsigned level)
{
	struct btree_path *path;
	struct six_lock_count ret;

	memset(&ret, 0, sizeof(ret));

	if (IS_ERR_OR_NULL(b))
		return ret;

	trans_for_each_path(trans, path)
		if (path != skip && &path->l[level].b->c == b) {
			int t = btree_node_locked_type(path, level);

			if (t != BTREE_NODE_UNLOCKED)
				ret.n[t]++;
		}

	return ret;
}

/* unlock */
|
||||
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *trans,
|
||||
struct btree_path *path, struct btree *b)
|
||||
{
|
||||
bch2_btree_node_unlock_write_inlined(trans, path, b);
|
||||
}
|
||||
|
||||
/* lock */
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
|
||||
|
||||
/*
|
||||
* Must drop our read locks before calling six_lock_write() -
|
||||
* six_unlock() won't do wakeups until the reader count
|
||||
* goes to 0, and it's safe because we have the node intent
|
||||
* locked:
|
||||
*/
|
||||
six_lock_readers_add(&b->lock, -readers);
|
||||
btree_node_lock_nopath_nofail(trans, b, SIX_LOCK_write);
|
||||
six_lock_readers_add(&b->lock, readers);
|
||||
}
|
||||
|
||||
static inline bool path_has_read_locks(struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_read_locked(path, l))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Slowpath: */
|
||||
int __bch2_btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
unsigned reason;
|
||||
|
||||
/* Check if it's safe to block: */
|
||||
trans_for_each_path(trans, linked) {
|
||||
if (!linked->nodes_locked)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't block taking an intent lock if we have _any_ nodes read
|
||||
* locked:
|
||||
*
|
||||
* - Our read lock blocks another thread with an intent lock on
|
||||
* the same node from getting a write lock, and thus from
|
||||
* dropping its intent lock
|
||||
*
|
||||
* - And the other thread may have multiple nodes intent locked:
|
||||
* both the node we want to intent lock, and the node we
|
||||
* already have read locked - deadlock:
|
||||
*/
|
||||
if (type == SIX_LOCK_intent &&
|
||||
path_has_read_locks(linked)) {
|
||||
reason = 1;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
if (linked->btree_id != path->btree_id) {
|
||||
if (linked->btree_id < path->btree_id)
|
||||
continue;
|
||||
|
||||
reason = 3;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Within the same btree, non-cached paths come before cached
|
||||
* paths:
|
||||
*/
|
||||
if (linked->cached != path->cached) {
|
||||
if (!linked->cached)
|
||||
continue;
|
||||
|
||||
reason = 4;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interior nodes must be locked before their descendants: if
|
||||
* another path has possible descendants locked of the node
|
||||
* we're about to lock, it must have the ancestors locked too:
|
||||
*/
|
||||
if (level > btree_path_highest_level_locked(linked)) {
|
||||
reason = 5;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/* Must lock btree nodes in key order: */
|
||||
if (btree_node_locked(linked, level) &&
|
||||
bpos_cmp(pos, btree_node_pos(&linked->l[level].b->c)) <= 0) {
|
||||
reason = 7;
|
||||
goto deadlock;
|
||||
}
|
||||
}
|
||||
|
||||
return btree_node_lock_type(trans, path, b, pos, level,
|
||||
type, should_sleep_fn, p);
|
||||
deadlock:
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock, trans, ip, reason, linked, path, &pos);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
}
|
||||
|
||||
/* relock */
|
||||
|
||||
static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
bool upgrade)
|
||||
{
|
||||
unsigned l = path->level;
|
||||
int fail_idx = -1;
|
||||
|
||||
do {
|
||||
if (!btree_path_node(path, l))
|
||||
break;
|
||||
|
||||
if (!(upgrade
|
||||
? bch2_btree_node_upgrade(trans, path, l)
|
||||
: bch2_btree_node_relock(trans, path, l)))
|
||||
fail_idx = l;
|
||||
|
||||
l++;
|
||||
} while (l < path->locks_want);
|
||||
|
||||
/*
|
||||
* When we fail to get a lock, we have to ensure that any child nodes
|
||||
* can't be relocked so bch2_btree_path_traverse has to walk back up to
|
||||
* the node that we failed to relock:
|
||||
*/
|
||||
if (fail_idx >= 0) {
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
|
||||
do {
|
||||
path->l[fail_idx].b = upgrade
|
||||
? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
|
||||
: ERR_PTR(-BCH_ERR_no_btree_node_relock);
|
||||
--fail_idx;
|
||||
} while (fail_idx >= 0);
|
||||
}
|
||||
|
||||
if (path->uptodate == BTREE_ITER_NEED_RELOCK)
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
|
||||
return path->uptodate < BTREE_ITER_NEED_RELOCK;
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = btree_path_node(path, level);
|
||||
int want = __btree_lock_want(path, level);
|
||||
|
||||
if (race_fault())
|
||||
goto fail;
|
||||
|
||||
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
|
||||
(btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, &b->c, level, want))) {
|
||||
mark_btree_node_locked(trans, path, level, want);
|
||||
return true;
|
||||
}
|
||||
fail:
|
||||
trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* upgrade */
|
||||
|
||||
bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = path->l[level].b;
|
||||
|
||||
if (!is_btree_node(path, level))
|
||||
return false;
|
||||
|
||||
switch (btree_lock_want(path, level)) {
|
||||
case BTREE_NODE_UNLOCKED:
|
||||
BUG_ON(btree_node_locked(path, level));
|
||||
return true;
|
||||
case BTREE_NODE_READ_LOCKED:
|
||||
BUG_ON(btree_node_intent_locked(path, level));
|
||||
return bch2_btree_node_relock(trans, path, level);
|
||||
case BTREE_NODE_INTENT_LOCKED:
|
||||
break;
|
||||
case BTREE_NODE_WRITE_LOCKED:
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (btree_node_intent_locked(path, level))
|
||||
return true;
|
||||
|
||||
if (race_fault())
|
||||
return false;
|
||||
|
||||
if (btree_node_locked(path, level)
|
||||
? six_lock_tryupgrade(&b->c.lock)
|
||||
: six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
|
||||
if (btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
goto success;
|
||||
}
|
||||
|
||||
trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
|
||||
return false;
|
||||
success:
|
||||
mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Btree path locking: */
|
||||
|
||||
/*
|
||||
* Only for btree_cache.c - only relocks intent locks
|
||||
*/
|
||||
int bch2_btree_path_relock_intent(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
for (l = path->level;
|
||||
l < path->locks_want && btree_path_node(path, l);
|
||||
l++) {
|
||||
if (!bch2_btree_node_relock(trans, path, l)) {
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__flatten
|
||||
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
return btree_path_get_locks(trans, path, false);
|
||||
}
|
||||
|
||||
__flatten
|
||||
bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
return btree_path_get_locks(trans, path, true);
|
||||
}
|
||||
|
||||
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
EBUG_ON(path->locks_want >= new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
|
||||
return btree_path_get_locks(trans, path, true);
|
||||
}
|
||||
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
|
||||
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* XXX: this is ugly - we'd prefer to not be mucking with other
|
||||
* iterators in the btree_trans here.
|
||||
*
|
||||
* On failure to upgrade the iterator, setting iter->locks_want and
|
||||
* calling get_locks() is sufficient to make bch2_btree_path_traverse()
|
||||
* get the locks we want on transaction restart.
|
||||
*
|
||||
* But if this iterator was a clone, on transaction restart what we did
|
||||
* to this iterator isn't going to be preserved.
|
||||
*
|
||||
* Possibly we could add an iterator field for the parent iterator when
|
||||
* an iterator is a copy - for now, we'll just upgrade any other
|
||||
* iterators with the same btree id.
|
||||
*
|
||||
* The code below used to be needed to ensure ancestor nodes get locked
|
||||
* before interior nodes - now that's handled by
|
||||
* bch2_btree_path_traverse_all().
|
||||
*/
|
||||
if (!path->cached && !trans->in_traverse_all)
|
||||
trans_for_each_path(trans, linked)
|
||||
if (linked != path &&
|
||||
linked->cached == path->cached &&
|
||||
linked->btree_id == path->btree_id &&
|
||||
linked->locks_want < new_locks_want) {
|
||||
linked->locks_want = new_locks_want;
|
||||
btree_path_get_locks(trans, linked, true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void __bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
EBUG_ON(path->locks_want < new_locks_want);
|
||||
|
||||
path->locks_want = new_locks_want;
|
||||
|
||||
while (path->nodes_locked &&
|
||||
(l = btree_path_highest_level_locked(path)) >= path->locks_want) {
|
||||
if (l > path->level) {
|
||||
btree_node_unlock(trans, path, l);
|
||||
} else {
|
||||
if (btree_node_intent_locked(path, l)) {
|
||||
six_lock_downgrade(&path->l[l].b->c.lock);
|
||||
mark_btree_node_locked_noreset(path, l, SIX_LOCK_read);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_btree_path_verify_locks(path);
|
||||
}
|
||||
|
||||
/* Btree transaction locking: */
|
||||
|
||||
void bch2_trans_downgrade(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
bch2_btree_path_downgrade(trans, path);
|
||||
}
|
||||
|
||||
int bch2_trans_relock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
if (unlikely(trans->restarted))
|
||||
return - ((int) trans->restarted);
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (path->should_be_locked &&
|
||||
!bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
|
||||
trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_trans_unlock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
|
||||
/*
|
||||
* bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
|
||||
* btree nodes, it implements its own walking:
|
||||
*/
|
||||
BUG_ON(!trans->is_initial_gc &&
|
||||
lock_class_is_held(&bch2_btree_node_lock_key));
|
||||
}
|
||||
|
||||
/* Debug */
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
|
||||
void bch2_btree_path_verify_locks(struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
if (!path->nodes_locked) {
|
||||
BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
|
||||
btree_path_node(path, path->level));
|
||||
return;
|
||||
}
|
||||
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++) {
|
||||
int want = btree_lock_want(path, l);
|
||||
int have = btree_node_locked_type(path, l);
|
||||
|
||||
BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);
|
||||
|
||||
BUG_ON(is_btree_node(path, l) &&
|
||||
(want == BTREE_NODE_UNLOCKED ||
|
||||
have != BTREE_NODE_WRITE_LOCKED) &&
|
||||
want != have);
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_trans_verify_locks(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
bch2_btree_path_verify_locks(path);
|
||||
}
|
||||
|
||||
#endif
|
@ -14,68 +14,71 @@
|
||||
|
||||
#include "btree_iter.h"
|
||||
|
||||
extern struct lock_class_key bch2_btree_node_lock_key;
|
||||
|
||||
static inline bool is_btree_node(struct btree_path *path, unsigned l)
|
||||
{
|
||||
return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b);
|
||||
}
|
||||
|
||||
static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
|
||||
{
|
||||
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
|
||||
? &trans->c->btree_transaction_stats[trans->fn_idx]
|
||||
: NULL;
|
||||
}
|
||||
|
||||
/* matches six lock types */
|
||||
enum btree_node_locked_type {
|
||||
BTREE_NODE_UNLOCKED = -1,
|
||||
BTREE_NODE_READ_LOCKED = SIX_LOCK_read,
|
||||
BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent,
|
||||
BTREE_NODE_WRITE_LOCKED = SIX_LOCK_write,
|
||||
};
|
||||
|
||||
static inline int btree_node_locked_type(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
/*
|
||||
* We're relying on the fact that if nodes_intent_locked is set
|
||||
* nodes_locked must be set as well, so that we can compute without
|
||||
* branches:
|
||||
*/
|
||||
return BTREE_NODE_UNLOCKED +
|
||||
((path->nodes_locked >> level) & 1) +
|
||||
((path->nodes_intent_locked >> level) & 1);
|
||||
return BTREE_NODE_UNLOCKED + ((path->nodes_locked >> (level << 1)) & 3);
|
||||
}
|
||||
|
||||
static inline bool btree_node_intent_locked(struct btree_path *path,
|
||||
unsigned level)
|
||||
static inline bool btree_node_write_locked(struct btree_path *path, unsigned l)
|
||||
{
|
||||
return btree_node_locked_type(path, level) == BTREE_NODE_INTENT_LOCKED;
|
||||
return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED;
|
||||
}
|
||||
|
||||
static inline bool btree_node_read_locked(struct btree_path *path,
|
||||
unsigned level)
|
||||
static inline bool btree_node_intent_locked(struct btree_path *path, unsigned l)
|
||||
{
|
||||
return btree_node_locked_type(path, level) == BTREE_NODE_READ_LOCKED;
|
||||
return btree_node_locked_type(path, l) == BTREE_NODE_INTENT_LOCKED;
|
||||
}
|
||||
|
||||
static inline bool btree_node_read_locked(struct btree_path *path, unsigned l)
|
||||
{
|
||||
return btree_node_locked_type(path, l) == BTREE_NODE_READ_LOCKED;
|
||||
}
|
||||
|
||||
static inline bool btree_node_locked(struct btree_path *path, unsigned level)
|
||||
{
|
||||
return path->nodes_locked & (1 << level);
|
||||
return btree_node_locked_type(path, level) != BTREE_NODE_UNLOCKED;
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_unlocked(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
path->nodes_locked &= ~(1 << level);
|
||||
path->nodes_intent_locked &= ~(1 << level);
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_locked_noreset(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned level,
|
||||
enum six_lock_type type)
|
||||
static inline void mark_btree_node_locked_noreset(struct btree_path *path,
|
||||
unsigned level,
|
||||
enum btree_node_locked_type type)
|
||||
{
|
||||
/* relying on this to avoid a branch */
|
||||
BUILD_BUG_ON(SIX_LOCK_read != 0);
|
||||
BUILD_BUG_ON(SIX_LOCK_intent != 1);
|
||||
|
||||
BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx);
|
||||
path->nodes_locked &= ~(3U << (level << 1));
|
||||
path->nodes_locked |= (type + 1) << (level << 1);
|
||||
}
|
||||
|
||||
path->nodes_locked |= 1 << level;
|
||||
path->nodes_intent_locked |= type << level;
|
||||
static inline void mark_btree_node_unlocked(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
EBUG_ON(btree_node_write_locked(path, level));
|
||||
mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_locked(struct btree_trans *trans,
|
||||
@ -83,19 +86,12 @@ static inline void mark_btree_node_locked(struct btree_trans *trans,
|
||||
unsigned level,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
mark_btree_node_locked_noreset(trans, path, level, type);
|
||||
mark_btree_node_locked_noreset(path, level, type);
|
||||
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
|
||||
path->l[level].lock_taken_time = ktime_get_ns();
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent);
|
||||
}
|
||||
|
||||
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
|
||||
{
|
||||
return level < path->locks_want
|
||||
@ -115,13 +111,6 @@ btree_lock_want(struct btree_path *path, int level)
|
||||
return BTREE_NODE_UNLOCKED;
|
||||
}
|
||||
|
||||
static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
|
||||
{
|
||||
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
|
||||
? &trans->c->btree_transaction_stats[trans->fn_idx]
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
@ -135,6 +124,8 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
|
||||
#endif
|
||||
}
|
||||
|
||||
/* unlock: */
|
||||
|
||||
static inline void btree_node_unlock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
@ -149,122 +140,23 @@ static inline void btree_node_unlock(struct btree_trans *trans,
|
||||
mark_btree_node_unlocked(path, level);
|
||||
}
|
||||
|
||||
static inline int btree_path_lowest_level_locked(struct btree_path *path)
|
||||
{
|
||||
return __ffs(path->nodes_locked) >> 1;
|
||||
}
|
||||
|
||||
static inline int btree_path_highest_level_locked(struct btree_path *path)
|
||||
{
|
||||
return __fls(path->nodes_locked) >> 1;
|
||||
}
|
||||
|
||||
static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
while (path->nodes_locked)
|
||||
btree_node_unlock(trans, path, __ffs(path->nodes_locked));
|
||||
}
|
||||
|
||||
static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case SIX_LOCK_read:
|
||||
return BCH_TIME_btree_lock_contended_read;
|
||||
case SIX_LOCK_intent:
|
||||
return BCH_TIME_btree_lock_contended_intent;
|
||||
case SIX_LOCK_write:
|
||||
return BCH_TIME_btree_lock_contended_write;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static inline int btree_node_lock_type(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
u64 start_time;
|
||||
int ret;
|
||||
|
||||
if (six_trylock_type(&b->c.lock, type))
|
||||
return 0;
|
||||
|
||||
start_time = local_clock();
|
||||
|
||||
trans->locking_path_idx = path->idx;
|
||||
trans->locking_pos = pos;
|
||||
trans->locking_btree_id = path->btree_id;
|
||||
trans->locking_level = level;
|
||||
trans->locking_lock_type = type;
|
||||
trans->locking = &b->c;
|
||||
ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p);
|
||||
trans->locking = NULL;
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a btree node if we already have it locked on one of our linked
|
||||
* iterators:
|
||||
*/
|
||||
static inline bool btree_node_lock_increment(struct btree_trans *trans,
|
||||
struct btree *b, unsigned level,
|
||||
enum btree_node_locked_type want)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (path->l[level].b == b &&
|
||||
btree_node_locked_type(path, level) >= want) {
|
||||
six_lock_increment(&b->c.lock, want);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
|
||||
struct btree *, struct bpos, unsigned,
|
||||
enum six_lock_type,
|
||||
six_lock_should_sleep_fn, void *,
|
||||
unsigned long);
|
||||
|
||||
static inline int btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b, struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
|
||||
|
||||
if (likely(six_trylock_type(&b->c.lock, type)) ||
|
||||
btree_node_lock_increment(trans, b, level, type) ||
|
||||
!(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
|
||||
should_sleep_fn, p, ip))) {
|
||||
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
|
||||
path->l[b->c.level].lock_taken_time = ktime_get_ns();
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
|
||||
|
||||
static inline bool bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
EBUG_ON(btree_node_locked(path, level) &&
|
||||
btree_node_locked_type(path, level) !=
|
||||
__btree_lock_want(path, level));
|
||||
|
||||
return likely(btree_node_locked(path, level)) ||
|
||||
__bch2_btree_node_relock(trans, path, level);
|
||||
btree_node_unlock(trans, path, btree_path_lowest_level_locked(path));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -279,6 +171,9 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
|
||||
|
||||
EBUG_ON(path->l[b->c.level].b != b);
|
||||
EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
|
||||
EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
|
||||
|
||||
mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
|
||||
|
||||
trans_for_each_path_with_node(trans, b, linked)
|
||||
linked->l[b->c.level].lock_seq += 2;
|
||||
@ -289,20 +184,181 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *, struct btree *);
|
||||
/* lock: */
|
||||
|
||||
static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
static inline int __must_check
|
||||
btree_node_lock_nopath(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
EBUG_ON(path->l[b->c.level].b != b);
|
||||
EBUG_ON(path->l[b->c.level].lock_seq != b->c.lock.state.seq);
|
||||
EBUG_ON(!btree_node_intent_locked(path, b->c.level));
|
||||
six_lock_type(&b->lock, type, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!six_trylock_write(&b->c.lock)))
|
||||
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
int ret = btree_node_lock_nopath(trans, b, type);
|
||||
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
static inline int btree_node_lock_type(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (six_trylock_type(&b->lock, type))
|
||||
return 0;
|
||||
|
||||
trans->locking_path_idx = path->idx;
|
||||
trans->locking_pos = pos;
|
||||
trans->locking_btree_id = path->btree_id;
|
||||
trans->locking_level = level;
|
||||
trans->locking_lock_type = type;
|
||||
trans->locking = b;
|
||||
ret = six_lock_type(&b->lock, type, should_sleep_fn, p);
|
||||
trans->locking = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a btree node if we already have it locked on one of our linked
|
||||
* iterators:
|
||||
*/
|
||||
static inline bool btree_node_lock_increment(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
unsigned level,
|
||||
enum btree_node_locked_type want)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (&path->l[level].b->c == b &&
|
||||
btree_node_locked_type(path, level) >= want) {
|
||||
six_lock_increment(&b->lock, want);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
|
||||
struct btree_bkey_cached_common *,
|
||||
struct bpos, unsigned,
|
||||
enum six_lock_type,
|
||||
six_lock_should_sleep_fn, void *,
|
||||
unsigned long);
|
||||
|
||||
static inline int btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
|
||||
|
||||
if (likely(six_trylock_type(&b->lock, type)) ||
|
||||
btree_node_lock_increment(trans, b, level, type) ||
|
||||
!(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
|
||||
should_sleep_fn, p, ip))) {
|
||||
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
|
||||
path->l[b->level].lock_taken_time = ktime_get_ns();
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *);
|
||||
|
||||
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
EBUG_ON(&path->l[b->level].b->c != b);
|
||||
EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
|
||||
EBUG_ON(!btree_node_intent_locked(path, b->level));
|
||||
|
||||
/*
|
||||
* six locks are unfair, and read locks block while a thread wants a
|
||||
* write lock: thus, we need to tell the cycle detector we have a write
|
||||
* lock _before_ taking the lock:
|
||||
*/
|
||||
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
|
||||
|
||||
if (unlikely(!six_trylock_write(&b->lock)))
|
||||
__bch2_btree_node_lock_write(trans, b);
|
||||
}
|
||||
|
||||
static inline int __must_check
|
||||
bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
bch2_btree_node_lock_write_nofail(trans, path, b);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* relock: */
|
||||
|
||||
bool bch2_btree_path_relock_norestart(struct btree_trans *,
|
||||
struct btree_path *, unsigned long);
|
||||
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
|
||||
|
||||
static inline bool bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
EBUG_ON(btree_node_locked(path, level) &&
|
||||
!btree_node_write_locked(path, level) &&
|
||||
btree_node_locked_type(path, level) != __btree_lock_want(path, level));
|
||||
|
||||
return likely(btree_node_locked(path, level)) ||
|
||||
(!IS_ERR_OR_NULL(path->l[level].b) &&
|
||||
__bch2_btree_node_relock(trans, path, level));
|
||||
}
|
||||
|
||||
static inline int bch2_btree_path_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned long trace_ip)
|
||||
{
|
||||
if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
|
||||
trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* upgrade */
|
||||
|
||||
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
|
||||
static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
|
||||
|
||||
return path->locks_want < new_locks_want
|
||||
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
|
||||
: path->uptodate == BTREE_ITER_UPTODATE;
|
||||
}
|
||||
|
||||
/* misc: */
|
||||
|
||||
static inline void btree_path_set_should_be_locked(struct btree_path *path)
|
||||
{
|
||||
EBUG_ON(!btree_node_locked(path, path->level));
|
||||
@ -326,7 +382,20 @@ static inline void btree_path_set_level_up(struct btree_trans *trans,
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
}
|
||||
|
||||
/* debug */
|
||||
|
||||
struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
|
||||
struct btree_path *, struct btree *, unsigned);
|
||||
struct btree_path *,
|
||||
struct btree_bkey_cached_common *b,
|
||||
unsigned);
|
||||
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_btree_path_verify_locks(struct btree_path *);
|
||||
void bch2_trans_verify_locks(struct btree_trans *);
|
||||
#else
|
||||
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
|
||||
static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
|
||||
#endif
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_LOCKING_H */
@ -63,6 +63,7 @@ struct btree_bkey_cached_common {
struct six_lock lock;
u8 level;
u8 btree_id;
bool cached;
};

struct btree {
@ -232,9 +233,8 @@ struct btree_path {
*/
bool should_be_locked:1;
unsigned level:3,
locks_want:4,
nodes_locked:4,
nodes_intent_locked:4;
locks_want:4;
u8 nodes_locked;

struct btree_path_level {
struct btree *b;
@ -302,7 +302,8 @@ struct btree_key_cache {
struct mutex lock;
struct rhashtable table;
bool table_init_done;
struct list_head freed;
struct list_head freed_pcpu;
struct list_head freed_nonpcpu;
struct shrinker shrink;
unsigned shrink_iter;
struct btree_key_cache_freelist __percpu *pcpu_freed;
@ -338,6 +339,13 @@ struct bkey_cached {
struct bkey_i *k;
};

static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
{
return !b->cached
? container_of(b, struct btree, c)->key.k.p
: container_of(b, struct bkey_cached, c)->key.pos;
}

struct btree_insert_entry {
unsigned flags;
u8 bkey_type;
@ -413,6 +421,7 @@ struct btree_trans {
u64 paths_allocated;

unsigned mem_top;
unsigned mem_max;
unsigned mem_bytes;
void *mem;

@ -143,7 +143,7 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
|
||||
|
||||
static void __btree_node_free(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
trace_btree_node_free(c, b);
|
||||
trace_and_count(c, btree_node_free, c, b);
|
||||
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
BUG_ON(btree_node_need_write(b));
|
||||
@ -160,22 +160,23 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
|
||||
}
|
||||
|
||||
static void bch2_btree_node_free_inmem(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *path;
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
BUG_ON(path->l[b->c.level].b == b &&
|
||||
path->l[b->c.level].lock_seq == b->c.lock.state.seq);
|
||||
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
unsigned level = b->c.level;
|
||||
|
||||
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
__btree_node_free(c, b);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent);
|
||||
|
||||
trans_for_each_path(trans, path)
|
||||
if (path->l[level].b == b) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
|
||||
}
|
||||
}
|
||||
|
||||
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
@ -258,7 +259,9 @@ mem_alloc:
|
||||
return b;
|
||||
}
|
||||
|
||||
static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
|
||||
static struct btree *bch2_btree_node_alloc(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
unsigned level)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *b;
|
||||
@ -270,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
||||
|
||||
b = p->b[--p->nr];
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
set_btree_node_accessed(b);
|
||||
set_btree_node_dirty_acct(c, b);
|
||||
@ -304,7 +307,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
||||
ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
|
||||
BUG_ON(ret);
|
||||
|
||||
trace_btree_node_alloc(c, b);
|
||||
trace_and_count(c, btree_node_alloc, c, b);
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -322,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos)
|
||||
}
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *b,
|
||||
struct bkey_format format)
|
||||
{
|
||||
struct btree *n;
|
||||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
n = bch2_btree_node_alloc(as, trans, b->c.level);
|
||||
|
||||
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
|
||||
|
||||
@ -346,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
}
|
||||
|
||||
static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *b)
|
||||
{
|
||||
struct bkey_format new_f = bch2_btree_calc_format(b);
|
||||
@ -357,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
if (!bch2_btree_node_format_fits(as->c, b, &new_f))
|
||||
new_f = b->format;
|
||||
|
||||
return __bch2_btree_node_alloc_replacement(as, b, new_f);
|
||||
return __bch2_btree_node_alloc_replacement(as, trans, b, new_f);
|
||||
}
|
||||
|
||||
static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
|
||||
static struct btree *__btree_root_alloc(struct btree_update *as,
|
||||
struct btree_trans *trans, unsigned level)
|
||||
{
|
||||
struct btree *b = bch2_btree_node_alloc(as, level);
|
||||
struct btree *b = bch2_btree_node_alloc(as, trans, level);
|
||||
|
||||
btree_set_min(b, POS_MIN);
|
||||
btree_set_max(b, SPOS_MAX);
|
||||
@ -377,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
|
||||
return b;
|
||||
}
|
||||
|
||||
static void bch2_btree_reserve_put(struct btree_update *as)
|
||||
static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct prealloc_nodes *p;
|
||||
@ -404,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as)
|
||||
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
__btree_node_free(c, b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
@ -459,7 +465,7 @@ err:
|
||||
|
||||
/* Asynchronous interior node update machinery */
|
||||
|
||||
static void bch2_btree_update_free(struct btree_update *as)
|
||||
static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
@ -472,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as)
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
bch2_journal_pin_flush(&c->journal, &as->journal);
|
||||
bch2_disk_reservation_put(c, &as->disk_res);
|
||||
bch2_btree_reserve_put(as);
|
||||
bch2_btree_reserve_put(as, trans);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
|
||||
as->start_time);
|
||||
@ -550,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
|
||||
static void btree_update_nodes_written(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *b = as->b;
|
||||
struct btree *b;
|
||||
struct btree_trans trans;
|
||||
u64 journal_seq = 0;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 512);
|
||||
/*
|
||||
* If we're already in an error state, it might be because a btree node
|
||||
* was never written, and we might be trying to free that same btree
|
||||
@ -572,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
struct btree *old = as->old_nodes[i];
|
||||
__le64 seq;
|
||||
|
||||
six_lock_read(&old->c.lock, NULL, NULL);
|
||||
seq = old->data ? old->data->keys.seq : 0;
|
||||
six_unlock_read(&old->c.lock);
|
||||
b = as->old_nodes[i];
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
@ -597,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* journal reclaim does btree updates when flushing bkey_cached entries,
|
||||
* which may require allocations as well.
|
||||
*/
|
||||
bch2_trans_init(&trans, c, 0, 512);
|
||||
ret = commit_do(&trans, &as->disk_res, &journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
JOURNAL_WATERMARK_reserved,
|
||||
btree_update_nodes_written_trans(&trans, as));
|
||||
bch2_trans_exit(&trans);
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
JOURNAL_WATERMARK_reserved,
|
||||
btree_update_nodes_written_trans(&trans, as));
|
||||
bch2_trans_unlock(&trans);
|
||||
|
||||
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
|
||||
"error %i in btree_update_nodes_written()", ret);
|
||||
err:
|
||||
if (b) {
|
||||
if (as->b) {
|
||||
b = as->b;
|
||||
/*
|
||||
* @b is the node we did the final insert into:
|
||||
*
|
||||
@ -622,8 +630,8 @@ err:
|
||||
* we're in journal error state:
|
||||
*/
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
@ -680,7 +688,7 @@ err:
|
||||
for (i = 0; i < as->nr_new_nodes; i++) {
|
||||
b = as->new_nodes[i];
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
}
|
||||
@ -688,7 +696,8 @@ err:
|
||||
for (i = 0; i < as->nr_open_buckets; i++)
|
||||
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
|
||||
|
||||
bch2_btree_update_free(as);
|
||||
bch2_btree_update_free(as, &trans);
|
||||
bch2_trans_exit(&trans);
|
||||
}
|
||||
|
||||
static void btree_interior_update_work(struct work_struct *work)
|
||||
@ -935,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
as->nr_old_nodes++;
|
||||
}
|
||||
|
||||
static void bch2_btree_update_done(struct btree_update *as)
|
||||
static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
u64 start_time = as->start_time;
|
||||
@ -946,7 +955,7 @@ static void bch2_btree_update_done(struct btree_update *as)
|
||||
up_read(&as->c->gc_lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_btree_reserve_put(as);
|
||||
bch2_btree_reserve_put(as, trans);
|
||||
|
||||
continue_at(&as->cl, btree_update_set_nodes_written,
|
||||
as->c->btree_interior_update_worker);
|
||||
@ -994,7 +1003,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
nr_nodes[1] += 1;
|
||||
|
||||
if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
|
||||
trace_trans_restart_iter_upgrade(trans, _RET_IP_, path);
|
||||
trace_and_count(c, trans_restart_iter_upgrade, trans, _RET_IP_, path);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -1048,11 +1057,16 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
if (ret) {
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags);
|
||||
if (ret) {
|
||||
trace_trans_restart_journal_preres_get(trans, _RET_IP_);
|
||||
trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
|
||||
goto err;
|
||||
}
|
||||
@ -1085,8 +1099,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
trace_btree_reserve_get_fail(trans->fn, _RET_IP_,
|
||||
nr_nodes[0] + nr_nodes[1]);
|
||||
trace_and_count(c, btree_reserve_get_fail, trans->fn, _RET_IP_, nr_nodes[0] + nr_nodes[1]);
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1097,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
bch2_trans_verify_not_restarted(trans, restart_count);
|
||||
return as;
|
||||
err:
|
||||
bch2_btree_update_free(as);
|
||||
bch2_btree_update_free(as, trans);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -1141,7 +1154,7 @@ static void bch2_btree_set_root(struct btree_update *as,
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *old;
|
||||
|
||||
trace_btree_set_root(c, b);
|
||||
trace_and_count(c, btree_node_set_root, c, b);
|
||||
BUG_ON(!b->written);
|
||||
|
||||
old = btree_node_root(c, b);
|
||||
@ -1150,7 +1163,7 @@ static void bch2_btree_set_root(struct btree_update *as,
|
||||
* Ensure no one is using the old root while we switch to the
|
||||
* new root:
|
||||
*/
|
||||
bch2_btree_node_lock_write(trans, path, old);
|
||||
bch2_btree_node_lock_write_nofail(trans, path, &old->c);
|
||||
|
||||
bch2_btree_set_root_inmem(c, b);
|
||||
|
||||
@ -1249,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
|
||||
* node)
|
||||
*/
|
||||
static struct btree *__btree_split_node(struct btree_update *as,
|
||||
struct btree_trans *trans,
|
||||
struct btree *n1)
|
||||
{
|
||||
struct bkey_format_state s;
|
||||
@ -1258,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
|
||||
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
|
||||
struct bpos n1_pos;
|
||||
|
||||
n2 = bch2_btree_node_alloc(as, n1->c.level);
|
||||
n2 = bch2_btree_node_alloc(as, trans, n1->c.level);
|
||||
|
||||
n2->data->max_key = n1->data->max_key;
|
||||
n2->data->format = n1->format;
|
||||
@ -1422,15 +1436,15 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
n1 = bch2_btree_node_alloc_replacement(as, b);
|
||||
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
|
||||
|
||||
if (keys)
|
||||
btree_split_insert_keys(as, trans, path, n1, keys);
|
||||
|
||||
if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
|
||||
trace_btree_split(c, b);
|
||||
trace_and_count(c, btree_node_split, c, b);
|
||||
|
||||
n2 = __btree_split_node(as, n1);
|
||||
n2 = __btree_split_node(as, trans, n1);
|
||||
|
||||
bch2_btree_build_aux_trees(n2);
|
||||
bch2_btree_build_aux_trees(n1);
|
||||
@ -1452,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
|
||||
if (!parent) {
|
||||
/* Depth increases, make a new root */
|
||||
n3 = __btree_root_alloc(as, b->c.level + 1);
|
||||
n3 = __btree_root_alloc(as, trans, b->c.level + 1);
|
||||
|
||||
n3->sib_u64s[0] = U16_MAX;
|
||||
n3->sib_u64s[1] = U16_MAX;
|
||||
@ -1462,7 +1476,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
|
||||
}
|
||||
} else {
|
||||
trace_btree_compact(c, b);
|
||||
trace_and_count(c, btree_node_compact, c, b);
|
||||
|
||||
bch2_btree_build_aux_trees(n1);
|
||||
six_unlock_write(&n1->c.lock);
|
||||
@ -1493,22 +1507,19 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
if (n3)
|
||||
bch2_btree_update_get_open_buckets(as, n3);
|
||||
|
||||
/* Successful split, update the path to point to the new nodes: */
|
||||
|
||||
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
|
||||
if (n3)
|
||||
bch2_trans_node_add(trans, n3);
|
||||
if (n2)
|
||||
bch2_trans_node_add(trans, n2);
|
||||
bch2_trans_node_add(trans, n1);
|
||||
|
||||
/*
|
||||
* The old node must be freed (in memory) _before_ unlocking the new
|
||||
* nodes - else another thread could re-acquire a read lock on the old
|
||||
* node after another thread has locked and updated the new node, thus
|
||||
* seeing stale data:
|
||||
*/
|
||||
bch2_btree_node_free_inmem(trans, b);
|
||||
bch2_btree_node_free_inmem(trans, path, b);
|
||||
|
||||
if (n3)
|
||||
bch2_trans_node_add(trans, n3);
|
||||
if (n2)
|
||||
bch2_trans_node_add(trans, n2);
|
||||
bch2_trans_node_add(trans, n1);
|
||||
|
||||
if (n3)
|
||||
six_unlock_intent(&n3->c.lock);
|
||||
@ -1617,7 +1628,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
|
||||
return PTR_ERR(as);
|
||||
|
||||
btree_split(as, trans, path, b, NULL, flags);
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
|
||||
for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
|
||||
ret = bch2_foreground_maybe_merge(trans, path, l, flags);
|
||||
@ -1731,12 +1742,12 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
trace_btree_merge(c, b);
|
||||
trace_and_count(c, btree_node_merge, c, b);
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
bch2_btree_interior_update_will_free_node(as, m);
|
||||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
n = bch2_btree_node_alloc(as, trans, b->c.level);
|
||||
|
||||
SET_BTREE_NODE_SEQ(n->data,
|
||||
max(BTREE_NODE_SEQ(b->data),
|
||||
@ -1771,19 +1782,16 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
|
||||
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
|
||||
six_lock_increment(&m->c.lock, SIX_LOCK_intent);
|
||||
bch2_btree_node_free_inmem(trans, path, b);
|
||||
bch2_btree_node_free_inmem(trans, sib_path, m);
|
||||
|
||||
bch2_trans_node_add(trans, n);
|
||||
|
||||
bch2_trans_verify_paths(trans);
|
||||
|
||||
bch2_btree_node_free_inmem(trans, b);
|
||||
bch2_btree_node_free_inmem(trans, m);
|
||||
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
|
||||
out:
|
||||
@ -1817,13 +1825,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
n = bch2_btree_node_alloc_replacement(as, b);
|
||||
n = bch2_btree_node_alloc_replacement(as, trans, b);
|
||||
bch2_btree_update_add_new_node(as, n);
|
||||
|
||||
bch2_btree_build_aux_trees(n);
|
||||
six_unlock_write(&n->c.lock);
|
||||
|
||||
trace_btree_rewrite(c, b);
|
||||
trace_and_count(c, btree_node_rewrite, c, b);
|
||||
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
|
||||
@ -1837,12 +1845,12 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
|
||||
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
|
||||
bch2_btree_node_free_inmem(trans, iter->path, b);
|
||||
|
||||
bch2_trans_node_add(trans, n);
|
||||
bch2_btree_node_free_inmem(trans, b);
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
bch2_btree_update_done(as, trans);
|
||||
out:
|
||||
bch2_btree_path_downgrade(trans, iter->path);
|
||||
return ret;
|
||||
@ -1989,7 +1997,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_btree_node_lock_write(trans, iter->path, b);
|
||||
bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c);
|
||||
|
||||
if (new_hash) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
|
@ -117,6 +117,7 @@ struct btree_update {
|
||||
};
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
|
||||
struct btree_trans *,
|
||||
struct btree *,
|
||||
struct bkey_format);
|
||||
|
||||
|
@ -81,7 +81,7 @@ void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
{
|
||||
bch2_btree_node_lock_write(trans, path, b);
|
||||
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
||||
bch2_btree_node_prep_for_write(trans, path, b);
|
||||
}
|
||||
|
||||
@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct btree_write *w = container_of(pin, struct btree_write, journal);
|
||||
struct btree *b = container_of(w, struct btree, writes[i]);
|
||||
struct btree_trans trans;
|
||||
unsigned long old, new, v;
|
||||
unsigned idx = w - b->writes;
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
v = READ_ONCE(b->flags);
|
||||
|
||||
do {
|
||||
@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -285,7 +290,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
if (ret) {
|
||||
trace_trans_restart_journal_preres_get(trans, trace_ip);
|
||||
trace_and_count(c, trans_restart_journal_preres_get, trans, trace_ip, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -375,7 +380,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
|
||||
* Keys returned by peek() are no longer valid pointers, so we need a
|
||||
* transaction restart:
|
||||
*/
|
||||
trace_trans_restart_key_cache_key_realloced(trans, _RET_IP_, path, old_u64s, new_u64s);
|
||||
trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
|
||||
return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
|
||||
}
|
||||
|
||||
@ -567,7 +572,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
int ret;
|
||||
|
||||
if (race_fault()) {
|
||||
trace_trans_restart_fault_inject(trans, trace_ip);
|
||||
trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
|
||||
return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
|
||||
}
|
||||
|
||||
@ -741,11 +746,12 @@ static inline void path_upgrade_readers(struct btree_trans *trans, struct btree_
|
||||
static inline void upgrade_readers(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
struct btree *b = path_l(path)->b;
|
||||
unsigned l;
|
||||
|
||||
do {
|
||||
if (path->nodes_locked &&
|
||||
path->nodes_locked != path->nodes_intent_locked)
|
||||
path_upgrade_readers(trans, path);
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_read_locked(path, l))
|
||||
path_upgrade_readers(trans, path);
|
||||
} while ((path = prev_btree_path(trans, path)) &&
|
||||
path_l(path)->b == b);
|
||||
}
|
||||
@ -764,11 +770,13 @@ static inline void normalize_read_intent_locks(struct btree_trans *trans)
|
||||
? trans->paths + trans->sorted[i + 1]
|
||||
: NULL;
|
||||
|
||||
if (path->nodes_locked) {
|
||||
if (path->nodes_intent_locked)
|
||||
nr_intent++;
|
||||
else
|
||||
nr_read++;
|
||||
switch (btree_node_locked_type(path, path->level)) {
|
||||
case BTREE_NODE_READ_LOCKED:
|
||||
nr_read++;
|
||||
break;
|
||||
case BTREE_NODE_INTENT_LOCKED:
|
||||
nr_intent++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!next || path_l(path)->b != path_l(next)->b) {
|
||||
@ -791,7 +799,7 @@ static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct
|
||||
//if (path == pos)
|
||||
// break;
|
||||
|
||||
if (path->nodes_locked != path->nodes_intent_locked &&
|
||||
if (btree_node_read_locked(path, path->level) &&
|
||||
!bch2_btree_path_upgrade(trans, path, path->level + 1))
|
||||
return true;
|
||||
}
|
||||
@ -808,12 +816,19 @@ static inline int trans_lock_write(struct btree_trans *trans)
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* six locks are unfair, and read locks block while a thread
|
||||
* wants a write lock: thus, we need to tell the cycle detector
|
||||
* we have a write lock _before_ taking the lock:
|
||||
*/
|
||||
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write);
|
||||
|
||||
if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
|
||||
if (have_conflicting_read_lock(trans, i->path))
|
||||
goto fail;
|
||||
|
||||
ret = btree_node_lock_type(trans, i->path,
|
||||
insert_l(i)->b,
|
||||
&insert_l(i)->b->c,
|
||||
i->path->pos, i->level,
|
||||
SIX_LOCK_write, NULL, NULL);
|
||||
BUG_ON(ret);
|
||||
@ -824,6 +839,8 @@ static inline int trans_lock_write(struct btree_trans *trans)
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent);
|
||||
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
@ -831,7 +848,7 @@ fail:
|
||||
bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trace_trans_restart_would_deadlock_write(trans);
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
|
||||
}
|
||||
|
||||
@ -964,7 +981,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
case BTREE_INSERT_BTREE_NODE_FULL:
|
||||
ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
trace_trans_restart_btree_node_split(trans, trace_ip, i->path);
|
||||
trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
|
||||
break;
|
||||
case BTREE_INSERT_NEED_MARK_REPLICAS:
|
||||
bch2_trans_unlock(trans);
|
||||
@ -975,7 +992,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
if (ret)
|
||||
trace_trans_restart_mark_replicas(trans, trace_ip);
|
||||
trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip);
|
||||
break;
|
||||
case BTREE_INSERT_NEED_JOURNAL_RES:
|
||||
bch2_trans_unlock(trans);
|
||||
@ -992,12 +1009,12 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
if (ret)
|
||||
trace_trans_restart_journal_res_get(trans, trace_ip);
|
||||
trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip);
|
||||
break;
|
||||
case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
trace_trans_blocked_journal_reclaim(trans, trace_ip);
|
||||
trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);
|
||||
|
||||
wait_event_freezable(c->journal.reclaim_wait,
|
||||
(ret = journal_reclaim_wait_done(c)));
|
||||
@ -1006,7 +1023,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
if (ret)
|
||||
trace_trans_restart_journal_reclaim(trans, trace_ip);
|
||||
trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip);
|
||||
break;
|
||||
default:
|
||||
BUG_ON(ret >= 0);
|
||||
@ -1107,7 +1124,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
|
||||
BUG_ON(!i->path->should_be_locked);
|
||||
|
||||
if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
|
||||
trace_trans_restart_upgrade(trans, _RET_IP_, i->path);
|
||||
trace_and_count(c, trans_restart_upgrade, trans, _RET_IP_, i->path);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
goto out;
|
||||
}
|
||||
@ -1148,7 +1165,7 @@ retry:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
trace_transaction_commit(trans, _RET_IP_);
|
||||
trace_and_count(c, transaction_commit, trans, _RET_IP_);
|
||||
out:
|
||||
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
|
||||
|
||||
@ -1617,7 +1634,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
|
||||
ck = (void *) iter->key_cache_path->l[0].b;
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
trace_trans_restart_key_cache_raced(trans, _RET_IP_);
|
||||
trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
|
||||
}
|
||||
|
||||
|
@ -231,9 +231,12 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
|
||||
m->data_opts.btree_insert_flags);
|
||||
if (!ret) {
|
||||
bch2_btree_iter_set_pos(&iter, next_pos);
|
||||
atomic_long_inc(&c->extent_migrate_done);
|
||||
|
||||
if (ec_ob)
|
||||
bch2_ob_add_backpointer(c, ec_ob, &insert->k);
|
||||
|
||||
this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
|
||||
trace_move_extent_finish(&new->k);
|
||||
}
|
||||
err:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -248,22 +251,16 @@ next:
|
||||
}
|
||||
continue;
|
||||
nomatch:
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
bch_info(c, "no match for %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
if (m->ctxt) {
|
||||
BUG_ON(k.k->p.offset <= iter.pos.offset);
|
||||
atomic64_inc(&m->ctxt->stats->keys_raced);
|
||||
atomic64_add(k.k->p.offset - iter.pos.offset,
|
||||
&m->ctxt->stats->sectors_raced);
|
||||
}
|
||||
atomic_long_inc(&c->extent_migrate_raced);
|
||||
trace_move_race(&new->k);
|
||||
|
||||
this_cpu_add(c->counters[BCH_COUNTER_move_extent_race], new->k.size);
|
||||
trace_move_extent_race(&new->k);
|
||||
|
||||
bch2_btree_iter_advance(&iter);
|
||||
goto next;
|
||||
}
|
||||
|
@ -666,6 +666,9 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
|
||||
|
||||
mutex_lock(&s->lock);
|
||||
|
||||
prt_printf(&i->buf, "Max mem used: %u", s->max_mem);
|
||||
prt_newline(&i->buf);
|
||||
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
|
||||
prt_printf(&i->buf, "Lock hold times:");
|
||||
prt_newline(&i->buf);
|
||||
|
@ -384,32 +384,34 @@ inval:
|
||||
prt_printf(out, "invalid label %u", v);
|
||||
}
|
||||
|
||||
int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
|
||||
int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
|
||||
{
|
||||
struct bch_member *mi;
|
||||
int v = -1;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
int ret, v = -1;
|
||||
|
||||
if (!strlen(name) || !strcmp(name, "none"))
|
||||
goto write_sb;
|
||||
return 0;
|
||||
|
||||
v = bch2_disk_path_find_or_create(&c->disk_sb, name);
|
||||
if (v < 0) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
if (v < 0)
|
||||
return v;
|
||||
}
|
||||
|
||||
ret = bch2_sb_disk_groups_to_cpu(c);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
write_sb:
|
||||
return ret;
|
||||
|
||||
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
|
||||
SET_BCH_MEMBER_GROUP(mi, v + 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bch2_write_super(c);
|
||||
unlock:
|
||||
int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
ret = __bch2_dev_group_set(c, ca, name) ?:
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return ret;
|
||||
|
@ -82,6 +82,7 @@ void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *
|
||||
|
||||
int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
|
||||
|
||||
int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
|
||||
int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
|
||||
|
||||
const char *bch2_sb_validate_disk_groups(struct bch_sb *,

@ -772,9 +772,6 @@ static int hash_redo_key(struct btree_trans *trans,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c k)
{
bch_err(trans->c, "hash_redo_key() not implemented yet");
return -EINVAL;
#if 0
struct bkey_i *delete;
struct bkey_i *tmp;

@ -792,8 +789,14 @@ static int hash_redo_key(struct btree_trans *trans,
delete->k.p = k_iter->pos;
return bch2_btree_iter_traverse(k_iter) ?:
bch2_trans_update(trans, k_iter, delete, 0) ?:
bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
#endif
bch2_hash_set_snapshot(trans, desc, hash_info,
(subvol_inum) { 0, k.k->p.inode },
k.k->p.snapshot, tmp,
BCH_HASH_SET_MUST_CREATE,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
}

static int hash_check_key(struct btree_trans *trans,

@ -1387,7 +1387,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
struct closure *cl = &op->cl;
|
||||
struct bio *bio = &op->write.op.wbio.bio;
|
||||
|
||||
trace_promote(&rbio->bio);
|
||||
trace_and_count(op->write.op.c, read_promote, &rbio->bio);
|
||||
|
||||
/* we now own pages: */
|
||||
BUG_ON(!rbio->bounce);
|
||||
@ -1653,7 +1653,7 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
};
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
|
||||
trace_read_retry(&rbio->bio);
|
||||
trace_and_count(c, read_retry, &rbio->bio);
|
||||
|
||||
if (rbio->retry == READ_RETRY_AVOID)
|
||||
bch2_mark_io_failure(&failed, &rbio->pick);
|
||||
@ -1909,7 +1909,7 @@ static void bch2_read_endio(struct bio *bio)
|
||||
|
||||
if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
|
||||
ptr_stale(ca, &rbio->pick.ptr)) {
|
||||
atomic_long_inc(&c->read_realloc_races);
|
||||
trace_and_count(c, read_reuse_race, &rbio->bio);
|
||||
|
||||
if (rbio->flags & BCH_READ_RETRY_IF_STALE)
|
||||
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
|
||||
@ -2197,7 +2197,7 @@ get_bio:
|
||||
rbio->bio.bi_end_io = bch2_read_endio;
|
||||
|
||||
if (rbio->bounce)
|
||||
trace_read_bounce(&rbio->bio);
|
||||
trace_and_count(c, read_bounce, &rbio->bio);
|
||||
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
|
||||
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
|
||||
@ -2212,7 +2212,7 @@ get_bio:
|
||||
|
||||
if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
|
||||
bio_inc_remaining(&orig->bio);
|
||||
trace_read_split(&orig->bio);
|
||||
trace_and_count(c, read_split, &orig->bio);
|
||||
}
|
||||
|
||||
if (!rbio->pick.idx) {
|
||||
|
@ -391,12 +391,12 @@ retry:
|
||||
ret = journal_entry_open(j);
|
||||
|
||||
if (ret == JOURNAL_ERR_max_in_flight)
|
||||
trace_journal_entry_full(c);
|
||||
trace_and_count(c, journal_entry_full, c);
|
||||
unlock:
|
||||
if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
|
||||
!j->res_get_blocked_start) {
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
trace_journal_full(c);
|
||||
trace_and_count(c, journal_full, c);
|
||||
}
|
||||
|
||||
can_discard = j->can_discard;
|
||||
|
@ -1551,7 +1551,7 @@ static void do_journal_write(struct closure *cl)
|
||||
|
||||
bch2_bio_map(bio, w->data, sectors << 9);
|
||||
|
||||
trace_journal_write(bio);
|
||||
trace_and_count(c, journal_write, bio);
|
||||
closure_bio_submit(bio, cl);
|
||||
|
||||
ca->journal.bucket_seq[ca->journal.cur_idx] =
|
||||
|
@ -641,7 +641,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
|
||||
min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
|
||||
|
||||
trace_journal_reclaim_start(c, direct, kicked,
|
||||
trace_and_count(c, journal_reclaim_start, c,
|
||||
direct, kicked,
|
||||
min_nr, min_key_cache,
|
||||
j->prereserved.reserved,
|
||||
j->prereserved.remaining,
|
||||
@ -657,7 +658,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
j->nr_direct_reclaim += nr_flushed;
|
||||
else
|
||||
j->nr_background_reclaim += nr_flushed;
|
||||
trace_journal_reclaim_finish(c, nr_flushed);
|
||||
trace_and_count(c, journal_reclaim_finish, c, nr_flushed);
|
||||
|
||||
if (nr_flushed)
|
||||
wake_up(&j->reclaim_wait);
|
||||
|
@ -252,8 +252,8 @@ static int bch2_move_extent(struct btree_trans *trans,
|
||||
atomic64_inc(&ctxt->stats->keys_moved);
|
||||
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
|
||||
|
||||
trace_move_extent(k.k);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
|
||||
trace_move_extent_read(k.k);
|
||||
|
||||
atomic_add(io->read_sectors, &ctxt->read_sectors);
|
||||
list_add_tail(&io->list, &ctxt->reads);
|
||||
@ -275,7 +275,7 @@ err_free:
|
||||
kfree(io);
|
||||
err:
|
||||
percpu_ref_put(&c->writes);
|
||||
trace_move_alloc_mem_fail(k.k);
|
||||
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ static int bch2_copygc(struct bch_fs *c)
|
||||
if (ret < 0)
|
||||
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
|
||||
|
||||
trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
|
||||
trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -221,7 +221,7 @@ static int bch2_copygc_thread(void *arg)
|
||||
wait = bch2_copygc_wait_amount(c);
|
||||
|
||||
if (wait > clock->max_slop) {
|
||||
trace_copygc_wait(c, wait, last + wait);
|
||||
trace_and_count(c, copygc_wait, c, wait, last + wait);
|
||||
c->copygc_wait = last + wait;
|
||||
bch2_kthread_io_clock_wait(clock, last + wait,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
|
@ -239,29 +239,26 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
int bch2_hash_set(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
subvol_inum inum,
|
||||
struct bkey_i *insert, int flags)
|
||||
int bch2_hash_set_snapshot(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
subvol_inum inum, u32 snapshot,
|
||||
struct bkey_i *insert,
|
||||
int flags,
|
||||
int update_flags)
|
||||
{
|
||||
struct btree_iter iter, slot = { NULL };
|
||||
struct bkey_s_c k;
|
||||
bool found = false;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
|
||||
SPOS(inum.inum,
|
||||
SPOS(insert->k.p.inode,
|
||||
desc.hash_bkey(info, bkey_i_to_s_c(insert)),
|
||||
snapshot),
|
||||
POS(inum.inum, U64_MAX),
|
||||
POS(insert->k.p.inode, U64_MAX),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
|
||||
if (is_visible_key(desc, inum, k)) {
|
||||
if (!inum.subvol || is_visible_key(desc, inum, k)) {
|
||||
if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
|
||||
goto found;
|
||||
|
||||
@ -303,6 +300,26 @@ not_found:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
int bch2_hash_set(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
subvol_inum inum,
|
||||
struct bkey_i *insert, int flags)
|
||||
{
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
insert->k.p.inode = inum.inum;
|
||||
|
||||
return bch2_hash_set_snapshot(trans, desc, info, inum,
|
||||
snapshot, insert, flags, 0);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
int bch2_hash_delete_at(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
|
@ -796,7 +796,7 @@ int bch2_write_super(struct bch_fs *c)
|
||||
unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
|
||||
int ret = 0;
|
||||
|
||||
trace_write_super(c, _RET_IP_);
|
||||
trace_and_count(c, write_super, c, _RET_IP_);
|
||||
|
||||
if (c->opts.very_degraded)
|
||||
degraded_flags |= BCH_FORCE_IF_LOST;
|
||||
|
@ -1530,6 +1530,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
struct bch_member dev_mi;
|
||||
unsigned dev_idx, nr_devices, u64s;
|
||||
struct printbuf errbuf = PRINTBUF;
|
||||
struct printbuf label = PRINTBUF;
|
||||
int ret;
|
||||
|
||||
ret = bch2_read_super(path, &opts, &sb);
|
||||
@ -1540,6 +1541,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
|
||||
dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
|
||||
|
||||
if (BCH_MEMBER_GROUP(&dev_mi)) {
|
||||
bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
|
||||
if (label.allocation_failure) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
err = bch2_dev_may_add(sb.sb, c);
|
||||
if (err) {
|
||||
bch_err(c, "device add error: %s", err);
|
||||
@ -1620,6 +1629,14 @@ have_slot:
|
||||
ca->disk_sb.sb->dev_idx = dev_idx;
|
||||
bch2_dev_attach(c, ca, dev_idx);
|
||||
|
||||
if (BCH_MEMBER_GROUP(&dev_mi)) {
|
||||
ret = __bch2_dev_group_set(c, ca, label.buf);
|
||||
if (ret) {
|
||||
bch_err(c, "device add error: error setting label");
|
||||
goto err_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
@ -1652,6 +1669,7 @@ err:
|
||||
if (ca)
|
||||
bch2_dev_free(ca);
|
||||
bch2_free_super(&sb);
|
||||
printbuf_exit(&label);
|
||||
printbuf_exit(&errbuf);
|
||||
return ret;
|
||||
err_late:
|
||||
|
@ -190,11 +190,6 @@ read_attribute(internal_uuid);
|
||||
read_attribute(has_data);
|
||||
read_attribute(alloc_debug);
|
||||
|
||||
read_attribute(read_realloc_races);
|
||||
read_attribute(extent_migrate_done);
|
||||
read_attribute(extent_migrate_raced);
|
||||
read_attribute(bucket_alloc_fail);
|
||||
|
||||
#define x(t, n, ...) read_attribute(t);
|
||||
BCH_PERSISTENT_COUNTERS()
|
||||
#undef x
|
||||
@ -378,15 +373,6 @@ SHOW(bch2_fs)
|
||||
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
|
||||
sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
|
||||
|
||||
sysfs_print(read_realloc_races,
|
||||
atomic_long_read(&c->read_realloc_races));
|
||||
sysfs_print(extent_migrate_done,
|
||||
atomic_long_read(&c->extent_migrate_done));
|
||||
sysfs_print(extent_migrate_raced,
|
||||
atomic_long_read(&c->extent_migrate_raced));
|
||||
sysfs_print(bucket_alloc_fail,
|
||||
atomic_long_read(&c->bucket_alloc_fail));
|
||||
|
||||
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
|
||||
|
||||
if (attr == &sysfs_gc_gens_pos)
|
||||
@ -625,11 +611,6 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_trigger_invalidates,
|
||||
&sysfs_prune_cache,
|
||||
|
||||
&sysfs_read_realloc_races,
|
||||
&sysfs_extent_migrate_done,
|
||||
&sysfs_extent_migrate_raced,
|
||||
&sysfs_bucket_alloc_fail,
|
||||
|
||||
&sysfs_gc_gens_pos,
|
||||
|
||||
&sysfs_copy_gc_enabled,
467 linux/six.c
@ -19,6 +19,8 @@
#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
#define six_release(l) lock_release(l, _RET_IP_)

static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);

struct six_lock_vals {
/* Value we add to the lock in order to take the lock: */
u64 lock_val;
@ -65,14 +67,15 @@ struct six_lock_vals {
}

static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
union six_lock_state old)
union six_lock_state old,
struct task_struct *owner)
{
if (type != SIX_LOCK_intent)
return;

if (!old.intent_lock) {
EBUG_ON(lock->owner);
lock->owner = current;
lock->owner = owner;
} else {
EBUG_ON(lock->owner != current);
}
@ -88,64 +91,21 @@ static inline unsigned pcpu_read_count(struct six_lock *lock)
return read_count;
}

struct six_lock_waiter {
|
||||
struct list_head list;
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
/* This is probably up there with the more evil things I've done */
|
||||
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
|
||||
|
||||
static inline void six_lock_wakeup(struct six_lock *lock,
|
||||
union six_lock_state state,
|
||||
unsigned waitlist_id)
|
||||
{
|
||||
if (waitlist_id == SIX_LOCK_write) {
|
||||
if (state.write_locking && !state.read_lock) {
|
||||
struct task_struct *p = READ_ONCE(lock->owner);
|
||||
if (p)
|
||||
wake_up_process(p);
|
||||
}
|
||||
} else {
|
||||
struct list_head *wait_list = &lock->wait_list[waitlist_id];
|
||||
struct six_lock_waiter *w, *next;
|
||||
|
||||
if (!(state.waiters & (1 << waitlist_id)))
|
||||
return;
|
||||
|
||||
clear_bit(waitlist_bitnr(waitlist_id),
|
||||
(unsigned long *) &lock->state.v);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
|
||||
list_for_each_entry_safe(w, next, wait_list, list) {
|
||||
list_del_init(&w->list);
|
||||
|
||||
if (wake_up_process(w->task) &&
|
||||
waitlist_id != SIX_LOCK_read) {
|
||||
if (!list_empty(wait_list))
|
||||
set_bit(waitlist_bitnr(waitlist_id),
|
||||
(unsigned long *) &lock->state.v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline bool do_six_trylock_type(struct six_lock *lock,
|
||||
enum six_lock_type type,
|
||||
bool try)
|
||||
static int __do_six_trylock_type(struct six_lock *lock,
|
||||
enum six_lock_type type,
|
||||
struct task_struct *task,
|
||||
bool try)
|
||||
{
|
||||
const struct six_lock_vals l[] = LOCK_VALS;
|
||||
union six_lock_state old, new;
|
||||
bool ret;
|
||||
int ret;
|
||||
u64 v;
|
||||
|
||||
EBUG_ON(type == SIX_LOCK_write && lock->owner != current);
|
||||
EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
|
||||
EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
|
||||
|
||||
EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
|
||||
|
||||
/*
|
||||
@ -176,18 +136,6 @@ retry:
|
||||
this_cpu_sub(*lock->readers, !ret);
|
||||
preempt_enable();
|
||||
|
||||
/*
|
||||
* If we failed because a writer was trying to take the
|
||||
* lock, issue a wakeup because we might have caused a
|
||||
* spurious trylock failure:
|
||||
*/
|
||||
if (old.write_locking) {
|
||||
struct task_struct *p = READ_ONCE(lock->owner);
|
||||
|
||||
if (p)
|
||||
wake_up_process(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we failed from the lock path and the waiting bit wasn't
|
||||
* set, set it:
|
||||
@ -208,6 +156,14 @@ retry:
|
||||
} while ((v = atomic64_cmpxchg(&lock->state.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we failed because a writer was trying to take the
|
||||
* lock, issue a wakeup because we might have caused a
|
||||
* spurious trylock failure:
|
||||
*/
|
||||
if (old.write_locking)
|
||||
ret = -1 - SIX_LOCK_write;
|
||||
} else if (type == SIX_LOCK_write && lock->readers) {
|
||||
if (try) {
|
||||
atomic64_add(__SIX_VAL(write_locking, 1),
|
||||
@ -227,9 +183,13 @@ retry:
|
||||
if (ret || try)
|
||||
v -= __SIX_VAL(write_locking, 1);
|
||||
|
||||
if (!ret && !try && !(lock->state.waiters & (1 << SIX_LOCK_write)))
|
||||
v += __SIX_VAL(waiters, 1 << SIX_LOCK_write);
|
||||
|
||||
if (try && !ret) {
|
||||
old.v = atomic64_add_return(v, &lock->state.counter);
|
||||
six_lock_wakeup(lock, old, SIX_LOCK_read);
|
||||
if (old.waiters & (1 << SIX_LOCK_read))
|
||||
ret = -1 - SIX_LOCK_read;
|
||||
} else {
|
||||
atomic64_add(v, &lock->state.counter);
|
||||
}
|
||||
@ -243,8 +203,7 @@ retry:
|
||||
|
||||
if (type == SIX_LOCK_write)
|
||||
new.write_locking = 0;
|
||||
} else if (!try && type != SIX_LOCK_write &&
|
||||
!(new.waiters & (1 << type)))
|
||||
} else if (!try && !(new.waiters & (1 << type)))
|
||||
new.waiters |= 1 << type;
|
||||
else
|
||||
break; /* waiting bit already set */
|
||||
@ -256,14 +215,84 @@ retry:
|
||||
EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
|
||||
}
|
||||
|
||||
if (ret)
|
||||
six_set_owner(lock, type, old);
|
||||
if (ret > 0)
|
||||
six_set_owner(lock, type, old, task);
|
||||
|
||||
EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking));
|
||||
EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));
|
||||
|
||||
return ret;
|
||||
}

static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
struct six_lock_waiter *w, *next;
struct task_struct *task;
bool saw_one;
int ret;
again:
ret = 0;
saw_one = false;
raw_spin_lock(&lock->wait_lock);

list_for_each_entry_safe(w, next, &lock->wait_list, list) {
if (w->lock_want != lock_type)
continue;

if (saw_one && lock_type != SIX_LOCK_read)
goto unlock;
saw_one = true;

ret = __do_six_trylock_type(lock, lock_type, w->task, false);
if (ret <= 0)
goto unlock;

__list_del(w->list.prev, w->list.next);
task = w->task;
/*
* Do no writes to @w besides setting lock_acquired - otherwise
* we would need a memory barrier:
*/
barrier();
w->lock_acquired = true;
wake_up_process(task);
}

clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
unlock:
raw_spin_unlock(&lock->wait_lock);

if (ret < 0) {
lock_type = -ret - 1;
goto again;
}
}

static inline void six_lock_wakeup(struct six_lock *lock,
union six_lock_state state,
enum six_lock_type lock_type)
{
if (lock_type == SIX_LOCK_write && state.read_lock)
return;

if (!(state.waiters & (1 << lock_type)))
return;

__six_lock_wakeup(lock, lock_type);
}

static bool do_six_trylock_type(struct six_lock *lock,
enum six_lock_type type,
bool try)
{
int ret;

ret = __do_six_trylock_type(lock, type, current, try);
if (ret < 0)
__six_lock_wakeup(lock, -ret - 1);

return ret > 0;
}

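Aside (illustrative, not from the patch): __do_six_trylock_type() returns a positive value on success, zero on plain failure, and -1 - lock_type when the failure means waiters of that lock type should be woken, which is why the callers above compute -ret - 1. A hypothetical helper making that decoding explicit:

/* Hypothetical helper showing the "-1 - lock_type" encoding used above */
static inline enum six_lock_type six_wakeup_type_from_ret(int ret)
{
	/* Only meaningful for ret < 0: */
	return -ret - 1;
}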
__always_inline __flatten
|
||||
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
|
||||
{
|
||||
@ -304,12 +333,8 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
* Similar to the lock path, we may have caused a spurious write
|
||||
* lock fail and need to issue a wakeup:
|
||||
*/
|
||||
if (old.write_locking) {
|
||||
struct task_struct *p = READ_ONCE(lock->owner);
|
||||
|
||||
if (p)
|
||||
wake_up_process(p);
|
||||
}
|
||||
if (old.write_locking)
|
||||
six_lock_wakeup(lock, old, SIX_LOCK_write);
|
||||
|
||||
if (ret)
|
||||
six_acquire(&lock->dep_map, 1);
|
||||
@ -327,7 +352,7 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
old.v,
|
||||
old.v + l[type].lock_val)) != old.v);
|
||||
|
||||
six_set_owner(lock, type, old);
|
||||
six_set_owner(lock, type, old, current);
|
||||
if (type != SIX_LOCK_write)
|
||||
six_acquire(&lock->dep_map, 1);
|
||||
return true;
|
||||
@ -335,33 +360,26 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
|
||||
#ifdef CONFIG_LOCK_SPIN_ON_OWNER
|
||||
|
||||
static inline int six_can_spin_on_owner(struct six_lock *lock)
|
||||
static inline bool six_optimistic_spin(struct six_lock *lock,
|
||||
struct six_lock_waiter *wait)
|
||||
{
|
||||
struct task_struct *owner;
|
||||
int retval = 1;
|
||||
struct task_struct *owner, *task = current;
|
||||
|
||||
if (need_resched())
|
||||
return 0;
|
||||
switch (wait->lock_want) {
|
||||
case SIX_LOCK_read:
|
||||
break;
|
||||
case SIX_LOCK_intent:
|
||||
if (lock->wait_list.next != &wait->list)
|
||||
return false;
|
||||
break;
|
||||
case SIX_LOCK_write:
|
||||
return false;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
owner = READ_ONCE(lock->owner);
|
||||
if (owner)
|
||||
retval = owner->on_cpu;
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* if lock->owner is not set, the mutex owner may have just acquired
|
||||
* it and not set the owner yet or the mutex has been released.
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
static inline bool six_spin_on_owner(struct six_lock *lock,
|
||||
struct task_struct *owner)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
rcu_read_lock();
|
||||
while (lock->owner == owner) {
|
||||
while (owner && lock->owner == owner) {
|
||||
/*
|
||||
* Ensure we emit the owner->on_cpu, dereference _after_
|
||||
* checking lock->owner still matches owner. If that fails,
|
||||
@ -370,85 +388,27 @@ static inline bool six_spin_on_owner(struct six_lock *lock,
|
||||
*/
|
||||
barrier();
|
||||
|
||||
if (!owner->on_cpu || need_resched()) {
|
||||
ret = false;
|
||||
/*
|
||||
* If we're an RT task that will live-lock because we won't let
|
||||
* the owner complete.
|
||||
*/
|
||||
if (wait->lock_acquired ||
|
||||
!owner->on_cpu ||
|
||||
rt_task(task) ||
|
||||
need_resched())
|
||||
break;
|
||||
}
|
||||
|
||||
cpu_relax();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (type == SIX_LOCK_write)
|
||||
return false;
|
||||
|
||||
preempt_disable();
|
||||
if (!six_can_spin_on_owner(lock))
|
||||
goto fail;
|
||||
|
||||
if (!osq_lock(&lock->osq))
|
||||
goto fail;
|
||||
|
||||
while (1) {
|
||||
struct task_struct *owner;
|
||||
|
||||
/*
|
||||
* If there's an owner, wait for it to either
|
||||
* release the lock or go to sleep.
|
||||
*/
|
||||
owner = READ_ONCE(lock->owner);
|
||||
if (owner && !six_spin_on_owner(lock, owner))
|
||||
break;
|
||||
|
||||
if (do_six_trylock_type(lock, type, false)) {
|
||||
osq_unlock(&lock->osq);
|
||||
preempt_enable();
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* When there's no owner, we might have preempted between the
|
||||
* owner acquiring the lock and setting the owner field. If
|
||||
* we're an RT task that will live-lock because we won't let
|
||||
* the owner complete.
|
||||
*/
|
||||
if (!owner && (need_resched() || rt_task(task)))
|
||||
break;
|
||||
|
||||
/*
|
||||
* The cpu_relax() call is a compiler barrier which forces
|
||||
* everything in this loop to be re-loaded. We don't need
|
||||
* memory barriers as we'll eventually observe the right
|
||||
* values at the cost of a few extra spins.
|
||||
*/
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
osq_unlock(&lock->osq);
|
||||
fail:
|
||||
preempt_enable();
|
||||
|
||||
/*
|
||||
* If we fell out of the spin path because of need_resched(),
|
||||
* reschedule now, before we try-lock again. This avoids getting
|
||||
* scheduled out right after we obtained the lock.
|
||||
*/
|
||||
if (need_resched())
|
||||
schedule();
|
||||
|
||||
return false;
|
||||
return wait->lock_acquired;
|
||||
}
|
||||
|
||||
#else /* CONFIG_LOCK_SPIN_ON_OWNER */
|
||||
|
||||
static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
|
||||
static inline bool six_optimistic_spin(struct six_lock *lock,
|
||||
struct six_lock_waiter *wait)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -457,10 +417,10 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type

noinline
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    struct six_lock_waiter *wait,
				    six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	union six_lock_state old;
	struct six_lock_waiter wait;
	int ret = 0;

	if (type == SIX_LOCK_write) {
@ -469,46 +429,58 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
		smp_mb__after_atomic();
	}

	ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
	if (ret)
		goto out_before_sleep;

	if (six_optimistic_spin(lock, type))
		goto out_before_sleep;

	lock_contended(&lock->dep_map, _RET_IP_);

	INIT_LIST_HEAD(&wait.list);
	wait.task = current;
	wait->task = current;
	wait->lock_want = type;
	wait->lock_acquired = false;

	raw_spin_lock(&lock->wait_lock);
	/*
	 * Retry taking the lock after taking waitlist lock, have raced with an
	 * unlock:
	 */
	ret = __do_six_trylock_type(lock, type, current, false);
	if (ret <= 0)
		list_add_tail(&wait->list, &lock->wait_list);
	raw_spin_unlock(&lock->wait_lock);

	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	if (six_optimistic_spin(lock, wait))
		goto out;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (type == SIX_LOCK_write)
			EBUG_ON(lock->owner != current);
		else if (list_empty_careful(&wait.list)) {
			raw_spin_lock(&lock->wait_lock);
			list_add_tail(&wait.list, &lock->wait_list[type]);
			raw_spin_unlock(&lock->wait_lock);
		}

		if (do_six_trylock_type(lock, type, false))
		if (wait->lock_acquired)
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (ret)
		if (unlikely(ret)) {
			raw_spin_lock(&lock->wait_lock);
			if (!wait->lock_acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (wait->lock_acquired)
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);

	if (!list_empty_careful(&wait.list)) {
		raw_spin_lock(&lock->wait_lock);
		list_del_init(&wait.list);
		raw_spin_unlock(&lock->wait_lock);
	}
out_before_sleep:
out:
	if (ret && type == SIX_LOCK_write) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
@ -518,9 +490,10 @@ out_before_sleep:
	return ret;
}

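A note on the should_sleep_fn contract as it stands after this hunk: a nonzero return now makes the slowpath remove its waiter from lock->wait_list, or drop the lock again via do_six_unlock_type() if it was already handed off, before returning that value to the caller. A minimal sketch of such a callback (my_lock_ctx and my_should_sleep_fn are hypothetical, not part of this patch):

struct my_lock_ctx {
	bool abort;	/* set elsewhere to give up on the lock */
};

static int my_should_sleep_fn(struct six_lock *lock, void *p)
{
	struct my_lock_ctx *ctx = p;

	/* nonzero aborts the lock attempt; 0 means keep waiting */
	return ctx->abort ? -EINTR : 0;
}

/* e.g.: ret = six_lock_intent(lock, my_should_sleep_fn, &ctx); */
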
__always_inline
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p)
__always_inline __flatten
static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	int ret;

@ -528,7 +501,7 @@ static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
	six_acquire(&lock->dep_map, 0);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, should_sleep_fn, p);
		: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map);
@ -538,28 +511,23 @@ static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
	return ret;
}

__always_inline
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	struct six_lock_waiter wait;

	return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;

	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent) {
		EBUG_ON(lock->owner != current);

		if (lock->intent_lock_recurse) {
			--lock->intent_lock_recurse;
			return;
		}

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;
	}

	if (type == SIX_LOCK_read &&
	    lock->readers) {
@ -576,6 +544,27 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent &&
	    lock->intent_lock_recurse) {
		--lock->intent_lock_recurse;
		return;
	}

	do_six_unlock_type(lock, type);
}

#define __SIX_LOCK(type) \
bool six_trylock_##type(struct six_lock *lock) \
{ \
@ -596,6 +585,14 @@ int six_lock_##type(struct six_lock *lock, \
} \
EXPORT_SYMBOL_GPL(six_lock_##type); \
\
int six_lock_waiter_##type(struct six_lock *lock, \
			   struct six_lock_waiter *wait, \
			   six_lock_should_sleep_fn should_sleep_fn, void *p)\
{ \
	return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p);\
} \
EXPORT_SYMBOL_GPL(six_lock_waiter_##type); \
\
void six_unlock_##type(struct six_lock *lock) \
{ \
	__six_unlock_type(lock, SIX_LOCK_##type); \
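For a caller's-eye view of the six_lock_waiter_##type() wrappers added above, here is a minimal usage sketch (example_lock_intent is hypothetical, not in the tree): the caller supplies the wait-list entry itself, so it remains visible for the whole time the task is queued rather than existing only on the slowpath's stack.

static void example_lock_intent(struct six_lock *lock)
{
	struct six_lock_waiter wait;

	/* blocks until the lock is granted; NULL = no early-abort callback */
	six_lock_waiter_intent(lock, &wait, NULL, NULL);

	/* ... guarded work ... */

	six_unlock_intent(lock);
}
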
@ -639,7 +636,7 @@ bool six_lock_tryupgrade(struct six_lock *lock)
	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old);
	six_set_owner(lock, SIX_LOCK_intent, old, current);

	return true;
}
@ -701,44 +698,12 @@ void six_lock_wakeup_all(struct six_lock *lock)
	struct six_lock_waiter *w;

	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry(w, &lock->wait_list[0], list)
	list_for_each_entry(w, &lock->wait_list, list)
		wake_up_process(w->task);
	list_for_each_entry(w, &lock->wait_list[1], list)
		wake_up_process(w->task);

	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

struct free_pcpu_rcu {
	struct rcu_head rcu;
	void __percpu *p;
};

static void free_pcpu_rcu_fn(struct rcu_head *_rcu)
{
	struct free_pcpu_rcu *rcu =
		container_of(_rcu, struct free_pcpu_rcu, rcu);

	free_percpu(rcu->p);
	kfree(rcu);
}

void six_lock_pcpu_free_rcu(struct six_lock *lock)
{
	struct free_pcpu_rcu *rcu = kzalloc(sizeof(*rcu), GFP_KERNEL);

	if (!rcu)
		return;

	rcu->p = lock->readers;
	lock->readers = NULL;

	call_rcu(&rcu->rcu, free_pcpu_rcu_fn);
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free_rcu);

void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
@ -763,15 +728,19 @@ EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret = { 0, lock->state.intent_lock };
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read] = 0;
	ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write] = lock->state.seq & 1;

	if (!lock->readers)
		ret.read += lock->state.read_lock;
		ret.n[SIX_LOCK_read] += lock->state.read_lock;
	else {
		int cpu;

		for_each_possible_cpu(cpu)
			ret.read += *per_cpu_ptr(lock->readers, cpu);
			ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu);
	}

	return ret;
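Assuming struct six_lock_count now carries an n[] array indexed by enum six_lock_type, as this hunk suggests, a hypothetical debugging helper (not part of the patch) would read the counts like so:

static void example_dump_six_lock(struct six_lock *lock)
{
	struct six_lock_count c = six_lock_counts(lock);

	/* n[] assumed unsigned, per the assignments above */
	printk(KERN_DEBUG "six_lock %p: read %u intent %u write %u\n",
	       lock, c.n[SIX_LOCK_read], c.n[SIX_LOCK_intent], c.n[SIX_LOCK_write]);
}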