Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)
Update bcachefs sources to 24c6361e20 bcachefs: Fix a trans path overflow in bch2_btree_delete_range_trans()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent 7af94e14b5
commit 99caca2c70
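The fix named in the title shows up in the btree_iter.c hunk further down: btree path allocation now checks a 64-bit allocation bitmap and calls a dedicated btree_path_overflow() helper (which dumps the transaction's paths and panics) when every slot is taken. The following is a hedged, standalone sketch of that guard pattern only; the constant, bitmap layout and helper names are simplified for illustration and are not the actual bcachefs definitions.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PATH_MAX_SLOTS 64			/* stand-in for BTREE_ITER_MAX */

/* Allocate one slot out of a fixed array tracked by a 64-bit bitmap. */
static unsigned path_slot_alloc(uint64_t *allocated)
{
	/* All bits set: the transaction has run out of path slots. */
	if (*allocated == ~((~0ULL << 1) << (PATH_MAX_SLOTS - 1))) {
		fprintf(stderr, "trans path overflow\n");
		abort();		/* the kernel code panics after dumping state */
	}

	unsigned idx = __builtin_ctzll(~*allocated);	/* lowest clear bit */
	*allocated |= 1ULL << idx;
	return idx;
}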
@@ -1 +1 @@
-26202210393adf3fce3d98a3a2598c21d07b5634
+24c6361e202cc09de0159505eb3ab3ca265520d8
@@ -229,6 +229,7 @@ static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *
}

struct printbuf;
extern __printf(2, 0) void prt_vprintf(struct printbuf *out, const char *fmt, va_list args);
extern __printf(2, 3) void prt_printf(struct printbuf *out, const char *fmt, ...);

static const char hex_asc[] = "0123456789abcdef";
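The userspace build shims the kernel's printbuf API in its linux/kernel.h replacement; the declarations above are what the rest of this diff relies on (for example bch2_print_string_as_lines() and btree_err_msg() further down). A minimal usage sketch, assuming the bcachefs printbuf helpers (PRINTBUF, prt_printf, printbuf_exit) behave as they do elsewhere in this diff; the surrounding function is made up for illustration:

static void log_pos_example(struct bch_fs *c, struct bpos pos)
{
	struct printbuf buf = PRINTBUF;		/* stack printbuf, buffer allocated on demand */

	prt_printf(&buf, "inode %llu offset %llu snapshot %u",
		   pos.inode, pos.offset, pos.snapshot);
	bch_err(c, "%s", buf.buf);		/* emit the built-up string */
	printbuf_exit(&buf);			/* free the buffer */
}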
@@ -107,6 +107,7 @@ struct six_lock {
	struct task_struct *owner;
	unsigned __percpu *readers;
	unsigned intent_lock_recurse;
	unsigned long ip;
	raw_spinlock_t wait_lock;
	struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -119,6 +120,7 @@ struct six_lock_waiter {
	struct task_struct *task;
	enum six_lock_type lock_want;
	bool lock_acquired;
	u64 start_time;
};

typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
@@ -401,6 +401,7 @@ TRACE_EVENT(btree_path_relock_fail,
		__array(char, trans_fn, 32 )
		__field(unsigned long, caller_ip )
		__field(u8, btree_id )
+		__field(u8, level )
		TRACE_BPOS_entries(pos)
		__array(char, node, 24 )
		__field(u32, iter_lock_seq )
@@ -413,6 +414,7 @@ TRACE_EVENT(btree_path_relock_fail,
		strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
		__entry->caller_ip = caller_ip;
		__entry->btree_id = path->btree_id;
+		__entry->level = path->level;
		TRACE_BPOS_assign(pos, path->pos);
		if (IS_ERR(b))
			strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node));
@@ -422,13 +424,14 @@ TRACE_EVENT(btree_path_relock_fail,
		__entry->node_lock_seq = is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
	),

-	TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %s iter seq %u lock seq %u",
+	TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s iter seq %u lock seq %u",
		  __entry->trans_fn,
		  (void *) __entry->caller_ip,
		  bch2_btree_ids[__entry->btree_id],
		  __entry->pos_inode,
		  __entry->pos_offset,
		  __entry->pos_snapshot,
+		  __entry->level,
		  __entry->node,
		  __entry->iter_lock_seq,
		  __entry->node_lock_seq)
@ -445,12 +448,15 @@ TRACE_EVENT(btree_path_upgrade_fail,
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, btree_id )
|
||||
__field(u8, level )
|
||||
TRACE_BPOS_entries(pos)
|
||||
__field(u8, locked )
|
||||
__field(u8, self_read_count )
|
||||
__field(u8, self_intent_count)
|
||||
__field(u8, read_count )
|
||||
__field(u8, intent_count )
|
||||
__field(u32, iter_lock_seq )
|
||||
__field(u32, node_lock_seq )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -459,6 +465,7 @@ TRACE_EVENT(btree_path_upgrade_fail,
|
||||
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
__entry->btree_id = path->btree_id;
|
||||
__entry->level = level;
|
||||
TRACE_BPOS_assign(pos, path->pos);
|
||||
__entry->locked = btree_node_locked(path, level);
|
||||
|
||||
@ -468,20 +475,25 @@ TRACE_EVENT(btree_path_upgrade_fail,
|
||||
c = six_lock_counts(&path->l[level].b->c.lock);
|
||||
__entry->read_count = c.n[SIX_LOCK_read];
|
||||
__entry->intent_count = c.n[SIX_LOCK_read];
|
||||
__entry->iter_lock_seq = path->l[level].lock_seq;
|
||||
__entry->node_lock_seq = is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
|
||||
),
|
||||
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u, locked %u held %u:%u lock count %u:%u",
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
bch2_btree_ids[__entry->btree_id],
|
||||
__entry->pos_inode,
|
||||
__entry->pos_offset,
|
||||
__entry->pos_snapshot,
|
||||
__entry->level,
|
||||
__entry->locked,
|
||||
__entry->self_read_count,
|
||||
__entry->self_intent_count,
|
||||
__entry->read_count,
|
||||
__entry->intent_count)
|
||||
__entry->intent_count,
|
||||
__entry->iter_lock_seq,
|
||||
__entry->node_lock_seq)
|
||||
);
|
||||
|
||||
/* Garbage collection */
|
||||
@@ -499,22 +511,29 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
/* Allocator */

TRACE_EVENT(bucket_alloc,
-	TP_PROTO(struct bch_dev *ca, const char *alloc_reserve),
-	TP_ARGS(ca, alloc_reserve),
+	TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+		 bool user, u64 bucket),
+	TP_ARGS(ca, alloc_reserve, user, bucket),

	TP_STRUCT__entry(
		__field(dev_t, dev )
		__array(char, reserve, 16 )
+		__field(bool, user )
+		__field(u64, bucket )
	),

	TP_fast_assign(
		__entry->dev = ca->dev;
		strlcpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
+		__entry->user = user;
+		__entry->bucket = bucket;
	),

-	TP_printk("%d,%d reserve %s",
+	TP_printk("%d,%d reserve %s user %u bucket %llu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->reserve)
+		  __entry->reserve,
+		  __entry->user,
+		  __entry->bucket)
);

TRACE_EVENT(bucket_alloc_fail,
@@ -544,7 +563,7 @@ TRACE_EVENT(bucket_alloc_fail,
		__field(u64, need_journal_commit )
		__field(u64, nouse )
		__field(bool, nonblocking )
-		__array(char, err, 16 )
+		__array(char, err, 32 )
	),

	TP_fast_assign(
@ -881,18 +900,41 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade,
|
||||
TRACE_EVENT(trans_restart_upgrade,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path),
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
);
|
||||
struct btree_path *path,
|
||||
unsigned old_locks_want,
|
||||
unsigned new_locks_want),
|
||||
TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want),
|
||||
|
||||
DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
struct btree_path *path),
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, btree_id )
|
||||
__field(u8, old_locks_want )
|
||||
__field(u8, new_locks_want )
|
||||
TRACE_BPOS_entries(pos)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
__entry->btree_id = path->btree_id;
|
||||
__entry->old_locks_want = old_locks_want;
|
||||
__entry->new_locks_want = new_locks_want;
|
||||
TRACE_BPOS_assign(pos, path->pos)
|
||||
),
|
||||
|
||||
TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
bch2_btree_ids[__entry->btree_id],
|
||||
__entry->pos_inode,
|
||||
__entry->pos_offset,
|
||||
__entry->pos_snapshot,
|
||||
__entry->old_locks_want,
|
||||
__entry->new_locks_want)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
|
||||
@ -964,57 +1006,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
|
||||
TP_ARGS(trans, caller_ip, path)
|
||||
);
|
||||
|
||||
TRACE_EVENT(trans_restart_would_deadlock,
|
||||
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip,
|
||||
unsigned reason,
|
||||
struct btree_path *have,
|
||||
struct btree_path *want,
|
||||
struct bpos *want_pos),
|
||||
TP_ARGS(trans, caller_ip, reason,
|
||||
have, want, want_pos),
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip)
|
||||
);
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, trans_fn, 32 )
|
||||
__field(unsigned long, caller_ip )
|
||||
__field(u8, in_traverse_all )
|
||||
__field(u8, reason )
|
||||
__field(u8, have_btree_id )
|
||||
__field(u8, have_type )
|
||||
__field(u8, want_btree_id )
|
||||
__field(u8, want_type )
|
||||
TRACE_BPOS_entries(have_pos)
|
||||
TRACE_BPOS_entries(want_pos)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
|
||||
__entry->caller_ip = caller_ip;
|
||||
__entry->in_traverse_all = trans->in_traverse_all;
|
||||
__entry->reason = reason;
|
||||
__entry->have_btree_id = have->btree_id;
|
||||
__entry->have_type = have->cached;
|
||||
__entry->want_btree_id = want->btree_id;
|
||||
__entry->want_type = want->cached;
|
||||
TRACE_BPOS_assign(have_pos, have->pos);
|
||||
TRACE_BPOS_assign(want_pos, *want_pos);
|
||||
),
|
||||
|
||||
TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
|
||||
__entry->trans_fn,
|
||||
(void *) __entry->caller_ip,
|
||||
__entry->in_traverse_all,
|
||||
__entry->reason,
|
||||
__entry->have_btree_id,
|
||||
__entry->have_type,
|
||||
__entry->have_pos_inode,
|
||||
__entry->have_pos_offset,
|
||||
__entry->have_pos_snapshot,
|
||||
__entry->want_btree_id,
|
||||
__entry->want_type,
|
||||
__entry->want_pos_inode,
|
||||
__entry->want_pos_offset,
|
||||
__entry->want_pos_snapshot)
|
||||
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,
|
||||
TP_PROTO(struct btree_trans *trans,
|
||||
unsigned long caller_ip),
|
||||
TP_ARGS(trans, caller_ip)
|
||||
);
|
||||
|
||||
TRACE_EVENT(trans_restart_would_deadlock_write,
|
||||
|
@@ -268,7 +268,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *

	spin_unlock(&c->freelist_lock);

-	trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve]);
	return ob;
}

@@ -575,7 +574,10 @@ err:
	if (!ob)
		ob = ERR_PTR(-BCH_ERR_no_buckets_found);

-	if (IS_ERR(ob))
+	if (!IS_ERR(ob))
+		trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
+				may_alloc_partial, ob->bucket);
+	else
		trace_and_count(c, bucket_alloc_fail,
				ca, bch2_alloc_reserves[reserve],
				usage.d[BCH_DATA_free].buckets,
@@ -1223,7 +1225,9 @@ err:

	if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
	    bch2_err_matches(ret, BCH_ERR_freelist_empty))
-		return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
+		return cl
+			? ERR_PTR(-EAGAIN)
+			: ERR_PTR(-BCH_ERR_ENOSPC_bucket_alloc);

	if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
		return ERR_PTR(-EROFS);
@ -529,14 +529,22 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
if (bp.level) {
|
||||
struct btree *b;
|
||||
|
||||
/*
|
||||
* If a backpointer for a btree node wasn't found, it may be
|
||||
* because it was overwritten by a new btree node that hasn't
|
||||
* been written out yet - backpointer_get_node() checks for
|
||||
* this:
|
||||
*/
|
||||
bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
|
||||
b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
|
||||
if (!IS_ERR_OR_NULL(b))
|
||||
return bkey_i_to_s_c(&b->key);
|
||||
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
if (IS_ERR(b))
|
||||
return bkey_s_c_err(PTR_ERR(b));
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
|
@@ -226,9 +226,11 @@ do { \
	dynamic_fault("bcachefs:meta:write:" name)

#ifdef __KERNEL__
-#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name)
+#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name)
+#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
#else
+#define bch2_log_msg(_c, fmt) fmt
#define bch2_fmt(_c, fmt) fmt "\n"
#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
#endif
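Splitting bch2_log_msg() out of bch2_fmt() lets callers attach the standard "bcachefs (%s): " prefix to messages that are built up in a printbuf and emitted later as one unit; the btree_io.c hunk below does exactly that in btree_err_msg(). A hedged sketch of the intended usage (the surrounding function is made up for illustration, the helpers are the ones appearing in this diff):

static void report_error_example(struct bch_fs *c, const char *what)
{
	struct printbuf buf = PRINTBUF;

	prt_printf(&buf, bch2_log_msg(c, ""));		/* expands to "bcachefs (%s): ", c->name */
	prt_printf(&buf, "error validating %s", what);

	bch2_print_string_as_lines(KERN_ERR, buf.buf);	/* also introduced/used in this diff */
	printbuf_exit(&buf);
}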
@ -812,7 +814,6 @@ struct bch_fs {
|
||||
struct mutex gc_gens_lock;
|
||||
|
||||
/* IO PATH */
|
||||
struct semaphore io_in_flight;
|
||||
struct bio_set bio_read;
|
||||
struct bio_set bio_read_split;
|
||||
struct bio_set bio_write;
|
||||
|
@@ -1411,7 +1411,8 @@ struct bch_sb_field_disk_groups {
	x(trans_restart_key_cache_upgrade, 70) \
	x(trans_traverse_all, 71) \
	x(transaction_commit, 72) \
-	x(write_super, 73)
+	x(write_super, 73) \
+	x(trans_restart_would_deadlock_recursion_limit, 74) \

enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
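The persistent counters are an x-macro list: the same entries expand once into enum bch_persistent_counters and again (elsewhere) into a name table, so adding the new trans_restart_would_deadlock_recursion_limit counter is a single list entry. A simplified, self-contained illustration of the pattern; the names below are hypothetical, not the bcachefs macros:

#define EXAMPLE_COUNTERS()					\
	x(transaction_commit,				72)	\
	x(write_super,					73)	\
	x(trans_restart_would_deadlock_recursion_limit,	74)

enum example_counters {
#define x(t, n) EX_COUNTER_##t = n,
	EXAMPLE_COUNTERS()
#undef x
};

static const char * const example_counter_names[] = {
#define x(t, n) [n] = #t,
	EXAMPLE_COUNTERS()
#undef x
};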
@ -110,14 +110,17 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
|
||||
static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
{
|
||||
struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL);
|
||||
struct btree *b = kzalloc(sizeof(struct btree), gfp);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
bkey_btree_ptr_init(&b->key);
|
||||
__six_lock_init(&b->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
lockdep_set_no_check_recursion(&b->c.lock.dep_map);
|
||||
#endif
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
b->byte_order = ilog2(btree_bytes(c));
|
||||
@ -127,7 +130,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
|
||||
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b = __btree_node_mem_alloc(c);
|
||||
struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
@ -150,8 +153,6 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
|
||||
/* Cause future lookups for this node to fail: */
|
||||
b->hash_val = 0;
|
||||
|
||||
six_lock_wakeup_all(&b->c.lock);
|
||||
}
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
@ -281,20 +282,17 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||
struct btree *b, *t;
|
||||
unsigned long nr = sc->nr_to_scan;
|
||||
unsigned long can_free = 0;
|
||||
unsigned long touched = 0;
|
||||
unsigned long freed = 0;
|
||||
unsigned long touched = 0;
|
||||
unsigned i, flags;
|
||||
unsigned long ret = SHRINK_STOP;
|
||||
bool trigger_writes = atomic_read(&bc->dirty) + nr >=
|
||||
bc->used * 3 / 4;
|
||||
|
||||
if (bch2_btree_shrinker_disabled)
|
||||
return SHRINK_STOP;
|
||||
|
||||
/* Return -1 if we can't do anything right now */
|
||||
if (sc->gfp_mask & __GFP_FS)
|
||||
mutex_lock(&bc->lock);
|
||||
else if (!mutex_trylock(&bc->lock))
|
||||
goto out_norestore;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
flags = memalloc_nofs_save();
|
||||
|
||||
/*
|
||||
@ -319,7 +317,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||
touched++;
|
||||
|
||||
if (touched >= nr)
|
||||
break;
|
||||
goto out;
|
||||
|
||||
if (!btree_node_reclaim(c, b)) {
|
||||
btree_node_data_free(c, b);
|
||||
@ -330,52 +328,43 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||
}
|
||||
restart:
|
||||
list_for_each_entry_safe(b, t, &bc->live, list) {
|
||||
/* tweak this */
|
||||
touched++;
|
||||
|
||||
if (btree_node_accessed(b)) {
|
||||
clear_btree_node_accessed(b);
|
||||
goto touched;
|
||||
}
|
||||
|
||||
if (!btree_node_reclaim(c, b)) {
|
||||
/* can't call bch2_btree_node_hash_remove under lock */
|
||||
} else if (!btree_node_reclaim(c, b)) {
|
||||
freed++;
|
||||
if (&t->list != &bc->live)
|
||||
list_move_tail(&bc->live, &t->list);
|
||||
|
||||
btree_node_data_free(c, b);
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
bch2_btree_node_hash_remove(bc, b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
|
||||
if (freed >= nr)
|
||||
goto out;
|
||||
|
||||
if (sc->gfp_mask & __GFP_FS)
|
||||
mutex_lock(&bc->lock);
|
||||
else if (!mutex_trylock(&bc->lock))
|
||||
if (freed == nr)
|
||||
goto out;
|
||||
} else if (trigger_writes &&
|
||||
btree_node_dirty(b) &&
|
||||
!btree_node_will_make_reachable(b) &&
|
||||
!btree_node_write_blocked(b) &&
|
||||
six_trylock_read(&b->c.lock)) {
|
||||
list_move(&bc->live, &b->list);
|
||||
mutex_unlock(&bc->lock);
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
six_unlock_read(&b->c.lock);
|
||||
if (touched >= nr)
|
||||
goto out_nounlock;
|
||||
mutex_lock(&bc->lock);
|
||||
goto restart;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
touched:
|
||||
touched++;
|
||||
|
||||
if (touched >= nr) {
|
||||
/* Save position */
|
||||
if (&t->list != &bc->live)
|
||||
list_move_tail(&bc->live, &t->list);
|
||||
if (touched >= nr)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&bc->lock);
|
||||
out:
|
||||
mutex_unlock(&bc->lock);
|
||||
out_nounlock:
|
||||
ret = freed;
|
||||
memalloc_nofs_restore(flags);
|
||||
out_norestore:
|
||||
trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
|
||||
return ret;
|
||||
}
|
||||
@@ -596,9 +585,14 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
		goto got_node;
	}

-	b = __btree_node_mem_alloc(c);
-	if (!b)
-		goto err_locked;
+	b = __btree_node_mem_alloc(c, __GFP_NOWARN);
+	if (!b) {
+		mutex_unlock(&bc->lock);
+		b = __btree_node_mem_alloc(c, GFP_KERNEL);
+		if (!b)
+			goto err;
+		mutex_lock(&bc->lock);
+	}

	if (pcpu_read_locks)
		six_lock_pcpu_alloc(&b->c.lock);
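The hunk above changes btree node allocation to try an opportunistic no-warn allocation while still holding bc->lock, and only if that fails to drop the lock, do a normal GFP_KERNEL allocation that may sleep, and retake the lock. A generic, hedged sketch of the same pattern (not the bcachefs code; the GFP flags used for the first attempt here are an assumption):

#include <linux/slab.h>
#include <linux/mutex.h>

static void *alloc_under_lock_example(struct mutex *lock, size_t size)
{
	/* First attempt: don't sleep and don't warn, so the lock can stay held. */
	void *p = kzalloc(size, GFP_NOWAIT | __GFP_NOWARN);

	if (!p) {
		/* Fall back to a blocking allocation without holding the lock. */
		mutex_unlock(lock);
		p = kzalloc(size, GFP_KERNEL);
		mutex_lock(lock);
	}

	return p;
}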
@ -651,7 +645,7 @@ out:
|
||||
return b;
|
||||
err:
|
||||
mutex_lock(&bc->lock);
|
||||
err_locked:
|
||||
|
||||
/* Try to cannibalize another cached btree node: */
|
||||
if (bc->alloc_lock == current) {
|
||||
b2 = btree_node_cannibalize(c);
|
||||
@ -763,16 +757,6 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
return b;
|
||||
}
|
||||
|
||||
static int lock_node_check_fn(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct btree *b = container_of(lock, struct btree, c.lock);
|
||||
const struct bkey_i *k = p;
|
||||
|
||||
if (b->hash_val != btree_ptr_hash_val(k))
|
||||
return BCH_ERR_lock_fail_node_reused;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
@ -894,15 +878,11 @@ lock_node:
|
||||
if (btree_node_read_locked(path, level + 1))
|
||||
btree_node_unlock(trans, path, level + 1);
|
||||
|
||||
ret = btree_node_lock(trans, path, &b->c, k->k.p, level, lock_type,
|
||||
lock_node_check_fn, (void *) k, trace_ip);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
goto retry;
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
BUG();
|
||||
}
|
||||
ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.level != level ||
|
||||
@ -1008,13 +988,10 @@ retry:
|
||||
} else {
|
||||
lock_node:
|
||||
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
goto retry;
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
BUG();
|
||||
}
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ERR_PTR(ret);
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
|
||||
b->c.btree_id != btree_id ||
|
||||
|
@ -536,9 +536,9 @@ static int bch2_repair_topology(struct bch_fs *c)
|
||||
if (btree_node_fake(b))
|
||||
continue;
|
||||
|
||||
six_unlock_read(&b->c.lock);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
|
||||
ret = bch2_btree_repair_topology_recurse(&trans, b);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
if (ret == DROP_THIS_NODE) {
|
||||
bch_err(c, "empty btree root - repair unimplemented");
|
||||
|
@ -513,9 +513,11 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
struct btree *b, struct bset *i,
|
||||
unsigned offset, int write)
|
||||
{
|
||||
prt_printf(out, "error validating btree node ");
|
||||
if (write)
|
||||
prt_printf(out, "before write ");
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
if (!write)
|
||||
prt_str(out, "error validating btree node ");
|
||||
else
|
||||
prt_str(out, "corrupt btree node before write ");
|
||||
if (ca)
|
||||
prt_printf(out, "on %s ", ca->name);
|
||||
prt_printf(out, "at btree ");
|
||||
@ -524,6 +526,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, "\n node offset %u", b->written);
|
||||
if (i)
|
||||
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
|
||||
prt_str(out, ": ");
|
||||
}
|
||||
|
||||
enum btree_err_type {
|
||||
@ -543,7 +546,7 @@ enum btree_validate_ret {
|
||||
struct printbuf out = PRINTBUF; \
|
||||
\
|
||||
btree_err_msg(&out, c, ca, b, i, b->written, write); \
|
||||
prt_printf(&out, ": " msg, ##__VA_ARGS__); \
|
||||
prt_printf(&out, msg, ##__VA_ARGS__); \
|
||||
\
|
||||
if (type == BTREE_ERR_FIXABLE && \
|
||||
write == READ && \
|
||||
@ -552,10 +555,10 @@ enum btree_validate_ret {
|
||||
goto out; \
|
||||
} \
|
||||
\
|
||||
bch2_print_string_as_lines(KERN_ERR, out.buf); \
|
||||
\
|
||||
switch (write) { \
|
||||
case READ: \
|
||||
bch_err(c, "%s", out.buf); \
|
||||
\
|
||||
switch (type) { \
|
||||
case BTREE_ERR_FIXABLE: \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
@ -575,8 +578,6 @@ enum btree_validate_ret {
|
||||
} \
|
||||
break; \
|
||||
case WRITE: \
|
||||
bch_err(c, "corrupt metadata before write: %s", out.buf);\
|
||||
\
|
||||
if (bch2_fs_inconsistent(c)) { \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
goto fsck_err; \
|
||||
|
@ -179,7 +179,7 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans,
|
||||
if (!btree_path_node(path, level))
|
||||
return;
|
||||
|
||||
if (!bch2_btree_node_relock(trans, path, level))
|
||||
if (!bch2_btree_node_relock_notrace(trans, path, level))
|
||||
return;
|
||||
|
||||
BUG_ON(!btree_path_pos_in_node(path, l->b));
|
||||
@ -627,61 +627,6 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that iterator for parent node points to child node:
|
||||
*/
|
||||
static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
struct btree_path *path, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path_level *l;
|
||||
unsigned plevel;
|
||||
bool parent_locked;
|
||||
struct bkey_packed *k;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
||||
return;
|
||||
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||
return;
|
||||
|
||||
plevel = b->c.level + 1;
|
||||
if (!btree_path_node(path, plevel))
|
||||
return;
|
||||
|
||||
parent_locked = btree_node_locked(path, plevel);
|
||||
|
||||
if (!bch2_btree_node_relock(trans, path, plevel))
|
||||
return;
|
||||
|
||||
l = &path->l[plevel];
|
||||
k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
|
||||
if (!k ||
|
||||
bkey_deleted(k) ||
|
||||
bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
|
||||
struct printbuf buf1 = PRINTBUF;
|
||||
struct printbuf buf2 = PRINTBUF;
|
||||
struct printbuf buf3 = PRINTBUF;
|
||||
struct printbuf buf4 = PRINTBUF;
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
bch2_dump_btree_node(c, l->b);
|
||||
bch2_bpos_to_text(&buf1, path->pos);
|
||||
bch2_bkey_to_text(&buf2, &uk);
|
||||
bch2_bpos_to_text(&buf3, b->data->min_key);
|
||||
bch2_bpos_to_text(&buf3, b->data->max_key);
|
||||
panic("parent iter doesn't point to new node:\n"
|
||||
"iter pos %s %s\n"
|
||||
"iter key %s\n"
|
||||
"new node %s-%s\n",
|
||||
bch2_btree_ids[path->btree_id],
|
||||
buf1.buf, buf2.buf, buf3.buf, buf4.buf);
|
||||
}
|
||||
|
||||
if (!parent_locked)
|
||||
btree_node_unlock(trans, path, plevel);
|
||||
}
|
||||
|
||||
static inline void __btree_path_level_init(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
@ -697,14 +642,12 @@ static inline void __btree_path_level_init(struct btree_path *path,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b);
|
||||
}
|
||||
|
||||
static inline void btree_path_level_init(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
inline void bch2_btree_path_level_init(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
{
|
||||
BUG_ON(path->cached);
|
||||
|
||||
btree_path_verify_new_node(trans, path, b);
|
||||
|
||||
EBUG_ON(!btree_path_pos_in_node(path, b));
|
||||
EBUG_ON(b->c.lock.state.seq & 1);
|
||||
|
||||
@ -736,7 +679,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
|
||||
mark_btree_node_locked(trans, path, b->c.level, t);
|
||||
}
|
||||
|
||||
btree_path_level_init(trans, path, b);
|
||||
bch2_btree_path_level_init(trans, path, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -754,16 +697,6 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
|
||||
|
||||
/* Btree path: traverse, set_pos: */
|
||||
|
||||
static int lock_root_check_fn(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct btree *b = container_of(lock, struct btree, c.lock);
|
||||
struct btree **rootp = p;
|
||||
|
||||
if (b != *rootp)
|
||||
return BCH_ERR_lock_fail_root_changed;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned depth_want,
|
||||
@ -795,10 +728,8 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
lock_type = __btree_lock_want(path, path->level);
|
||||
ret = btree_node_lock(trans, path, &b->c, SPOS_MAX,
|
||||
path->level, lock_type,
|
||||
lock_root_check_fn, rootp,
|
||||
trace_ip);
|
||||
ret = btree_node_lock(trans, path, &b->c,
|
||||
path->level, lock_type, trace_ip);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
|
||||
continue;
|
||||
@ -817,7 +748,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
path->l[i].b = NULL;
|
||||
|
||||
mark_btree_node_locked(trans, path, path->level, lock_type);
|
||||
btree_path_level_init(trans, path, b);
|
||||
bch2_btree_path_level_init(trans, path, b);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -990,7 +921,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
|
||||
mark_btree_node_locked(trans, path, level, lock_type);
|
||||
path->level = level;
|
||||
btree_path_level_init(trans, path, b);
|
||||
bch2_btree_path_level_init(trans, path, b);
|
||||
|
||||
bch2_btree_path_verify_locks(path);
|
||||
err:
|
||||
@ -1006,7 +937,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *path;
|
||||
unsigned long trace_ip = _RET_IP_;
|
||||
int i, ret = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (trans->in_traverse_all)
|
||||
return -BCH_ERR_transaction_restart_in_traverse_all;
|
||||
@ -1021,17 +952,6 @@ retry_all:
|
||||
|
||||
btree_trans_verify_sorted(trans);
|
||||
|
||||
for (i = trans->nr_sorted - 2; i >= 0; --i) {
|
||||
struct btree_path *path1 = trans->paths + trans->sorted[i];
|
||||
struct btree_path *path2 = trans->paths + trans->sorted[i + 1];
|
||||
|
||||
if (path1->btree_id == path2->btree_id &&
|
||||
path1->locks_want < path2->locks_want)
|
||||
__bch2_btree_path_upgrade(trans, path1, path2->locks_want);
|
||||
else if (!path1->locks_want && path2->locks_want)
|
||||
__bch2_btree_path_upgrade(trans, path1, 1);
|
||||
}
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
cond_resched();
|
||||
|
||||
@ -1120,7 +1040,7 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
|
||||
int check_pos)
|
||||
{
|
||||
unsigned i, l = path->level;
|
||||
|
||||
again:
|
||||
while (btree_path_node(path, l) &&
|
||||
!btree_path_good_node(trans, path, l, check_pos))
|
||||
__btree_path_set_level_up(trans, path, l++);
|
||||
@ -1129,9 +1049,11 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
|
||||
for (i = l + 1;
|
||||
i < path->locks_want && btree_path_node(path, i);
|
||||
i++)
|
||||
if (!bch2_btree_node_relock(trans, path, i))
|
||||
if (!bch2_btree_node_relock(trans, path, i)) {
|
||||
while (l <= i)
|
||||
__btree_path_set_level_up(trans, path, l++);
|
||||
goto again;
|
||||
}
|
||||
|
||||
return l;
|
||||
}
|
||||
@ -1175,6 +1097,9 @@ static int btree_path_traverse_one(struct btree_trans *trans,
|
||||
|
||||
path->level = btree_path_up_until_good_node(trans, path, 0);
|
||||
|
||||
EBUG_ON(btree_path_node(path, path->level) &&
|
||||
!btree_node_locked(path, path->level));
|
||||
|
||||
/*
|
||||
* Note: path->nodes[path->level] may be temporarily NULL here - that
|
||||
* would indicate to other code that we got to the end of the btree,
|
||||
@ -1431,7 +1356,7 @@ void bch2_dump_trans_updates(struct btree_trans *trans)
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_trans_updates_to_text(&buf, trans);
|
||||
bch_err(trans->c, "%s", buf.buf);
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@ -1467,11 +1392,10 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_trans_paths_to_text(&buf, trans);
|
||||
bch2_trans_updates_to_text(&buf, trans);
|
||||
|
||||
printk(KERN_ERR "%s", buf.buf);
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
||||
bch2_dump_trans_updates(trans);
|
||||
}
|
||||
|
||||
noinline
|
||||
@ -1485,7 +1409,8 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
|
||||
if (!buf.allocation_failure) {
|
||||
mutex_lock(&s->lock);
|
||||
if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
|
||||
s->nr_max_paths = hweight64(trans->paths_allocated);
|
||||
s->nr_max_paths = trans->nr_max_paths =
|
||||
hweight64(trans->paths_allocated);
|
||||
swap(s->max_paths_text, buf.buf);
|
||||
}
|
||||
mutex_unlock(&s->lock);
|
||||
@ -1494,23 +1419,26 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
|
||||
struct btree_path *pos)
|
||||
static noinline void btree_path_overflow(struct btree_trans *trans)
|
||||
{
|
||||
bch2_dump_trans_paths_updates(trans);
|
||||
panic("trans path oveflow\n");
|
||||
}
|
||||
|
||||
static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
|
||||
struct btree_path *pos)
|
||||
{
|
||||
struct btree_transaction_stats *s = btree_trans_stats(trans);
|
||||
struct btree_path *path;
|
||||
unsigned idx;
|
||||
|
||||
if (unlikely(trans->paths_allocated ==
|
||||
~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
|
||||
bch2_dump_trans_paths_updates(trans);
|
||||
panic("trans path oveflow\n");
|
||||
}
|
||||
~((~0ULL << 1) << (BTREE_ITER_MAX - 1))))
|
||||
btree_path_overflow(trans);
|
||||
|
||||
idx = __ffs64(~trans->paths_allocated);
|
||||
trans->paths_allocated |= 1ULL << idx;
|
||||
|
||||
if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths))
|
||||
if (unlikely(idx > trans->nr_max_paths))
|
||||
bch2_trans_update_max_paths(trans);
|
||||
|
||||
path = &trans->paths[idx];
|
||||
@ -2649,15 +2577,18 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
|
||||
iter->key_cache_path = NULL;
|
||||
}
|
||||
|
||||
static void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned btree_id, struct bpos pos,
|
||||
unsigned locks_want,
|
||||
unsigned depth,
|
||||
unsigned flags,
|
||||
unsigned long ip)
|
||||
static inline void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned btree_id, struct bpos pos,
|
||||
unsigned locks_want,
|
||||
unsigned depth,
|
||||
unsigned flags,
|
||||
unsigned long ip)
|
||||
{
|
||||
EBUG_ON(trans->restarted);
|
||||
if (trans->restarted)
|
||||
panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n",
|
||||
bch2_err_str(trans->restarted),
|
||||
(void *) trans->last_restarted_ip);
|
||||
|
||||
if (flags & BTREE_ITER_ALL_LEVELS)
|
||||
flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS;
|
||||
@ -2742,37 +2673,34 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
|
||||
dst->key_cache_path = NULL;
|
||||
}
|
||||
|
||||
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
{
|
||||
unsigned new_top = trans->mem_top + size;
|
||||
size_t old_bytes = trans->mem_bytes;
|
||||
size_t new_bytes = roundup_pow_of_two(new_top);
|
||||
void *new_mem;
|
||||
void *p;
|
||||
|
||||
trans->mem_max = max(trans->mem_max, new_top);
|
||||
|
||||
if (new_top > trans->mem_bytes) {
|
||||
size_t old_bytes = trans->mem_bytes;
|
||||
size_t new_bytes = roundup_pow_of_two(new_top);
|
||||
void *new_mem;
|
||||
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
|
||||
|
||||
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
|
||||
new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
|
||||
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
|
||||
new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
|
||||
new_bytes = BTREE_TRANS_MEM_MAX;
|
||||
kfree(trans->mem);
|
||||
}
|
||||
|
||||
new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
|
||||
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
|
||||
new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
|
||||
new_bytes = BTREE_TRANS_MEM_MAX;
|
||||
kfree(trans->mem);
|
||||
}
|
||||
if (!new_mem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (!new_mem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
trans->mem = new_mem;
|
||||
trans->mem_bytes = new_bytes;
|
||||
|
||||
trans->mem = new_mem;
|
||||
trans->mem_bytes = new_bytes;
|
||||
|
||||
if (old_bytes) {
|
||||
trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
|
||||
}
|
||||
if (old_bytes) {
|
||||
trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
|
||||
}
|
||||
|
||||
p = trans->mem + trans->mem_top;
|
||||
@ -2898,8 +2826,9 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
|
||||
trans->c = c;
|
||||
trans->fn = fn;
|
||||
trans->last_begin_time = ktime_get_ns();
|
||||
trans->task = current;
|
||||
trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
|
||||
trans->locking_wait.task = current;
|
||||
closure_init_stack(&trans->ref);
|
||||
|
||||
bch2_trans_alloc_paths(trans, c);
|
||||
|
||||
@ -2909,6 +2838,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
|
||||
|
||||
trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes);
|
||||
trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL);
|
||||
trans->nr_max_paths = s->nr_max_paths;
|
||||
|
||||
if (!unlikely(trans->mem)) {
|
||||
trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
|
||||
@ -2920,7 +2850,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
||||
if (trans->task->pid < pos->task->pid) {
|
||||
if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
|
||||
list_add_tail(&trans->list, &pos->list);
|
||||
goto list_add_done;
|
||||
}
|
||||
@ -2961,6 +2891,8 @@ void bch2_trans_exit(struct btree_trans *trans)
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
closure_sync(&trans->ref);
|
||||
|
||||
if (s)
|
||||
s->max_mem = max(s->max_mem, trans->mem_max);
|
||||
|
||||
@ -3009,8 +2941,8 @@ void bch2_trans_exit(struct btree_trans *trans)
|
||||
}
|
||||
|
||||
static void __maybe_unused
|
||||
bch2_btree_path_node_to_text(struct printbuf *out,
|
||||
struct btree_bkey_cached_common *b)
|
||||
bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
struct six_lock_count c = six_lock_counts(&b->lock);
|
||||
struct task_struct *owner;
|
||||
@ -3021,11 +2953,13 @@ bch2_btree_path_node_to_text(struct printbuf *out,
|
||||
pid = owner ? owner->pid : 0;;
|
||||
rcu_read_unlock();
|
||||
|
||||
prt_printf(out, " l=%u %s:",
|
||||
b->level, bch2_btree_ids[b->btree_id]);
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
|
||||
b->level, bch2_btree_ids[b->btree_id]);
|
||||
bch2_bpos_to_text(out, btree_node_pos(b));
|
||||
|
||||
prt_printf(out, " locks %u:%u:%u held by pid %u",
|
||||
prt_tab(out);
|
||||
prt_printf(out, " locks %u:%u:%u held by pid %u",
|
||||
c.n[0], c.n[1], c.n[2], pid);
|
||||
}
|
||||
|
||||
@ -3036,7 +2970,12 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
|
||||
static char lock_types[] = { 'r', 'i', 'w' };
|
||||
unsigned l;
|
||||
|
||||
prt_printf(out, "%i %s\n", trans->task->pid, trans->fn);
|
||||
if (!out->nr_tabstops) {
|
||||
printbuf_tabstop_push(out, 16);
|
||||
printbuf_tabstop_push(out, 32);
|
||||
}
|
||||
|
||||
prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
|
||||
|
||||
trans_for_each_path(trans, path) {
|
||||
if (!path->nodes_locked)
|
||||
@ -3048,33 +2987,26 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
|
||||
path->level,
|
||||
bch2_btree_ids[path->btree_id]);
|
||||
bch2_bpos_to_text(out, path->pos);
|
||||
prt_printf(out, "\n");
|
||||
prt_newline(out);
|
||||
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++) {
|
||||
if (btree_node_locked(path, l) &&
|
||||
!IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
|
||||
prt_printf(out, " %c l=%u ",
|
||||
lock_types[btree_node_locked_type(path, l)], l);
|
||||
bch2_btree_path_node_to_text(out, b);
|
||||
prt_printf(out, "\n");
|
||||
bch2_btree_bkey_cached_common_to_text(out, b);
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b = READ_ONCE(trans->locking);
|
||||
if (b) {
|
||||
path = &trans->paths[trans->locking_path_idx];
|
||||
prt_printf(out, " locking path %u %c l=%u %c %s:",
|
||||
trans->locking_path_idx,
|
||||
path->cached ? 'c' : 'b',
|
||||
trans->locking_level,
|
||||
lock_types[trans->locking_lock_type],
|
||||
bch2_btree_ids[trans->locking_btree_id]);
|
||||
bch2_bpos_to_text(out, trans->locking_pos);
|
||||
|
||||
prt_printf(out, " node ");
|
||||
bch2_btree_path_node_to_text(out, b);
|
||||
prt_printf(out, "\n");
|
||||
prt_str(out, " want");
|
||||
prt_newline(out);
|
||||
prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]);
|
||||
bch2_btree_bkey_cached_common_to_text(out, b);
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,11 +74,14 @@ __trans_next_path(struct btree_trans *trans, unsigned idx)
|
||||
|
||||
void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
|
||||
|
||||
#define trans_for_each_path(_trans, _path) \
|
||||
for (_path = __trans_next_path((_trans), 0); \
|
||||
#define trans_for_each_path_from(_trans, _path, _start) \
|
||||
for (_path = __trans_next_path((_trans), _start); \
|
||||
(_path); \
|
||||
_path = __trans_next_path((_trans), (_path)->idx + 1))
|
||||
|
||||
#define trans_for_each_path(_trans, _path) \
|
||||
trans_for_each_path_from(_trans, _path, 0)
|
||||
|
||||
static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
unsigned idx = path ? path->sorted_idx + 1 : 0;
|
||||
@ -143,6 +146,9 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bke
|
||||
struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
|
||||
struct btree_iter *, struct bpos);
|
||||
|
||||
inline void bch2_btree_path_level_init(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_trans_verify_paths(struct btree_trans *);
|
||||
void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
|
||||
@@ -286,7 +292,23 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
	iter->path->preserve = false;
}

-void *bch2_trans_kmalloc(struct btree_trans *, size_t);
+void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
+
+static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+	unsigned new_top = trans->mem_top + size;
+	void *p = trans->mem + trans->mem_top;
+
+	if (likely(new_top <= trans->mem_bytes)) {
+		trans->mem_top += size;
+		memset(p, 0, size);
+		return p;
+	} else {
+		return __bch2_trans_kmalloc(trans, size);
+
+	}
+}

u32 bch2_trans_begin(struct btree_trans *);

static inline struct btree *
|
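bch2_trans_kmalloc() above becomes an inline bump-allocator fast path: transaction memory is handed out by advancing mem_top within a preallocated buffer, and only buffer exhaustion drops into the out-of-line __bch2_trans_kmalloc() that grows the buffer (and may restart the transaction). A standalone sketch of the fast path with simplified names, not the bcachefs implementation:

#include <stddef.h>
#include <string.h>

struct bump_arena {
	void	*mem;
	size_t	top;
	size_t	bytes;
};

/* Returns a zeroed chunk, or NULL where the real code would take the slow path. */
static void *bump_alloc(struct bump_arena *a, size_t size)
{
	if (a->top + size > a->bytes)
		return NULL;

	void *p = (char *) a->mem + a->top;
	a->top += size;
	memset(p, 0, size);
	return p;
}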
||||
|
@ -398,20 +398,9 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bkey_cached_check_fn(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
|
||||
const struct btree_path *path = p;
|
||||
|
||||
if (ck->key.btree_id != path->btree_id &&
|
||||
bpos_cmp(ck->key.pos, path->pos))
|
||||
return BCH_ERR_lock_fail_node_reused;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__flatten
|
||||
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
|
||||
unsigned flags)
|
||||
noinline static int
|
||||
bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_cached *ck;
|
||||
@ -440,16 +429,12 @@ retry:
|
||||
} else {
|
||||
enum six_lock_type lock_want = __btree_lock_want(path, 0);
|
||||
|
||||
ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
|
||||
lock_want,
|
||||
bkey_cached_check_fn, path, _THIS_IP_);
|
||||
if (ret) {
|
||||
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
|
||||
goto retry;
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
goto err;
|
||||
BUG();
|
||||
}
|
||||
ret = btree_node_lock(trans, path, (void *) ck, 0,
|
||||
lock_want, _THIS_IP_);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
goto err;
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
if (ck->key.btree_id != path->btree_id ||
|
||||
bpos_cmp(ck->key.pos, path->pos)) {
|
||||
@ -496,6 +481,60 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_cached *ck;
|
||||
int ret = 0;
|
||||
|
||||
EBUG_ON(path->level);
|
||||
|
||||
path->l[1].b = NULL;
|
||||
|
||||
if (bch2_btree_node_relock(trans, path, 0)) {
|
||||
ck = (void *) path->l[0].b;
|
||||
goto fill;
|
||||
}
|
||||
retry:
|
||||
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
|
||||
if (!ck) {
|
||||
return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
|
||||
} else {
|
||||
enum six_lock_type lock_want = __btree_lock_want(path, 0);
|
||||
|
||||
ret = btree_node_lock(trans, path, (void *) ck, 0,
|
||||
lock_want, _THIS_IP_);
|
||||
EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (ck->key.btree_id != path->btree_id ||
|
||||
bpos_cmp(ck->key.pos, path->pos)) {
|
||||
six_unlock_type(&ck->c.lock, lock_want);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, lock_want);
|
||||
}
|
||||
|
||||
path->l[0].lock_seq = ck->c.lock.state.seq;
|
||||
path->l[0].b = (void *) ck;
|
||||
fill:
|
||||
if (!ck->valid)
|
||||
return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
|
||||
|
||||
if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
|
||||
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
|
||||
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
EBUG_ON(!ck->valid);
|
||||
EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
struct bkey_cached_key key,
|
||||
u64 journal_seq,
|
||||
|
@ -52,10 +52,257 @@ void bch2_btree_node_unlock_write(struct btree_trans *trans,
|
||||
|
||||
/* lock */
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b)
|
||||
/*
|
||||
* @trans wants to lock @b with type @type
|
||||
*/
|
||||
struct trans_waiting_for_lock {
|
||||
struct btree_trans *trans;
|
||||
struct btree_bkey_cached_common *node_want;
|
||||
enum six_lock_type lock_want;
|
||||
|
||||
/* for iterating over held locks :*/
|
||||
u8 path_idx;
|
||||
u8 level;
|
||||
u64 lock_start_time;
|
||||
};
|
||||
|
||||
struct lock_graph {
|
||||
struct trans_waiting_for_lock g[8];
|
||||
unsigned nr;
|
||||
};
|
||||
|
||||
static void lock_graph_pop(struct lock_graph *g)
|
||||
{
|
||||
closure_put(&g->g[--g->nr].trans->ref);
|
||||
}
|
||||
|
||||
static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
|
||||
{
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
prt_printf(out, "Found lock cycle (%u entries):", g->nr);
|
||||
prt_newline(out);
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++)
|
||||
bch2_btree_trans_to_text(out, i->trans);
|
||||
}
|
||||
|
||||
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (i == g->g) {
|
||||
trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
|
||||
ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
} else {
|
||||
i->trans->lock_must_abort = true;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
for (i = g->g + 1; i < g->g + g->nr; i++)
|
||||
wake_up_process(i->trans->locking_wait.task);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int break_cycle(struct lock_graph *g)
|
||||
{
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
if (i->trans->lock_may_not_fail ||
|
||||
i->trans->locking_wait.lock_want == SIX_LOCK_write)
|
||||
continue;
|
||||
|
||||
return abort_lock(g, i);
|
||||
}
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
if (i->trans->lock_may_not_fail ||
|
||||
!i->trans->in_traverse_all)
|
||||
continue;
|
||||
|
||||
return abort_lock(g, i);
|
||||
}
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
if (i->trans->lock_may_not_fail)
|
||||
continue;
|
||||
|
||||
return abort_lock(g, i);
|
||||
}
|
||||
|
||||
BUG();
|
||||
}
|
||||
|
||||
static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
|
||||
struct printbuf *cycle)
|
||||
{
|
||||
struct btree_trans *orig_trans = g->g->trans;
|
||||
struct trans_waiting_for_lock *i;
|
||||
int ret = 0;
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
if (i->trans->locking != i->node_want)
|
||||
while (g->g + g->nr >= i) {
|
||||
lock_graph_pop(g);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i->trans == trans) {
|
||||
if (cycle) {
|
||||
/* Only checking: */
|
||||
print_cycle(cycle, g);
|
||||
ret = -1;
|
||||
} else {
|
||||
ret = break_cycle(g);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto deadlock;
|
||||
/*
|
||||
* If we didn't abort (instead telling another
|
||||
* transaction to abort), keep checking:
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
if (g->nr == ARRAY_SIZE(g->g)) {
|
||||
if (orig_trans->lock_may_not_fail)
|
||||
return 0;
|
||||
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
|
||||
ret = btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
closure_get(&trans->ref);
|
||||
|
||||
g->g[g->nr++] = (struct trans_waiting_for_lock) {
|
||||
.trans = trans,
|
||||
.node_want = trans->locking,
|
||||
.lock_want = trans->locking_wait.lock_want,
|
||||
};
|
||||
|
||||
return 0;
|
||||
deadlock:
|
||||
while (g->nr)
|
||||
lock_graph_pop(g);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline void lock_graph_remove_non_waiters(struct lock_graph *g)
|
||||
{
|
||||
struct trans_waiting_for_lock *i;
|
||||
|
||||
for (i = g->g + 1; i < g->g + g->nr; i++)
|
||||
if (i->trans->locking != i->node_want ||
|
||||
i->trans->locking_wait.start_time != i[-1].lock_start_time) {
|
||||
while (g->g + g->nr >= i)
|
||||
lock_graph_pop(g);
|
||||
return;
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
|
||||
{
|
||||
return t1 + t2 > 1;
|
||||
}
|
||||
|
||||
int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
|
||||
{
|
||||
struct lock_graph g;
|
||||
struct trans_waiting_for_lock *top;
|
||||
struct btree_bkey_cached_common *b;
|
||||
struct btree_path *path;
|
||||
int ret;
|
||||
|
||||
if (trans->lock_must_abort) {
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
}
|
||||
|
||||
g.nr = 0;
|
||||
ret = lock_graph_descend(&g, trans, cycle);
|
||||
BUG_ON(ret);
|
||||
next:
|
||||
if (!g.nr)
|
||||
return 0;
|
||||
|
||||
top = &g.g[g.nr - 1];
|
||||
|
||||
trans_for_each_path_from(top->trans, path, top->path_idx) {
|
||||
if (!path->nodes_locked)
|
||||
continue;
|
||||
|
||||
if (top->path_idx != path->idx) {
|
||||
top->path_idx = path->idx;
|
||||
top->level = 0;
|
||||
top->lock_start_time = 0;
|
||||
}
|
||||
|
||||
for (;
|
||||
top->level < BTREE_MAX_DEPTH;
|
||||
top->level++, top->lock_start_time = 0) {
|
||||
int lock_held = btree_node_locked_type(path, top->level);
|
||||
|
||||
if (lock_held == BTREE_NODE_UNLOCKED)
|
||||
continue;
|
||||
|
||||
b = &READ_ONCE(path->l[top->level].b)->c;
|
||||
|
||||
if (unlikely(IS_ERR_OR_NULL(b))) {
|
||||
lock_graph_remove_non_waiters(&g);
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (list_empty_careful(&b->lock.wait_list))
|
||||
continue;
|
||||
|
||||
raw_spin_lock(&b->lock.wait_lock);
|
||||
list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
|
||||
BUG_ON(b != trans->locking);
|
||||
|
||||
if (top->lock_start_time &&
|
||||
time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
|
||||
continue;
|
||||
|
||||
top->lock_start_time = trans->locking_wait.start_time;
|
||||
|
||||
/* Don't check for self deadlock: */
|
||||
if (trans == top->trans ||
|
||||
!lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
|
||||
continue;
|
||||
|
||||
ret = lock_graph_descend(&g, trans, cycle);
|
||||
raw_spin_unlock(&b->lock.wait_lock);
|
||||
|
||||
if (ret)
|
||||
return ret < 0 ? ret : 0;
|
||||
goto next;
|
||||
|
||||
}
|
||||
raw_spin_unlock(&b->lock.wait_lock);
|
||||
}
|
||||
}
|
||||
|
||||
lock_graph_pop(&g);
|
||||
goto next;
|
||||
}
|
||||
|
||||
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
|
||||
{
|
||||
struct btree_trans *trans = p;
|
||||
|
||||
return bch2_check_for_deadlock(trans, NULL);
|
||||
}
|
||||
|
||||
int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
bool lock_may_not_fail)
|
||||
{
|
||||
int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Must drop our read locks before calling six_lock_write() -
|
||||
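The new bch2_check_for_deadlock() above walks a graph of blocked transactions: each waiter records which lock it wants, and following holder-to-waiter edges back around to the starting transaction means a wait-for cycle, so one participant is told to abort. The bcachefs version branches over every held lock and its wait list; the sketch below only covers the simplified case where each transaction waits on a single successor, using Floyd cycle detection, and is an illustration rather than the actual algorithm.

#include <stdbool.h>
#include <stddef.h>

struct waiter {
	struct waiter *waiting_on;	/* transaction holding the lock we want */
};

/* Simplified: one outgoing edge per waiter, so Floyd's algorithm suffices. */
static bool would_deadlock(struct waiter *w)
{
	struct waiter *slow = w, *fast = w;

	while (fast && fast->waiting_on) {
		slow = slow->waiting_on;
		fast = fast->waiting_on->waiting_on;
		if (slow == fast)
			return true;	/* wait-for cycle: taking the lock would deadlock */
	}
	return false;
}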
@ -64,98 +311,13 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
* locked:
|
||||
*/
|
||||
six_lock_readers_add(&b->lock, -readers);
|
||||
btree_node_lock_nopath_nofail(trans, b, SIX_LOCK_write);
|
||||
ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, lock_may_not_fail);
|
||||
six_lock_readers_add(&b->lock, readers);
|
||||
}
|
||||
|
||||
static inline bool path_has_read_locks(struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
if (ret)
|
||||
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
|
||||
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_read_locked(path, l))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Slowpath: */
|
||||
int __bch2_btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
unsigned reason;
|
||||
|
||||
/* Check if it's safe to block: */
|
||||
trans_for_each_path(trans, linked) {
|
||||
if (!linked->nodes_locked)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't block taking an intent lock if we have _any_ nodes read
|
||||
* locked:
|
||||
*
|
||||
* - Our read lock blocks another thread with an intent lock on
|
||||
* the same node from getting a write lock, and thus from
|
||||
* dropping its intent lock
|
||||
*
|
||||
* - And the other thread may have multiple nodes intent locked:
|
||||
* both the node we want to intent lock, and the node we
|
||||
* already have read locked - deadlock:
|
||||
*/
|
||||
if (type == SIX_LOCK_intent &&
|
||||
path_has_read_locks(linked)) {
|
||||
reason = 1;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
if (linked->btree_id != path->btree_id) {
|
||||
if (linked->btree_id < path->btree_id)
|
||||
continue;
|
||||
|
||||
reason = 3;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Within the same btree, non-cached paths come before cached
|
||||
* paths:
|
||||
*/
|
||||
if (linked->cached != path->cached) {
|
||||
if (!linked->cached)
|
||||
continue;
|
||||
|
||||
reason = 4;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interior nodes must be locked before their descendants: if
|
||||
* another path has possible descendants locked of the node
|
||||
* we're about to lock, it must have the ancestors locked too:
|
||||
*/
|
||||
if (level > btree_path_highest_level_locked(linked)) {
|
||||
reason = 5;
|
||||
goto deadlock;
|
||||
}
|
||||
|
||||
/* Must lock btree nodes in key order: */
|
||||
if (btree_node_locked(linked, level) &&
|
||||
bpos_cmp(pos, btree_node_pos(&linked->l[level].b->c)) <= 0) {
|
||||
reason = 7;
|
||||
goto deadlock;
|
||||
}
|
||||
}
|
||||
|
||||
return btree_node_lock_type(trans, path, b, pos, level,
|
||||
type, should_sleep_fn, p);
|
||||
deadlock:
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock, trans, ip, reason, linked, path, &pos);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* relock */
|
||||
@ -205,7 +367,8 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
struct btree_path *path, unsigned level,
|
||||
bool trace)
|
||||
{
|
||||
struct btree *b = btree_path_node(path, level);
|
||||
int want = __btree_lock_want(path, level);
|
||||
@ -220,7 +383,8 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
|
||||
return true;
|
||||
}
|
||||
fail:
|
||||
trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
|
||||
if (trace)
|
||||
trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -230,6 +394,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
struct btree *b = path->l[level].b;
|
||||
struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
|
||||
|
||||
if (!is_btree_node(path, level))
|
||||
return false;
|
||||
@ -253,11 +418,24 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
if (race_fault())
|
||||
return false;
|
||||
|
||||
if (btree_node_locked(path, level)
|
||||
? six_lock_tryupgrade(&b->c.lock)
|
||||
: six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
if (btree_node_locked(path, level)) {
|
||||
bool ret;
|
||||
|
||||
six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
|
||||
ret = six_lock_tryupgrade(&b->c.lock);
|
||||
six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);
|
||||
|
||||
if (ret)
|
||||
goto success;
|
||||
} else {
|
||||
if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
|
||||
goto success;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do we already have an intent lock via another path? If so, just bump
|
||||
* lock count:
|
||||
*/
|
||||
if (btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
|
@ -184,49 +184,44 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
|
||||
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
|
||||
|
||||
/* lock: */
|
||||
|
||||
static inline int __btree_node_lock_nopath(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type,
|
||||
bool lock_may_not_fail)
|
||||
{
|
||||
int ret;
|
||||
trans->lock_may_not_fail = lock_may_not_fail;
|
||||
trans->lock_must_abort = false;
|
||||
trans->locking = b;
|
||||
|
||||
ret = six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
|
||||
bch2_six_check_for_deadlock, trans);
|
||||
WRITE_ONCE(trans->locking, NULL);
|
||||
WRITE_ONCE(trans->locking_wait.start_time, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int __must_check
|
||||
btree_node_lock_nopath(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
six_lock_type(&b->lock, type, NULL, NULL);
|
||||
return 0;
|
||||
return __btree_node_lock_nopath(trans, b, type, false);
|
||||
}
|
||||
|
||||
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
|
||||
struct btree_bkey_cached_common *b,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
int ret = btree_node_lock_nopath(trans, b, type);
|
||||
int ret = __btree_node_lock_nopath(trans, b, type, true);
|
||||
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
static inline int btree_node_lock_type(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (six_trylock_type(&b->lock, type))
|
||||
return 0;
|
||||
|
||||
trans->locking_path_idx = path->idx;
|
||||
trans->locking_pos = pos;
|
||||
trans->locking_btree_id = path->btree_id;
|
||||
trans->locking_level = level;
|
||||
trans->locking_lock_type = type;
|
||||
trans->locking = b;
|
||||
ret = six_lock_type(&b->lock, type, should_sleep_fn, p);
|
||||
trans->locking = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a btree node if we already have it locked on one of our linked
|
||||
* iterators:
|
||||
@ -248,19 +243,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
|
||||
return false;
|
||||
}
|
||||
|
||||
int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
|
||||
struct btree_bkey_cached_common *,
|
||||
struct bpos, unsigned,
|
||||
enum six_lock_type,
|
||||
six_lock_should_sleep_fn, void *,
|
||||
unsigned long);
|
||||
|
||||
static inline int btree_node_lock(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
struct bpos pos, unsigned level,
|
||||
unsigned level,
|
||||
enum six_lock_type type,
|
||||
six_lock_should_sleep_fn should_sleep_fn, void *p,
|
||||
unsigned long ip)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -270,8 +257,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
|
||||
|
||||
if (likely(six_trylock_type(&b->lock, type)) ||
|
||||
btree_node_lock_increment(trans, b, level, type) ||
|
||||
!(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
|
||||
should_sleep_fn, p, ip))) {
|
||||
!(ret = btree_node_lock_nopath(trans, b, type))) {
|
||||
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
|
||||
path->l[b->level].lock_taken_time = ktime_get_ns();
|
||||
#endif
|
||||
@ -280,11 +266,13 @@ static inline int btree_node_lock(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *);
|
||||
int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *,
|
||||
struct btree_bkey_cached_common *b, bool);
|
||||
|
||||
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
static inline int __btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b,
|
||||
bool lock_may_not_fail)
|
||||
{
|
||||
EBUG_ON(&path->l[b->level].b->c != b);
|
||||
EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
|
||||
@ -297,8 +285,17 @@ static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
*/
|
||||
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
|
||||
|
||||
if (unlikely(!six_trylock_write(&b->lock)))
|
||||
__bch2_btree_node_lock_write(trans, b);
|
||||
return likely(six_trylock_write(&b->lock))
|
||||
? 0
|
||||
: __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
|
||||
}
|
||||
|
||||
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
int ret = __btree_node_lock_write(trans, path, b, true);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
static inline int __must_check
|
||||
@ -306,15 +303,14 @@ bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
bch2_btree_node_lock_write_nofail(trans, path, b);
|
||||
return 0;
|
||||
return __btree_node_lock_write(trans, path, b, false);
|
||||
}
|
||||
|
||||
/* relock: */
|
||||
|
||||
bool bch2_btree_path_relock_norestart(struct btree_trans *,
|
||||
struct btree_path *, unsigned long);
|
||||
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
|
||||
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned, bool trace);
|
||||
|
||||
static inline bool bch2_btree_node_relock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
@ -325,7 +321,19 @@ static inline bool bch2_btree_node_relock(struct btree_trans *trans,
|
||||
|
||||
return likely(btree_node_locked(path, level)) ||
|
||||
(!IS_ERR_OR_NULL(path->l[level].b) &&
|
||||
__bch2_btree_node_relock(trans, path, level));
|
||||
__bch2_btree_node_relock(trans, path, level, true));
|
||||
}
|
||||
|
||||
static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
EBUG_ON(btree_node_locked(path, level) &&
|
||||
!btree_node_write_locked(path, level) &&
|
||||
btree_node_locked_type(path, level) != __btree_lock_want(path, level));
|
||||
|
||||
return likely(btree_node_locked(path, level)) ||
|
||||
(!IS_ERR_OR_NULL(path->l[level].b) &&
|
||||
__bch2_btree_node_relock(trans, path, level, false));
|
||||
}
|
||||
|
||||
static inline int bch2_btree_path_relock(struct btree_trans *trans,
|
||||
@ -346,15 +354,22 @@ bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
|
||||
bool __bch2_btree_path_upgrade(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
|
||||
static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
unsigned old_locks_want = path->locks_want;
|
||||
|
||||
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
|
||||
|
||||
return path->locks_want < new_locks_want
|
||||
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
|
||||
: path->uptodate == BTREE_ITER_UPTODATE;
|
||||
if (path->locks_want < new_locks_want
|
||||
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
|
||||
: path->uptodate == BTREE_ITER_UPTODATE)
|
||||
return 0;
|
||||
|
||||
trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
|
||||
old_locks_want, new_locks_want);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
}
|
||||
|
||||
/* misc: */
|
||||
@ -389,6 +404,7 @@ struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
|
||||
struct btree_bkey_cached_common *b,
|
||||
unsigned);
|
||||
|
||||
int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *);
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_btree_path_verify_locks(struct btree_path *);
|
||||
|
@ -389,15 +389,15 @@ struct btree_trans_commit_hook {
|
||||
struct btree_trans {
|
||||
struct bch_fs *c;
|
||||
const char *fn;
|
||||
struct closure ref;
|
||||
struct list_head list;
|
||||
u64 last_begin_time;
|
||||
|
||||
u8 lock_may_not_fail;
|
||||
u8 lock_must_abort;
|
||||
struct btree_bkey_cached_common *locking;
|
||||
unsigned locking_path_idx;
|
||||
struct bpos locking_pos;
|
||||
u8 locking_btree_id;
|
||||
u8 locking_level;
|
||||
u8 locking_lock_type;
|
||||
struct task_struct *task;
|
||||
struct six_lock_waiter locking_wait;
|
||||
|
||||
int srcu_idx;
|
||||
|
||||
u8 fn_idx;
|
||||
@ -417,6 +417,7 @@ struct btree_trans {
|
||||
* extent:
|
||||
*/
|
||||
unsigned extra_journal_res;
|
||||
unsigned nr_max_paths;
|
||||
|
||||
u64 paths_allocated;
|
||||
|
||||
|
@ -28,6 +28,21 @@ static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
|
||||
struct keylist *, unsigned);
|
||||
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
|
||||
|
||||
static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
unsigned level,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct btree_path *path;
|
||||
|
||||
path = bch2_path_get(trans, btree_id, pos, level + 1, level,
|
||||
BTREE_ITER_INTENT, _THIS_IP_);
|
||||
path = bch2_btree_path_make_mut(trans, path, true, _THIS_IP_);
|
||||
bch2_btree_path_downgrade(trans, path);
|
||||
__bch2_btree_path_unlock(trans, path);
|
||||
return path;
|
||||
}
|
||||
|
||||
/* Debug code: */
|
||||
|
||||
/*
|
||||
@ -608,6 +623,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
ret = commit_do(&trans, &as->disk_res, &journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
JOURNAL_WATERMARK_reserved,
|
||||
btree_update_nodes_written_trans(&trans, as));
|
||||
@ -617,7 +633,10 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
"error %i in btree_update_nodes_written()", ret);
|
||||
err:
|
||||
if (as->b) {
|
||||
struct btree_path *path;
|
||||
|
||||
b = as->b;
|
||||
path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p);
|
||||
/*
|
||||
* @b is the node we did the final insert into:
|
||||
*
|
||||
@ -631,7 +650,11 @@ err:
|
||||
*/
|
||||
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
|
||||
mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(&trans, path, b);
|
||||
|
||||
bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
@ -665,10 +688,13 @@ err:
|
||||
}
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
|
||||
six_unlock_write(&b->c.lock);
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_intent);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
btree_node_unlock(&trans, path, b->c.level);
|
||||
bch2_path_put(&trans, path, true);
|
||||
}
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
@ -1002,11 +1028,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
if (update_level < BTREE_MAX_DEPTH)
|
||||
nr_nodes[1] += 1;
|
||||
|
||||
if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
|
||||
trace_and_count(c, trans_restart_iter_upgrade, trans, _RET_IP_, path);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
ret = bch2_btree_path_upgrade(trans, path, U8_MAX);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
if (flags & BTREE_INSERT_GC_LOCK_HELD)
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
@ -1084,16 +1108,16 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
goto err;
|
||||
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
|
||||
if (ret == -EAGAIN ||
|
||||
ret == -ENOMEM) {
|
||||
if (bch2_err_matches(ret, ENOSPC) ||
|
||||
bch2_err_matches(ret, ENOMEM)) {
|
||||
struct closure cl;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
do {
|
||||
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
closure_sync(&cl);
|
||||
} while (ret == -EAGAIN);
|
||||
}
|
||||
@ -1429,6 +1453,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *parent = btree_node_parent(path, b);
|
||||
struct btree *n1, *n2 = NULL, *n3 = NULL;
|
||||
struct btree_path *path1 = NULL, *path2 = NULL;
|
||||
u64 start_time = local_clock();
|
||||
|
||||
BUG_ON(!parent && (b != btree_node_root(c, b)));
|
||||
@ -1451,6 +1476,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
six_unlock_write(&n2->c.lock);
|
||||
six_unlock_write(&n1->c.lock);
|
||||
|
||||
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
|
||||
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, path1, n1);
|
||||
|
||||
path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
|
||||
six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, path2, n2->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, path2, n2);
|
||||
|
||||
bch2_btree_update_add_new_node(as, n1);
|
||||
|
||||
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
|
||||
@ -1468,6 +1503,12 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
/* Depth increases, make a new root */
|
||||
n3 = __btree_root_alloc(as, trans, b->c.level + 1);
|
||||
|
||||
path2->locks_want++;
|
||||
BUG_ON(btree_node_locked(path2, n3->c.level));
|
||||
six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, path2, n3->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, path2, n3);
|
||||
|
||||
n3->sib_u64s[0] = U16_MAX;
|
||||
n3->sib_u64s[1] = U16_MAX;
|
||||
|
||||
@ -1481,6 +1522,11 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
bch2_btree_build_aux_trees(n1);
|
||||
six_unlock_write(&n1->c.lock);
|
||||
|
||||
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
|
||||
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, path1, n1);
|
||||
|
||||
bch2_btree_update_add_new_node(as, n1);
|
||||
|
||||
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
|
||||
@ -1527,6 +1573,15 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
six_unlock_intent(&n2->c.lock);
|
||||
six_unlock_intent(&n1->c.lock);
|
||||
|
||||
if (path2) {
|
||||
__bch2_btree_path_unlock(trans, path2);
|
||||
bch2_path_put(trans, path2, true);
|
||||
}
|
||||
if (path1) {
|
||||
__bch2_btree_path_unlock(trans, path1);
|
||||
bch2_path_put(trans, path1, true);
|
||||
}
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
|
||||
bch2_time_stats_update(&c->times[n2
|
||||
@ -1643,7 +1698,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
enum btree_node_sibling sib)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *sib_path = NULL;
|
||||
struct btree_path *sib_path = NULL, *new_path = NULL;
|
||||
struct btree_update *as;
|
||||
struct bkey_format_state new_s;
|
||||
struct bkey_format new_f;
|
||||
@ -1767,6 +1822,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
bch2_btree_build_aux_trees(n);
|
||||
six_unlock_write(&n->c.lock);
|
||||
|
||||
new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
|
||||
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, new_path, n);
|
||||
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
|
||||
bkey_init(&delete.k);
|
||||
@ -1796,6 +1856,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
|
||||
out:
|
||||
err:
|
||||
if (new_path)
|
||||
bch2_path_put(trans, new_path, true);
|
||||
bch2_path_put(trans, sib_path, true);
|
||||
bch2_trans_verify_locks(trans);
|
||||
return ret;
|
||||
@ -1810,6 +1872,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *new_path = NULL;
|
||||
struct btree *n, *parent;
|
||||
struct btree_update *as;
|
||||
int ret;
|
||||
@ -1831,6 +1894,11 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
bch2_btree_build_aux_trees(n);
|
||||
six_unlock_write(&n->c.lock);
|
||||
|
||||
new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
|
||||
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(trans, new_path, n);
|
||||
|
||||
trace_and_count(c, btree_node_rewrite, c, b);
|
||||
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
@ -1851,6 +1919,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as, trans);
|
||||
bch2_path_put(trans, new_path, true);
|
||||
out:
|
||||
bch2_btree_path_downgrade(trans, iter->path);
|
||||
return ret;
|
||||
@ -2035,9 +2104,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
|
||||
struct closure cl;
|
||||
int ret = 0;
|
||||
|
||||
if (!btree_node_intent_locked(path, b->c.level) &&
|
||||
!bch2_btree_path_upgrade(trans, path, b->c.level + 1))
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
|
@ -734,79 +734,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void path_upgrade_readers(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_read_locked(path, l))
|
||||
BUG_ON(!bch2_btree_node_upgrade(trans, path, l));
|
||||
}
|
||||
|
||||
static inline void upgrade_readers(struct btree_trans *trans, struct btree_path *path)
|
||||
{
|
||||
struct btree *b = path_l(path)->b;
|
||||
unsigned l;
|
||||
|
||||
do {
|
||||
for (l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_read_locked(path, l))
|
||||
path_upgrade_readers(trans, path);
|
||||
} while ((path = prev_btree_path(trans, path)) &&
|
||||
path_l(path)->b == b);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for nodes that we have both read and intent locks on, and upgrade the
|
||||
* readers to intent:
|
||||
*/
|
||||
static inline void normalize_read_intent_locks(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i, nr_read = 0, nr_intent = 0;
|
||||
|
||||
trans_for_each_path_inorder(trans, path, i) {
|
||||
struct btree_path *next = i + 1 < trans->nr_sorted
|
||||
? trans->paths + trans->sorted[i + 1]
|
||||
: NULL;
|
||||
|
||||
switch (btree_node_locked_type(path, path->level)) {
|
||||
case BTREE_NODE_READ_LOCKED:
|
||||
nr_read++;
|
||||
break;
|
||||
case BTREE_NODE_INTENT_LOCKED:
|
||||
nr_intent++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!next || path_l(path)->b != path_l(next)->b) {
|
||||
if (nr_read && nr_intent)
|
||||
upgrade_readers(trans, path);
|
||||
|
||||
nr_read = nr_intent = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
}
|
||||
|
||||
static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i;
|
||||
|
||||
trans_for_each_path_inorder(trans, path, i) {
|
||||
//if (path == pos)
|
||||
// break;
|
||||
|
||||
if (btree_node_read_locked(path, path->level) &&
|
||||
!bch2_btree_path_upgrade(trans, path, path->level + 1))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int trans_lock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
@ -816,31 +743,15 @@ static inline int trans_lock_write(struct btree_trans *trans)
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* six locks are unfair, and read locks block while a thread
|
||||
* wants a write lock: thus, we need to tell the cycle detector
|
||||
* we have a write lock _before_ taking the lock:
|
||||
*/
|
||||
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write);
|
||||
|
||||
if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
|
||||
if (have_conflicting_read_lock(trans, i->path))
|
||||
goto fail;
|
||||
|
||||
ret = btree_node_lock_type(trans, i->path,
|
||||
&insert_l(i)->b->c,
|
||||
i->path->pos, i->level,
|
||||
SIX_LOCK_write, NULL, NULL);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
ret = bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent);
|
||||
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
@ -926,8 +837,6 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
normalize_read_intent_locks(trans);
|
||||
|
||||
ret = trans_lock_write(trans);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
@ -1031,9 +940,11 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
|
||||
BUG_ON(ret == -ENOSPC &&
|
||||
!(trans->flags & BTREE_INSERT_NOWAIT) &&
|
||||
(trans->flags & BTREE_INSERT_NOFAIL));
|
||||
|
||||
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
|
||||
!(trans->flags & BTREE_INSERT_NOWAIT) &&
|
||||
(trans->flags & BTREE_INSERT_NOFAIL), c,
|
||||
"%s: incorrectly got %s\n", __func__, bch2_err_str(ret));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1123,11 +1034,9 @@ int __bch2_trans_commit(struct btree_trans *trans)
|
||||
trans_for_each_update(trans, i) {
|
||||
BUG_ON(!i->path->should_be_locked);
|
||||
|
||||
if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
|
||||
trace_and_count(c, trans_restart_upgrade, trans, _RET_IP_, i->path);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
|
||||
ret = bch2_btree_path_upgrade(trans, i->path, i->level + 1);
|
||||
if (unlikely(ret))
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUG_ON(!btree_node_intent_locked(i->path, i->level));
|
||||
|
||||
@ -1191,7 +1100,7 @@ err:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
static int check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
static noinline int __check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
enum btree_id id,
|
||||
struct bpos pos)
|
||||
{
|
||||
@ -1200,12 +1109,6 @@ static int check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if (!btree_type_has_snapshots(id))
|
||||
return 0;
|
||||
|
||||
if (!snapshot_t(c, pos.snapshot)->children[0])
|
||||
return 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, id, pos,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
@ -1231,6 +1134,18 @@ static int check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
enum btree_id id,
|
||||
struct bpos pos)
|
||||
{
|
||||
if (!btree_type_has_snapshots(id) ||
|
||||
pos.snapshot == U32_MAX ||
|
||||
!snapshot_t(trans->c, pos.snapshot)->children[0])
|
||||
return 0;
|
||||
|
||||
return __check_pos_snapshot_overwritten(trans, id, pos);
|
||||
}
|
||||
|
||||
int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
struct btree_iter *orig_iter,
|
||||
struct bkey_i *insert,
|
||||
@ -1716,15 +1631,18 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
|
||||
retry:
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = bkey_err(k) ?:
|
||||
btree_trans_too_many_iters(trans)) &&
|
||||
bkey_cmp(iter.pos, end) < 0) {
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k) {
|
||||
struct disk_reservation disk_res =
|
||||
bch2_disk_reservation_init(trans->c, 0);
|
||||
struct bkey_i delete;
|
||||
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (bkey_cmp(iter.pos, end) >= 0)
|
||||
break;
|
||||
|
||||
bkey_init(&delete.k);
|
||||
|
||||
/*
|
||||
@ -1753,23 +1671,27 @@ retry:
|
||||
|
||||
ret = bch2_extent_trim_atomic(trans, &iter, &delete);
|
||||
if (ret)
|
||||
break;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
|
||||
bch2_trans_commit(trans, &disk_res, journal_seq,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
bch2_disk_reservation_put(trans->c, &disk_res);
|
||||
err:
|
||||
/*
|
||||
* the bch2_trans_begin() call is in a weird place because we
|
||||
* need to call it after every transaction commit, to avoid path
|
||||
* overflow, but don't want to call it if the delete operation
|
||||
* is a no-op and we have no work to do:
|
||||
*/
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
ret = 0;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
|
||||
bch2_trans_begin(trans);
|
||||
ret = 0;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (!ret && trans_was_restarted(trans, restart_count))
|
||||
|
@ -1999,7 +1999,7 @@ recalculate:
|
||||
ret = 0;
|
||||
} else {
|
||||
atomic64_set(&c->sectors_available, sectors_available);
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_disk_reservation;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->sectors_available_lock);
|
||||
|
@ -36,7 +36,7 @@ void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (i < BCH_COUNTER_NR)
|
||||
prt_printf(out, "%s", bch2_counter_names[i]);
|
||||
prt_printf(out, "%s ", bch2_counter_names[i]);
|
||||
else
|
||||
prt_printf(out, "(unknown)");
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "btree_cache.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_iter.h"
|
||||
#include "btree_locking.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "debug.h"
|
||||
@ -534,7 +535,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
if (trans->task->pid <= i->iter)
|
||||
if (trans->locking_wait.task->pid <= i->iter)
|
||||
continue;
|
||||
|
||||
ret = flush_buf(i);
|
||||
@ -546,11 +547,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
|
||||
prt_printf(&i->buf, "backtrace:");
|
||||
prt_newline(&i->buf);
|
||||
printbuf_indent_add(&i->buf, 2);
|
||||
prt_backtrace(&i->buf, trans->task);
|
||||
prt_backtrace(&i->buf, trans->locking_wait.task);
|
||||
printbuf_indent_sub(&i->buf, 2);
|
||||
prt_newline(&i->buf);
|
||||
|
||||
i->iter = trans->task->pid;
|
||||
i->iter = trans->locking_wait.task->pid;
|
||||
}
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
|
||||
@ -707,6 +708,45 @@ static const struct file_operations lock_held_stats_op = {
|
||||
.read = lock_held_stats_read,
|
||||
};
|
||||
|
||||
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
|
||||
size_t size, loff_t *ppos)
|
||||
{
|
||||
struct dump_iter *i = file->private_data;
|
||||
struct bch_fs *c = i->c;
|
||||
struct btree_trans *trans;
|
||||
ssize_t ret = 0;
|
||||
|
||||
i->ubuf = buf;
|
||||
i->size = size;
|
||||
i->ret = 0;
|
||||
|
||||
if (i->iter)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list)
|
||||
if (bch2_check_for_deadlock(trans, &i->buf)) {
|
||||
i->iter = 1;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
out:
|
||||
if (i->buf.allocation_failure)
|
||||
ret = -ENOMEM;
|
||||
|
||||
if (!ret)
|
||||
ret = flush_buf(i);
|
||||
|
||||
return ret ?: i->ret;
|
||||
}
|
||||
|
||||
static const struct file_operations btree_deadlock_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = bch2_dump_open,
|
||||
.release = bch2_dump_release,
|
||||
.read = bch2_btree_deadlock_read,
|
||||
};
|
||||
|
||||
void bch2_fs_debug_exit(struct bch_fs *c)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
|
||||
@ -738,6 +778,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
|
||||
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
|
||||
c, &lock_held_stats_op);
|
||||
|
||||
debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
|
||||
c->btree_debug, &btree_deadlock_ops);
|
||||
|
||||
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
|
||||
if (IS_ERR_OR_NULL(c->btree_debug_dir))
|
||||
return;
|
||||
|
@ -276,7 +276,7 @@ static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent,
|
||||
|
||||
groups = bch2_sb_resize_disk_groups(sb, u64s);
|
||||
if (!groups)
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_disk_label_add;
|
||||
|
||||
nr_groups = disk_groups_nr(groups);
|
||||
}
|
||||
|
@ -731,7 +731,7 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans,
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_stripe_create;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1388,7 +1388,7 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
|
||||
idx = get_existing_stripe(c, h);
|
||||
if (idx < 0) {
|
||||
bch_err(c, "failed to find an existing stripe");
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_stripe_reuse;
|
||||
}
|
||||
|
||||
h->s->have_existing_stripe = true;
|
||||
|
@ -15,7 +15,7 @@ static const char * const bch2_errcode_strs[] = {
|
||||
#define BCH_ERR_0 0
|
||||
|
||||
static unsigned bch2_errcode_parents[] = {
|
||||
#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = BCH_ERR_##class,
|
||||
#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
|
||||
BCH_ERRCODES()
|
||||
#undef x
|
||||
};
|
||||
@ -49,3 +49,14 @@ bool __bch2_err_matches(int err, int class)
|
||||
|
||||
return err == class;
|
||||
}
|
||||
|
||||
int __bch2_err_class(int err)
|
||||
{
|
||||
err = -err;
|
||||
BUG_ON((unsigned) err >= BCH_ERR_MAX);
|
||||
|
||||
while (err >= BCH_ERR_START && bch2_errcode_parents[err - BCH_ERR_START])
|
||||
err = bch2_errcode_parents[err - BCH_ERR_START];
|
||||
|
||||
return -err;
|
||||
}
|
||||
|
@ -2,53 +2,67 @@
|
||||
#ifndef _BCACHEFS_ERRCODE_H
|
||||
#define _BCACHEFS_ERRCODE_H
|
||||
|
||||
#define BCH_ERRCODES() \
|
||||
x(0, open_buckets_empty) \
|
||||
x(0, freelist_empty) \
|
||||
x(freelist_empty, no_buckets_found) \
|
||||
x(0, insufficient_devices) \
|
||||
x(0, transaction_restart) \
|
||||
x(transaction_restart, transaction_restart_fault_inject) \
|
||||
x(transaction_restart, transaction_restart_relock) \
|
||||
x(transaction_restart, transaction_restart_relock_path) \
|
||||
x(transaction_restart, transaction_restart_relock_path_intent) \
|
||||
x(transaction_restart, transaction_restart_relock_after_fill) \
|
||||
x(transaction_restart, transaction_restart_too_many_iters) \
|
||||
x(transaction_restart, transaction_restart_lock_node_reused) \
|
||||
x(transaction_restart, transaction_restart_fill_relock) \
|
||||
x(transaction_restart, transaction_restart_fill_mem_alloc_fail)\
|
||||
x(transaction_restart, transaction_restart_mem_realloced) \
|
||||
x(transaction_restart, transaction_restart_in_traverse_all) \
|
||||
x(transaction_restart, transaction_restart_would_deadlock) \
|
||||
x(transaction_restart, transaction_restart_would_deadlock_write)\
|
||||
x(transaction_restart, transaction_restart_upgrade) \
|
||||
x(transaction_restart, transaction_restart_key_cache_upgrade) \
|
||||
x(transaction_restart, transaction_restart_key_cache_fill) \
|
||||
x(transaction_restart, transaction_restart_key_cache_raced) \
|
||||
x(transaction_restart, transaction_restart_key_cache_realloced)\
|
||||
x(transaction_restart, transaction_restart_journal_preres_get) \
|
||||
x(transaction_restart, transaction_restart_nested) \
|
||||
x(0, no_btree_node) \
|
||||
x(no_btree_node, no_btree_node_relock) \
|
||||
x(no_btree_node, no_btree_node_upgrade) \
|
||||
x(no_btree_node, no_btree_node_drop) \
|
||||
x(no_btree_node, no_btree_node_lock_root) \
|
||||
x(no_btree_node, no_btree_node_up) \
|
||||
x(no_btree_node, no_btree_node_down) \
|
||||
x(no_btree_node, no_btree_node_init) \
|
||||
x(no_btree_node, no_btree_node_cached) \
|
||||
x(0, backpointer_to_overwritten_btree_node) \
|
||||
x(0, lock_fail_node_reused) \
|
||||
x(0, lock_fail_root_changed) \
|
||||
x(0, journal_reclaim_would_deadlock) \
|
||||
x(0, fsck) \
|
||||
x(fsck, fsck_fix) \
|
||||
x(fsck, fsck_ignore) \
|
||||
x(fsck, fsck_errors_not_fixed) \
|
||||
x(fsck, fsck_repair_unimplemented) \
|
||||
x(fsck, fsck_repair_impossible) \
|
||||
x(0, need_snapshot_cleanup) \
|
||||
x(0, need_topology_repair)
|
||||
#define BCH_ERRCODES() \
|
||||
x(ENOSPC, ENOSPC_disk_reservation) \
|
||||
x(ENOSPC, ENOSPC_bucket_alloc) \
|
||||
x(ENOSPC, ENOSPC_disk_label_add) \
|
||||
x(ENOSPC, ENOSPC_stripe_create) \
|
||||
x(ENOSPC, ENOSPC_stripe_reuse) \
|
||||
x(ENOSPC, ENOSPC_inode_create) \
|
||||
x(ENOSPC, ENOSPC_str_hash_create) \
|
||||
x(ENOSPC, ENOSPC_snapshot_create) \
|
||||
x(ENOSPC, ENOSPC_subvolume_create) \
|
||||
x(ENOSPC, ENOSPC_sb) \
|
||||
x(ENOSPC, ENOSPC_sb_journal) \
|
||||
x(ENOSPC, ENOSPC_sb_quota) \
|
||||
x(ENOSPC, ENOSPC_sb_replicas) \
|
||||
x(ENOSPC, ENOSPC_sb_members) \
|
||||
x(0, open_buckets_empty) \
|
||||
x(0, freelist_empty) \
|
||||
x(BCH_ERR_freelist_empty, no_buckets_found) \
|
||||
x(0, insufficient_devices) \
|
||||
x(0, transaction_restart) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_relock) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_fill_mem_alloc_fail)\
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_mem_realloced) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
|
||||
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
|
||||
x(0, no_btree_node) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_relock) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_drop) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_lock_root) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_up) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_down) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_init) \
|
||||
x(BCH_ERR_no_btree_node, no_btree_node_cached) \
|
||||
x(0, backpointer_to_overwritten_btree_node) \
|
||||
x(0, lock_fail_root_changed) \
|
||||
x(0, journal_reclaim_would_deadlock) \
|
||||
x(0, fsck) \
|
||||
x(BCH_ERR_fsck, fsck_fix) \
|
||||
x(BCH_ERR_fsck, fsck_ignore) \
|
||||
x(BCH_ERR_fsck, fsck_errors_not_fixed) \
|
||||
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
|
||||
x(BCH_ERR_fsck, fsck_repair_impossible) \
|
||||
x(0, need_snapshot_cleanup) \
|
||||
x(0, need_topology_repair)
|
||||
|
||||
enum bch_errcode {
|
||||
BCH_ERR_START = 2048,
|
||||
@ -72,4 +86,11 @@ static inline bool _bch2_err_matches(int err, int class)
|
||||
_bch2_err_matches(_err, _class); \
|
||||
})
|
||||
|
||||
int __bch2_err_class(int);
|
||||
|
||||
static inline long bch2_err_class(long err)
|
||||
{
|
||||
return err < 0 ? __bch2_err_class(err) : err;
|
||||
}
|
||||
|
||||
#endif /* _BCACHFES_ERRCODE_H */
|
||||
|
@ -68,102 +68,135 @@ void bch2_io_error(struct bch_dev *ca)
|
||||
#include "tools-util.h"
|
||||
#endif
|
||||
|
||||
int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
|
||||
static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
|
||||
{
|
||||
struct fsck_err_state *s = NULL;
|
||||
va_list args;
|
||||
bool fix = false, print = true, suppressing = false;
|
||||
char _buf[sizeof(s->buf)], *buf = _buf;
|
||||
struct fsck_err_state *s;
|
||||
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
|
||||
va_start(args, fmt);
|
||||
vprintk(fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (c->opts.errors == BCH_ON_ERROR_continue) {
|
||||
bch_err(c, "fixing");
|
||||
return -BCH_ERR_fsck_fix;
|
||||
} else {
|
||||
bch2_inconsistent_error(c);
|
||||
return -BCH_ERR_fsck_errors_not_fixed;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&c->fsck_error_lock);
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(s, &c->fsck_errors, list)
|
||||
if (s->fmt == fmt)
|
||||
goto found;
|
||||
if (s->fmt == fmt) {
|
||||
/*
|
||||
* move it to the head of the list: repeated fsck errors
|
||||
* are common
|
||||
*/
|
||||
list_move(&s->list, &c->fsck_errors);
|
||||
return s;
|
||||
}
|
||||
|
||||
s = kzalloc(sizeof(*s), GFP_NOFS);
|
||||
if (!s) {
|
||||
if (!c->fsck_alloc_err)
|
||||
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
|
||||
c->fsck_alloc_err = true;
|
||||
buf = _buf;
|
||||
goto print;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&s->list);
|
||||
s->fmt = fmt;
|
||||
found:
|
||||
list_move(&s->list, &c->fsck_errors);
|
||||
s->nr++;
|
||||
if (c->opts.ratelimit_errors &&
|
||||
!(flags & FSCK_NO_RATELIMIT) &&
|
||||
s->nr >= FSCK_ERR_RATELIMIT_NR) {
|
||||
if (s->nr == FSCK_ERR_RATELIMIT_NR)
|
||||
suppressing = true;
|
||||
else
|
||||
print = false;
|
||||
s->buf = PRINTBUF;
|
||||
list_add(&s->list, &c->fsck_errors);
|
||||
return s;
|
||||
}
|
||||
|
||||
int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
|
||||
{
|
||||
struct fsck_err_state *s = NULL;
|
||||
va_list args;
|
||||
bool print = true, suppressing = false;
|
||||
struct printbuf buf = PRINTBUF, *out = &buf;
|
||||
int ret = -BCH_ERR_fsck_ignore;
|
||||
|
||||
mutex_lock(&c->fsck_error_lock);
|
||||
s = fsck_err_get(c, fmt);
|
||||
if (s) {
|
||||
if (c->opts.ratelimit_errors &&
|
||||
!(flags & FSCK_NO_RATELIMIT) &&
|
||||
s->nr >= FSCK_ERR_RATELIMIT_NR) {
|
||||
if (s->nr == FSCK_ERR_RATELIMIT_NR)
|
||||
suppressing = true;
|
||||
else
|
||||
print = false;
|
||||
}
|
||||
|
||||
printbuf_reset(&s->buf);
|
||||
out = &s->buf;
|
||||
s->nr++;
|
||||
}
|
||||
buf = s->buf;
|
||||
print:
|
||||
|
||||
if (!strncmp(fmt, "bcachefs:", 9))
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
|
||||
va_start(args, fmt);
|
||||
vscnprintf(buf, sizeof(_buf), fmt, args);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (c->opts.fix_errors == FSCK_OPT_EXIT) {
|
||||
bch_err(c, "%s, exiting", buf);
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
|
||||
if (c->opts.errors != BCH_ON_ERROR_continue ||
|
||||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
|
||||
prt_str(out, ", shutting down");
|
||||
bch2_inconsistent_error(c);
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
} else if (flags & FSCK_CAN_FIX) {
|
||||
prt_str(out, ", fixing");
|
||||
ret = -BCH_ERR_fsck_fix;
|
||||
} else {
|
||||
prt_str(out, ", continuing");
|
||||
ret = -BCH_ERR_fsck_ignore;
|
||||
}
|
||||
} else if (c->opts.fix_errors == FSCK_OPT_EXIT) {
|
||||
prt_str(out, ", exiting");
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
} else if (flags & FSCK_CAN_FIX) {
|
||||
if (c->opts.fix_errors == FSCK_OPT_ASK) {
|
||||
printk(KERN_ERR "%s: fix?", buf);
|
||||
fix = ask_yn();
|
||||
prt_str(out, ": fix?");
|
||||
bch2_print_string_as_lines(KERN_ERR, out->buf);
|
||||
print = false;
|
||||
ret = ask_yn()
|
||||
? -BCH_ERR_fsck_fix
|
||||
: -BCH_ERR_fsck_ignore;
|
||||
} else if (c->opts.fix_errors == FSCK_OPT_YES ||
|
||||
(c->opts.nochanges &&
|
||||
!(flags & FSCK_CAN_IGNORE))) {
|
||||
if (print)
|
||||
bch_err(c, "%s, fixing", buf);
|
||||
fix = true;
|
||||
prt_str(out, ", fixing");
|
||||
ret = -BCH_ERR_fsck_fix;
|
||||
} else {
|
||||
if (print)
|
||||
bch_err(c, "%s, not fixing", buf);
|
||||
fix = false;
|
||||
prt_str(out, ", not fixing");
|
||||
}
|
||||
} else if (flags & FSCK_NEED_FSCK) {
|
||||
if (print)
|
||||
bch_err(c, "%s (run fsck to correct)", buf);
|
||||
prt_str(out, " (run fsck to correct)");
|
||||
} else {
|
||||
if (print)
|
||||
bch_err(c, "%s (repair unimplemented)", buf);
|
||||
prt_str(out, " (repair unimplemented)");
|
||||
}
|
||||
|
||||
if (suppressing)
|
||||
if (ret == -BCH_ERR_fsck_ignore &&
|
||||
(c->opts.fix_errors == FSCK_OPT_EXIT ||
|
||||
!(flags & FSCK_CAN_IGNORE)))
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed;
|
||||
|
||||
if (print)
|
||||
bch2_print_string_as_lines(KERN_ERR, out->buf);
|
||||
|
||||
if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) &&
|
||||
(ret != -BCH_ERR_fsck_fix &&
|
||||
ret != -BCH_ERR_fsck_ignore))
|
||||
bch_err(c, "Unable to continue, halting");
|
||||
else if (suppressing)
|
||||
bch_err(c, "Ratelimiting new instances of previous error");
|
||||
|
||||
mutex_unlock(&c->fsck_error_lock);
|
||||
|
||||
if (fix) {
|
||||
printbuf_exit(&buf);
|
||||
|
||||
if (ret == -BCH_ERR_fsck_fix) {
|
||||
set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
|
||||
return -BCH_ERR_fsck_fix;
|
||||
} else {
|
||||
set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
|
||||
set_bit(BCH_FS_ERROR, &c->flags);
|
||||
return c->opts.fix_errors == FSCK_OPT_EXIT ||
|
||||
!(flags & FSCK_CAN_IGNORE)
|
||||
? -BCH_ERR_fsck_errors_not_fixed
|
||||
: -BCH_ERR_fsck_ignore;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||
@ -174,9 +207,10 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||
|
||||
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
|
||||
if (s->ratelimited)
|
||||
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
|
||||
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf.buf);
|
||||
|
||||
list_del(&s->list);
|
||||
printbuf_exit(&s->buf);
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
|
@ -103,7 +103,7 @@ struct fsck_err_state {
|
||||
const char *fmt;
|
||||
u64 nr;
|
||||
bool ratelimited;
|
||||
char buf[512];
|
||||
struct printbuf buf;
|
||||
};
|
||||
|
||||
#define FSCK_CAN_FIX (1 << 0)
|
||||
@ -121,7 +121,6 @@ void bch2_flush_fsck_errs(struct bch_fs *);
|
||||
\
|
||||
if (_ret != -BCH_ERR_fsck_fix && \
|
||||
_ret != -BCH_ERR_fsck_ignore) { \
|
||||
bch_err(c, "Unable to continue, halting"); \
|
||||
ret = _ret; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
|
@ -1213,7 +1213,7 @@ int bch2_read_folio(struct file *file, struct folio *folio)
|
||||
|
||||
ret = bch2_read_single_page(page, page->mapping);
|
||||
folio_unlock(folio);
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* writepages: */
|
||||
@ -1249,8 +1249,6 @@ static void bch2_writepage_io_done(struct closure *cl)
|
||||
struct bio_vec *bvec;
|
||||
unsigned i;
|
||||
|
||||
up(&io->op.c->io_in_flight);
|
||||
|
||||
if (io->op.error) {
|
||||
set_bit(EI_INODE_ERROR, &io->inode->ei_flags);
|
||||
|
||||
@ -1313,8 +1311,6 @@ static void bch2_writepage_do_io(struct bch_writepage_state *w)
|
||||
{
|
||||
struct bch_writepage_io *io = w->io;
|
||||
|
||||
down(&io->op.c->io_in_flight);
|
||||
|
||||
w->io = NULL;
|
||||
closure_call(&io->op.cl, bch2_write, NULL, &io->cl);
|
||||
continue_at(&io->cl, bch2_writepage_io_done, NULL);
|
||||
@ -1501,7 +1497,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
|
||||
if (w.io)
|
||||
bch2_writepage_do_io(&w);
|
||||
blk_finish_plug(&plug);
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* buffered writes: */
|
||||
@ -1586,7 +1582,7 @@ err_unlock:
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
kfree(res);
|
||||
*fsdata = NULL;
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
int bch2_write_end(struct file *file, struct address_space *mapping,
|
||||
@ -2010,7 +2006,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
file_accessed(file);
|
||||
|
||||
@ -2025,8 +2021,8 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
ret = generic_file_read_iter(iocb, iter);
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
out:
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* O_DIRECT writes */
|
||||
@ -2094,8 +2090,6 @@ static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
if (dio->loop)
|
||||
goto loop;
|
||||
|
||||
down(&c->io_in_flight);
|
||||
|
||||
while (1) {
|
||||
iter_count = dio->iter.count;
|
||||
|
||||
@ -2226,7 +2220,6 @@ loop:
|
||||
|
||||
ret = dio->op.error ?: ((long) dio->written << 9);
|
||||
err:
|
||||
up(&c->io_in_flight);
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
bch2_quota_reservation_put(c, inode, &dio->quota_res);
|
||||
|
||||
@ -2347,8 +2340,10 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
ssize_t ret;
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT)
|
||||
return bch2_direct_write(iocb, from);
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
ret = bch2_direct_write(iocb, from);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* We can write back this queue in page reclaim */
|
||||
current->backing_dev_info = inode_to_bdi(&inode->v);
|
||||
@ -2375,8 +2370,8 @@ unlock:
|
||||
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
|
||||
return ret;
|
||||
out:
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* fsync: */
|
||||
@ -2410,7 +2405,7 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
ret2 = sync_inode_metadata(&inode->v, 1);
|
||||
ret3 = bch2_flush_inode(c, inode_inum(inode));
|
||||
|
||||
return ret ?: ret2 ?: ret3;
|
||||
return bch2_err_class(ret ?: ret2 ?: ret3);
|
||||
}
|
||||
|
||||
/* truncate: */
|
||||
@ -2716,7 +2711,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
|
||||
ret = bch2_setattr_nonsize(mnt_userns, inode, iattr);
|
||||
err:
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* fallocate: */
|
||||
@ -3044,7 +3039,7 @@ bkey_err:
|
||||
bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
|
||||
mark_pagecache_reserved(inode, start_sector, iter.pos.offset);
|
||||
|
||||
if (ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)) {
|
||||
if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
|
||||
struct quota_res quota_res = { 0 };
|
||||
s64 i_sectors_delta = 0;
|
||||
|
||||
@ -3095,7 +3090,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
||||
* so that the VFS cache i_size is consistent with the btree i_size:
|
||||
*/
|
||||
if (ret &&
|
||||
!(ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)))
|
||||
!(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)))
|
||||
return ret;
|
||||
|
||||
if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
|
||||
@ -3146,7 +3141,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
|
||||
inode_unlock(&inode->v);
|
||||
percpu_ref_put(&c->writes);
|
||||
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
|
||||
@ -3224,7 +3219,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
|
||||
err:
|
||||
bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
|
||||
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
/* fseek: */
|
||||
@ -3447,18 +3442,26 @@ err:
|
||||
|
||||
loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
loff_t ret;
|
||||
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
case SEEK_CUR:
|
||||
case SEEK_END:
|
||||
return generic_file_llseek(file, offset, whence);
|
||||
ret = generic_file_llseek(file, offset, whence);
|
||||
break;
|
||||
case SEEK_DATA:
|
||||
return bch2_seek_data(file, offset);
|
||||
ret = bch2_seek_data(file, offset);
|
||||
break;
|
||||
case SEEK_HOLE:
|
||||
return bch2_seek_hole(file, offset);
|
||||
ret = bch2_seek_hole(file, offset);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
void bch2_fs_fsio_exit(struct bch_fs *c)
|
||||
|
@ -455,51 +455,67 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
long ret;
|
||||
|
||||
switch (cmd) {
|
||||
case FS_IOC_GETFLAGS:
|
||||
return bch2_ioc_getflags(inode, (int __user *) arg);
|
||||
ret = bch2_ioc_getflags(inode, (int __user *) arg);
|
||||
break;
|
||||
|
||||
case FS_IOC_SETFLAGS:
|
||||
return bch2_ioc_setflags(c, file, inode, (int __user *) arg);
|
||||
ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
|
||||
break;
|
||||
|
||||
case FS_IOC_FSGETXATTR:
|
||||
return bch2_ioc_fsgetxattr(inode, (void __user *) arg);
|
||||
ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
|
||||
break;
|
||||
|
||||
case FS_IOC_FSSETXATTR:
|
||||
return bch2_ioc_fssetxattr(c, file, inode,
|
||||
(void __user *) arg);
|
||||
ret = bch2_ioc_fssetxattr(c, file, inode,
|
||||
(void __user *) arg);
|
||||
break;
|
||||
|
||||
case BCHFS_IOC_REINHERIT_ATTRS:
|
||||
return bch2_ioc_reinherit_attrs(c, file, inode,
|
||||
(void __user *) arg);
|
||||
ret = bch2_ioc_reinherit_attrs(c, file, inode,
|
||||
(void __user *) arg);
|
||||
break;
|
||||
|
||||
case FS_IOC_GETVERSION:
|
||||
return -ENOTTY;
|
||||
ret = -ENOTTY;
|
||||
break;
|
||||
|
||||
case FS_IOC_SETVERSION:
|
||||
return -ENOTTY;
|
||||
ret = -ENOTTY;
|
||||
break;
|
||||
|
||||
case FS_IOC_GOINGDOWN:
|
||||
return bch2_ioc_goingdown(c, (u32 __user *) arg);
|
||||
ret = bch2_ioc_goingdown(c, (u32 __user *) arg);
|
||||
break;
|
||||
|
||||
case BCH_IOCTL_SUBVOLUME_CREATE: {
|
||||
struct bch_ioctl_subvolume i;
|
||||
|
||||
if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
|
||||
return -EFAULT;
|
||||
return bch2_ioctl_subvolume_create(c, file, i);
|
||||
ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
|
||||
? -EFAULT
|
||||
: bch2_ioctl_subvolume_create(c, file, i);
|
||||
break;
|
||||
}
|
||||
|
||||
case BCH_IOCTL_SUBVOLUME_DESTROY: {
|
||||
struct bch_ioctl_subvolume i;
|
||||
|
||||
if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
|
||||
return -EFAULT;
|
||||
return bch2_ioctl_subvolume_destroy(c, file, i);
|
||||
ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
|
||||
? -EFAULT
|
||||
: bch2_ioctl_subvolume_destroy(c, file, i);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
return bch2_fs_ioctl(c, cmd, (void __user *) arg);
|
||||
ret = bch2_fs_ioctl(c, cmd, (void __user *) arg);
|
||||
break;
|
||||
}
|
||||
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
@ -769,7 +769,7 @@ err_trans:
|
||||
err:
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
static int bch2_getattr(struct user_namespace *mnt_userns,
|
||||
@ -1453,7 +1453,7 @@ static int bch2_vfs_write_inode(struct inode *vinode,
|
||||
ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
return ret;
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
static void bch2_evict_inode(struct inode *vinode)
|
||||
@ -1557,6 +1557,7 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
static int bch2_sync_fs(struct super_block *sb, int wait)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
int ret;
|
||||
|
||||
if (c->opts.journal_flush_disabled)
|
||||
return 0;
|
||||
@ -1566,7 +1567,8 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return bch2_journal_flush(&c->journal);
|
||||
ret = bch2_journal_flush(&c->journal);
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
static struct bch_fs *bch2_path_to_fs(const char *path)
|
||||
@ -1622,7 +1624,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
|
||||
|
||||
ret = bch2_parse_mount_opts(c, &opts, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
if (opts.read_only != c->opts.read_only) {
|
||||
down_write(&c->state_lock);
|
||||
@ -1636,7 +1638,8 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (ret) {
|
||||
bch_err(c, "error going rw: %i", ret);
|
||||
up_write(&c->state_lock);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
sb->s_flags &= ~SB_RDONLY;
|
||||
@ -1649,8 +1652,8 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
|
||||
|
||||
if (opts.errors >= 0)
|
||||
c->opts.errors = opts.errors;
|
||||
|
||||
return ret;
|
||||
err:
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
|
||||
|
@ -567,7 +567,7 @@ again:
|
||||
}
|
||||
|
||||
if (!ret && start == min)
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_inode_create;
|
||||
|
||||
if (ret) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
@ -535,17 +535,11 @@ static void bch2_write_done(struct closure *cl)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_write_index - after a write, update index to point to new data
|
||||
*/
|
||||
static void __bch2_write_index(struct bch_write_op *op)
|
||||
static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bkey_i *src, *dst = keys->keys, *n, *k;
|
||||
unsigned dev;
|
||||
int ret;
|
||||
struct bkey_i *src, *dst = keys->keys, *n;
|
||||
|
||||
for (src = keys->keys; src != keys->top; src = n) {
|
||||
n = bkey_next(src);
|
||||
@ -554,10 +548,8 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
|
||||
test_bit(ptr->dev, op->failed.d));
|
||||
|
||||
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) {
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src)))
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (dst != src)
|
||||
@ -566,6 +558,25 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
}
|
||||
|
||||
keys->top = dst;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_write_index - after a write, update index to point to new data
|
||||
*/
|
||||
static void __bch2_write_index(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bkey_i *k;
|
||||
unsigned dev;
|
||||
int ret;
|
||||
|
||||
if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
|
||||
ret = bch2_write_drop_io_error_ptrs(op);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* probably not the ideal place to hook this in, but I don't
|
||||
@ -640,8 +651,10 @@ static void bch2_write_endio(struct bio *bio)
|
||||
op->pos.inode,
|
||||
op->pos.offset - bio_sectors(bio), /* XXX definitely wrong */
|
||||
"data write error: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status)))
|
||||
bch2_blk_status_to_str(bio->bi_status))) {
|
||||
set_bit(wbio->dev, op->failed.d);
|
||||
op->flags |= BCH_WRITE_IO_ERROR;
|
||||
}
|
||||
|
||||
if (wbio->have_ioref) {
|
||||
bch2_latency_acct(ca, wbio->submit_time, WRITE);
|
||||
|
@ -40,6 +40,7 @@ enum bch_write_flags {
|
||||
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 10),
|
||||
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
|
||||
BCH_WRITE_DONE = (1 << 12),
|
||||
BCH_WRITE_IO_ERROR = (1 << 13),
|
||||
};
|
||||
|
||||
static inline u64 *op_journal_seq(struct bch_write_op *op)
|
||||
|
@ -809,14 +809,16 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
if (new_fs) {
|
||||
bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
|
||||
if (bu[nr_got] < 0) {
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_bucket_alloc;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
|
||||
false, cl);
|
||||
if (IS_ERR(ob[nr_got])) {
|
||||
ret = cl ? -EAGAIN : -ENOSPC;
|
||||
ret = cl
|
||||
? -EAGAIN
|
||||
: -BCH_ERR_ENOSPC_bucket_alloc;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -943,10 +945,11 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
|
||||
* reservation to ensure we'll actually be able to allocate:
|
||||
*/
|
||||
|
||||
if (bch2_disk_reservation_get(c, &disk_res,
|
||||
bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
|
||||
ret = bch2_disk_reservation_get(c, &disk_res,
|
||||
bucket_to_sector(ca, nr - ja->nr), 1, 0);
|
||||
if (ret) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return -ENOSPC;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
|
||||
|
@ -187,30 +187,57 @@ static void journal_entry_null_range(void *start, void *end)
|
||||
#define JOURNAL_ENTRY_NONE 6
|
||||
#define JOURNAL_ENTRY_BAD 7
|
||||
|
||||
#define journal_entry_err(c, msg, ...) \
|
||||
static void journal_entry_err_msg(struct printbuf *out,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry)
|
||||
{
|
||||
prt_str(out, "invalid journal entry ");
|
||||
if (entry)
|
||||
prt_printf(out, "%s ", bch2_jset_entry_types[entry->type]);
|
||||
|
||||
if (!jset)
|
||||
prt_printf(out, "in superblock");
|
||||
else if (!entry)
|
||||
prt_printf(out, "at seq %llu", le64_to_cpu(jset->seq));
|
||||
else
|
||||
prt_printf(out, "at offset %zi/%u seq %llu",
|
||||
(u64 *) entry - jset->_data,
|
||||
le32_to_cpu(jset->u64s),
|
||||
le64_to_cpu(jset->seq));
|
||||
prt_str(out, ": ");
|
||||
}
|
||||
|
||||
#define journal_entry_err(c, jset, entry, msg, ...) \
|
||||
({ \
|
||||
struct printbuf buf = PRINTBUF; \
|
||||
\
|
||||
journal_entry_err_msg(&buf, jset, entry); \
|
||||
prt_printf(&buf, msg, ##__VA_ARGS__); \
|
||||
\
|
||||
switch (write) { \
|
||||
case READ: \
|
||||
mustfix_fsck_err(c, msg, ##__VA_ARGS__); \
|
||||
mustfix_fsck_err(c, "%s", buf.buf); \
|
||||
break; \
|
||||
case WRITE: \
|
||||
bch_err(c, "corrupt metadata before write:\n" \
|
||||
msg, ##__VA_ARGS__); \
|
||||
bch_err(c, "corrupt metadata before write: %s\n", buf.buf);\
|
||||
if (bch2_fs_inconsistent(c)) { \
|
||||
ret = -BCH_ERR_fsck_errors_not_fixed; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
printbuf_exit(&buf); \
|
||||
true; \
|
||||
})
|
||||
|
||||
#define journal_entry_err_on(cond, c, msg, ...) \
|
||||
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
|
||||
#define journal_entry_err_on(cond, c, jset, entry, msg, ...) \
|
||||
((cond) ? journal_entry_err(c, jset, entry, msg, ##__VA_ARGS__) : false)
|
||||
|
||||
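The reworked journal_entry_err() is a GNU statement expression: it builds the message in a printbuf, reports it, and evaluates to true so journal_entry_err_on() can be used directly as a condition. A minimal standalone sketch of that macro pattern, with hypothetical report_err()/report_err_on() names and a plain stack buffer instead of the bcachefs printbuf (GCC/Clang only, since it relies on statement expressions):

#include <stdbool.h>
#include <stdio.h>

/* Sketch only: format the message, log it, and evaluate to true so the
 * _on() variant can drive an if () that then repairs/truncates the entry. */
#define report_err(fmt, ...)                                            \
({                                                                      \
        char _buf[128];                                                 \
        snprintf(_buf, sizeof(_buf), fmt, ##__VA_ARGS__);               \
        fprintf(stderr, "invalid journal entry: %s\n", _buf);           \
        true;                                                           \
})

#define report_err_on(cond, fmt, ...)                                   \
        ((cond) ? report_err(fmt, ##__VA_ARGS__) : false)

int main(void)
{
        unsigned u64s = 0;

        if (report_err_on(!u64s, "k->u64s %u", u64s))
                return 1;       /* caller would truncate the entry here */
        return 0;
}
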
#define FSCK_DELETED_KEY 5
|
||||
|
||||
static int journal_validate_key(struct bch_fs *c, const char *where,
|
||||
static int journal_validate_key(struct bch_fs *c,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned level, enum btree_id btree_id,
|
||||
struct bkey_i *k,
|
||||
@ -220,33 +247,24 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(!k->k.u64s, c,
|
||||
"invalid key in %s at %s offset %zi/%u: k->u64s 0",
|
||||
bch2_jset_entry_types[entry->type], where,
|
||||
(u64 *) k - entry->_data,
|
||||
le16_to_cpu(entry->u64s))) {
|
||||
if (journal_entry_err_on(!k->k.u64s, c, jset, entry, "k->u64s 0")) {
|
||||
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
|
||||
journal_entry_null_range(vstruct_next(entry), next);
|
||||
return FSCK_DELETED_KEY;
|
||||
}
|
||||
|
||||
if (journal_entry_err_on((void *) bkey_next(k) >
|
||||
(void *) vstruct_next(entry), c,
|
||||
"invalid key in %s at %s offset %zi/%u: extends past end of journal entry",
|
||||
bch2_jset_entry_types[entry->type], where,
|
||||
(u64 *) k - entry->_data,
|
||||
le16_to_cpu(entry->u64s))) {
|
||||
(void *) vstruct_next(entry),
|
||||
c, jset, entry,
|
||||
"extends past end of journal entry")) {
|
||||
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
|
||||
journal_entry_null_range(vstruct_next(entry), next);
|
||||
return FSCK_DELETED_KEY;
|
||||
}
|
||||
|
||||
if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
|
||||
"invalid key in %s at %s offset %zi/%u: bad format %u",
|
||||
bch2_jset_entry_types[entry->type], where,
|
||||
(u64 *) k - entry->_data,
|
||||
le16_to_cpu(entry->u64s),
|
||||
k->k.format)) {
|
||||
if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT,
|
||||
c, jset, entry,
|
||||
"bad format %u", k->k.format)) {
|
||||
le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
|
||||
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
|
||||
journal_entry_null_range(vstruct_next(entry), next);
|
||||
@ -260,10 +278,11 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
|
||||
if (bch2_bkey_invalid(c, bkey_i_to_s_c(k),
|
||||
__btree_node_type(level, btree_id), write, &buf)) {
|
||||
printbuf_reset(&buf);
|
||||
prt_printf(&buf, "invalid key in %s at %s offset %zi/%u:",
|
||||
bch2_jset_entry_types[entry->type], where,
|
||||
(u64 *) k - entry->_data,
|
||||
le16_to_cpu(entry->u64s));
|
||||
prt_printf(&buf, "invalid journal entry %s at offset %zi/%u seq %llu:",
|
||||
bch2_jset_entry_types[entry->type],
|
||||
(u64 *) entry - jset->_data,
|
||||
le32_to_cpu(jset->u64s),
|
||||
le64_to_cpu(jset->seq));
|
||||
prt_newline(&buf);
|
||||
printbuf_indent_add(&buf, 2);
|
||||
|
||||
@ -291,14 +310,14 @@ fsck_err:
|
||||
}
|
||||
|
||||
static int journal_entry_btree_keys_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
struct bkey_i *k = entry->start;
|
||||
|
||||
while (k != vstruct_last(entry)) {
|
||||
int ret = journal_validate_key(c, where, entry,
|
||||
int ret = journal_validate_key(c, jset, entry,
|
||||
entry->level,
|
||||
entry->btree_id,
|
||||
k, version, big_endian, write);
|
||||
@ -329,7 +348,7 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_btree_root_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -337,7 +356,8 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(!entry->u64s ||
|
||||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
|
||||
le16_to_cpu(entry->u64s) != k->k.u64s,
|
||||
c, jset, entry,
|
||||
"invalid btree root journal entry: wrong number of keys")) {
|
||||
void *next = vstruct_next(entry);
|
||||
/*
|
||||
@ -350,7 +370,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
return journal_validate_key(c, where, entry, 1, entry->btree_id, k,
|
||||
return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
|
||||
version, big_endian, write);
|
||||
fsck_err:
|
||||
return ret;
|
||||
@ -363,7 +383,7 @@ static void journal_entry_btree_root_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_prio_ptrs_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -377,13 +397,14 @@ static void journal_entry_prio_ptrs_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_blacklist_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c,
|
||||
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1,
|
||||
c, jset, entry,
|
||||
"invalid journal seq blacklist entry: bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
}
|
||||
@ -401,14 +422,15 @@ static void journal_entry_blacklist_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_blacklist_v2_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
struct jset_entry_blacklist_v2 *bl_entry;
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c,
|
||||
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2,
|
||||
c, jset, entry,
|
||||
"invalid journal seq blacklist entry: bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
goto out;
|
||||
@ -417,7 +439,8 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c,
|
||||
bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
|
||||
|
||||
if (journal_entry_err_on(le64_to_cpu(bl_entry->start) >
|
||||
le64_to_cpu(bl_entry->end), c,
|
||||
le64_to_cpu(bl_entry->end),
|
||||
c, jset, entry,
|
||||
"invalid journal seq blacklist entry: start > end")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
}
|
||||
@ -438,7 +461,7 @@ static void journal_entry_blacklist_v2_to_text(struct printbuf *out, struct bch_
|
||||
}
|
||||
|
||||
static int journal_entry_usage_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -448,7 +471,7 @@ static int journal_entry_usage_validate(struct bch_fs *c,
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(bytes < sizeof(*u),
|
||||
c,
|
||||
c, jset, entry,
|
||||
"invalid journal entry usage: bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
@ -470,7 +493,7 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
}
|
||||
|
||||
static int journal_entry_data_usage_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -481,7 +504,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
|
||||
|
||||
if (journal_entry_err_on(bytes < sizeof(*u) ||
|
||||
bytes < sizeof(*u) + u->r.nr_devs,
|
||||
c,
|
||||
c, jset, entry,
|
||||
"invalid journal entry usage: bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
@ -502,7 +525,7 @@ static void journal_entry_data_usage_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_clock_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -512,13 +535,13 @@ static int journal_entry_clock_validate(struct bch_fs *c,
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(bytes != sizeof(*clock),
|
||||
c, "invalid journal entry clock: bad size")) {
|
||||
c, jset, entry, "bad size")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (journal_entry_err_on(clock->rw > 1,
|
||||
c, "invalid journal entry clock: bad rw")) {
|
||||
c, jset, entry, "bad rw")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
}
|
||||
@ -537,7 +560,7 @@ static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
}
|
||||
|
||||
static int journal_entry_dev_usage_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -549,7 +572,7 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c,
|
||||
int ret = 0;
|
||||
|
||||
if (journal_entry_err_on(bytes < expected,
|
||||
c, "invalid journal entry dev usage: bad size (%u < %u)",
|
||||
c, jset, entry, "bad size (%u < %u)",
|
||||
bytes, expected)) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
@ -558,13 +581,13 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c,
|
||||
dev = le32_to_cpu(u->dev);
|
||||
|
||||
if (journal_entry_err_on(!bch2_dev_exists2(c, dev),
|
||||
c, "invalid journal entry dev usage: bad dev")) {
|
||||
c, jset, entry, "bad dev")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (journal_entry_err_on(u->pad,
|
||||
c, "invalid journal entry dev usage: bad pad")) {
|
||||
c, jset, entry, "bad pad")) {
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
return ret;
|
||||
}
|
||||
@ -597,7 +620,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
static int journal_entry_log_validate(struct bch_fs *c,
|
||||
const char *where,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
@ -613,11 +636,12 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, "%.*s", bytes, l->d);
|
||||
}
|
||||
|
||||
static int journal_entry_overwrite_validate(struct bch_fs *c, const char *where,
|
||||
static int journal_entry_overwrite_validate(struct bch_fs *c,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
return journal_entry_btree_keys_validate(c, where, entry, version, big_endian, write);
|
||||
return journal_entry_btree_keys_validate(c, jset, entry, version, big_endian, write);
|
||||
}
|
||||
|
||||
static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
@ -627,7 +651,7 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
|
||||
}
|
||||
|
||||
struct jset_entry_ops {
|
||||
int (*validate)(struct bch_fs *, const char *,
|
||||
int (*validate)(struct bch_fs *, struct jset *,
|
||||
struct jset_entry *, unsigned, int, int);
|
||||
void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *);
|
||||
};
|
||||
@ -642,12 +666,13 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = {
|
||||
#undef x
|
||||
};
|
||||
|
||||
int bch2_journal_entry_validate(struct bch_fs *c, const char *where,
|
||||
int bch2_journal_entry_validate(struct bch_fs *c,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian, int write)
|
||||
{
|
||||
return entry->type < BCH_JSET_ENTRY_NR
|
||||
? bch2_jset_entry_ops[entry->type].validate(c, where, entry,
|
||||
? bch2_jset_entry_ops[entry->type].validate(c, jset, entry,
|
||||
version, big_endian, write)
|
||||
: 0;
|
||||
}
|
||||
@ -666,24 +691,18 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
|
||||
int write)
|
||||
{
|
||||
char buf[100];
|
||||
struct jset_entry *entry;
|
||||
int ret = 0;
|
||||
|
||||
vstruct_for_each(jset, entry) {
|
||||
scnprintf(buf, sizeof(buf), "jset %llu entry offset %zi/%u",
|
||||
le64_to_cpu(jset->seq),
|
||||
(u64 *) entry - jset->_data,
|
||||
le32_to_cpu(jset->u64s));
|
||||
|
||||
if (journal_entry_err_on(vstruct_next(entry) >
|
||||
vstruct_last(jset), c,
|
||||
vstruct_last(jset), c, jset, entry,
|
||||
"journal entry extends past end of jset")) {
|
||||
jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = bch2_journal_entry_validate(c, buf, entry,
|
||||
ret = bch2_journal_entry_validate(c, jset, entry,
|
||||
le32_to_cpu(jset->version),
|
||||
JSET_BIG_ENDIAN(jset), write);
|
||||
if (ret)
|
||||
@ -711,7 +730,8 @@ static int jset_validate(struct bch_fs *c,
|
||||
version = le32_to_cpu(jset->version);
|
||||
if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
|
||||
version < bcachefs_metadata_version_min) ||
|
||||
version >= bcachefs_metadata_version_max, c,
|
||||
version >= bcachefs_metadata_version_max,
|
||||
c, jset, NULL,
|
||||
"%s sector %llu seq %llu: unknown journal entry version %u",
|
||||
ca ? ca->name : c->name,
|
||||
sector, le64_to_cpu(jset->seq),
|
||||
@ -724,7 +744,8 @@ static int jset_validate(struct bch_fs *c,
|
||||
sectors_read < bucket_sectors_left)
|
||||
return JOURNAL_ENTRY_REREAD;
|
||||
|
||||
if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
|
||||
if (journal_entry_err_on(bytes > bucket_sectors_left << 9,
|
||||
c, jset, NULL,
|
||||
"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
|
||||
ca ? ca->name : c->name,
|
||||
sector, le64_to_cpu(jset->seq), bytes)) {
|
||||
@ -733,7 +754,8 @@ static int jset_validate(struct bch_fs *c,
|
||||
-((bytes - (bucket_sectors_left << 9)) / 8));
|
||||
}
|
||||
|
||||
if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
|
||||
if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)),
|
||||
c, jset, NULL,
|
||||
"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
|
||||
ca ? ca->name : c->name,
|
||||
sector, le64_to_cpu(jset->seq),
|
||||
@ -746,7 +768,8 @@ static int jset_validate(struct bch_fs *c,
|
||||
goto csum_done;
|
||||
|
||||
csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
|
||||
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
|
||||
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum),
|
||||
c, jset, NULL,
|
||||
"%s sector %llu seq %llu: journal checksum bad",
|
||||
ca ? ca->name : c->name,
|
||||
sector, le64_to_cpu(jset->seq)))
|
||||
@ -760,7 +783,8 @@ static int jset_validate(struct bch_fs *c,
|
||||
csum_done:
|
||||
/* last_seq is ignored when JSET_NO_FLUSH is true */
|
||||
if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
|
||||
le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
|
||||
le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq),
|
||||
c, jset, NULL,
|
||||
"invalid journal entry: last_seq > seq (%llu > %llu)",
|
||||
le64_to_cpu(jset->last_seq),
|
||||
le64_to_cpu(jset->seq))) {
|
||||
|
@ -44,7 +44,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
|
||||
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
|
||||
vstruct_for_each_safe(entry, k, _n)
|
||||
|
||||
int bch2_journal_entry_validate(struct bch_fs *, const char *,
|
||||
int bch2_journal_entry_validate(struct bch_fs *, struct jset *,
|
||||
struct jset_entry *, unsigned, int, int);
|
||||
void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct jset_entry *);
|
||||
|
@ -197,7 +197,7 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
|
||||
j = bch2_sb_resize_journal_v2(&ca->disk_sb,
|
||||
(sizeof(*j) + sizeof(j->d[0]) * nr) / sizeof(u64));
|
||||
if (!j)
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_sb_journal;
|
||||
|
||||
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
|
||||
|
||||
|
@ -665,7 +665,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
|
||||
sb_quota = bch2_sb_resize_quota(&c->disk_sb,
|
||||
sizeof(*sb_quota) / sizeof(u64));
|
||||
if (!sb_quota)
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_sb_quota;
|
||||
}
|
||||
|
||||
if (info->i_fieldmask & QC_SPC_TIMER)
|
||||
|
@ -478,7 +478,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
bch2_fs_usage_read_one(c, &c->usage_base->replicas[i])) {
|
||||
n = cpu_replicas_add_entry(&c->replicas_gc, e);
|
||||
if (!n.entries) {
|
||||
ret = -ENOSPC;
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -487,10 +487,9 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
}
|
||||
}
|
||||
|
||||
if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
|
||||
ret = -ENOSPC;
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = replicas_table_update(c, &c->replicas_gc);
|
||||
err:
|
||||
@ -593,10 +592,9 @@ retry:
|
||||
|
||||
bch2_cpu_replicas_sort(&new);
|
||||
|
||||
if (bch2_cpu_replicas_to_sb_replicas(c, &new)) {
|
||||
ret = -ENOSPC;
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, &new);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = replicas_table_update(c, &new);
|
||||
err:
|
||||
@ -751,7 +749,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
|
||||
sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
|
||||
DIV_ROUND_UP(bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_sb_replicas;
|
||||
|
||||
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
|
||||
sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
|
||||
@ -796,7 +794,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
|
||||
DIV_ROUND_UP(bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_sb_replicas;
|
||||
|
||||
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
|
||||
|
@ -207,7 +207,7 @@ bch2_hash_hole(struct btree_trans *trans,
|
||||
return 0;
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
return ret ?: -ENOSPC;
|
||||
return ret ?: -BCH_ERR_ENOSPC_str_hash_create;
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
@ -277,7 +277,7 @@ int bch2_hash_set_snapshot(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_str_hash_create;
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &slot);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
@ -517,7 +517,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
|
||||
goto err;
|
||||
|
||||
if (!k.k || !k.k->p.offset) {
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_snapshot_create;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1031,7 +1031,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_subvolume_create;
|
||||
goto err;
|
||||
found_slot:
|
||||
snapshot_subvols[0] = dst_iter.pos.offset;
|
||||
|
@ -132,7 +132,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
|
||||
|
||||
pr_err("%s: superblock too big: want %zu but have %llu",
|
||||
bdevname(sb->bdev, buf), new_bytes, max_bytes);
|
||||
return -ENOSPC;
|
||||
return -BCH_ERR_ENOSPC_sb;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1156,7 +1156,7 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle
|
||||
for (entry = clean->start;
|
||||
entry < (struct jset_entry *) vstruct_end(&clean->field);
|
||||
entry = vstruct_next(entry)) {
|
||||
ret = bch2_journal_entry_validate(c, "superblock", entry,
|
||||
ret = bch2_journal_entry_validate(c, NULL, entry,
|
||||
le16_to_cpu(c->disk_sb.sb->version),
|
||||
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
|
||||
write);
|
||||
@ -1477,7 +1477,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
unsigned nr_devices = 0;
|
||||
|
||||
if (!out->nr_tabstops)
|
||||
printbuf_tabstop_push(out, 32);
|
||||
printbuf_tabstop_push(out, 44);
|
||||
|
||||
mi = bch2_sb_get_members(sb);
|
||||
if (mi) {
|
||||
|
@ -686,8 +686,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
|
||||
seqcount_init(&c->usage_lock);
|
||||
|
||||
sema_init(&c->io_in_flight, 64);
|
||||
|
||||
c->copy_gc_enabled = 1;
|
||||
c->rebalance.enabled = 1;
|
||||
c->promote_whole_extents = true;
|
||||
@ -785,7 +783,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_io_clock_init(&c->io_clock[READ]) ?:
|
||||
ret = bch2_fs_counters_init(c) ?:
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ?:
|
||||
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
|
||||
bch2_fs_journal_init(&c->journal) ?:
|
||||
bch2_fs_replicas_init(c) ?:
|
||||
@ -799,8 +798,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
bch2_fs_encryption_init(c) ?:
|
||||
bch2_fs_compress_init(c) ?:
|
||||
bch2_fs_ec_init(c) ?:
|
||||
bch2_fs_fsio_init(c) ?:
|
||||
bch2_fs_counters_init(c);
|
||||
bch2_fs_fsio_init(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -1592,7 +1590,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
le32_to_cpu(mi->field.u64s) +
|
||||
sizeof(dev_mi) / sizeof(u64))) {
|
||||
bch_err(c, "device add error: new device superblock too small");
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_sb_members;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
@ -1605,7 +1603,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
goto have_slot;
|
||||
no_slot:
|
||||
bch_err(c, "device add error: already have maximum number of devices");
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_sb_members;
|
||||
goto err_unlock;
|
||||
|
||||
have_slot:
|
||||
@ -1616,7 +1614,7 @@ have_slot:
|
||||
mi = bch2_sb_resize_members(&c->disk_sb, u64s);
|
||||
if (!mi) {
|
||||
bch_err(c, "device add error: no room in superblock for member info");
|
||||
ret = -ENOSPC;
|
||||
ret = -BCH_ERR_ENOSPC_sb_members;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
|
@ -41,14 +41,14 @@
|
||||
#include "util.h"
|
||||
|
||||
#define SYSFS_OPS(type) \
|
||||
const struct sysfs_ops type ## _sysfs_ops = { \
|
||||
const struct sysfs_ops type ## _sysfs_ops = { \
|
||||
.show = type ## _show, \
|
||||
.store = type ## _store \
|
||||
}
|
||||
|
||||
#define SHOW(fn) \
|
||||
static ssize_t fn ## _to_text(struct printbuf *, \
|
||||
struct kobject *, struct attribute *);\
|
||||
struct kobject *, struct attribute *); \
|
||||
\
|
||||
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
|
||||
char *buf) \
|
||||
@ -67,15 +67,24 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
|
||||
memcpy(buf, out.buf, ret); \
|
||||
} \
|
||||
printbuf_exit(&out); \
|
||||
return ret; \
|
||||
return bch2_err_class(ret); \
|
||||
} \
|
||||
\
|
||||
static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
|
||||
struct attribute *attr)
|
||||
|
||||
#define STORE(fn) \
|
||||
static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\
|
||||
const char *, size_t); \
|
||||
\
|
||||
static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
|
||||
const char *buf, size_t size) \
|
||||
{ \
|
||||
return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \
|
||||
} \
|
||||
\
|
||||
static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\
|
||||
const char *buf, size_t size)
|
||||
|
||||
#define __sysfs_attribute(_name, _mode) \
|
||||
static struct attribute sysfs_##_name = \
|
||||
@ -157,6 +166,7 @@ write_attribute(trigger_gc);
|
||||
write_attribute(trigger_discards);
|
||||
write_attribute(trigger_invalidates);
|
||||
write_attribute(prune_cache);
|
||||
write_attribute(btree_wakeup);
|
||||
rw_attribute(btree_gc_periodic);
|
||||
rw_attribute(gc_gens_pos);
|
||||
|
||||
@ -363,6 +373,21 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
prt_printf(out, "\n");
|
||||
}
|
||||
|
||||
static void bch2_btree_wakeup_all(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans;
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
|
||||
|
||||
if (b)
|
||||
six_lock_wakeup_all(&b->lock);
|
||||
|
||||
}
|
||||
mutex_unlock(&c->btree_trans_lock);
|
||||
}
|
||||
|
||||
SHOW(bch2_fs)
|
||||
{
|
||||
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
|
||||
@ -480,6 +505,9 @@ STORE(bch2_fs)
|
||||
c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
|
||||
}
|
||||
|
||||
if (attr == &sysfs_btree_wakeup)
|
||||
bch2_btree_wakeup_all(c);
|
||||
|
||||
if (attr == &sysfs_trigger_gc) {
|
||||
/*
|
||||
* Full gc is currently incompatible with btree key cache:
|
||||
@ -610,6 +638,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_trigger_discards,
|
||||
&sysfs_trigger_invalidates,
|
||||
&sysfs_prune_cache,
|
||||
&sysfs_btree_wakeup,
|
||||
|
||||
&sysfs_gc_gens_pos,
|
||||
|
||||
|
@ -675,7 +675,7 @@ static int rand_mixed_trans(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
|
||||
|
||||
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(iter)));
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret));
|
||||
|
@ -8,6 +8,7 @@

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/freezer.h>
@ -274,6 +275,27 @@ void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
prt_char(out, '0' + ((v >> --nr_bits) & 1));
}

void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
const char *p;

if (!lines) {
printk("%s (null)\n", prefix);
return;
}

console_lock();
while (1) {
p = strchrnul(lines, '\n');
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
if (!*p)
break;
lines = p + 1;
prefix = KERN_CONT;
}
console_unlock();
}

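bch2_print_string_as_lines() walks a multi-line string and printk()s it one line at a time under console_lock(), switching the prefix to KERN_CONT after the first line. A rough userspace sketch of the same splitting loop, in portable C (strchr() instead of the GNU strchrnul(); not the kernel code):

#include <stdio.h>
#include <string.h>

/* Print each line of a possibly multi-line string with a prefix: find the
 * next '\n' (or the end of the string), print that slice, continue after it. */
static void print_string_as_lines(const char *prefix, const char *lines)
{
        if (!lines) {
                printf("%s (null)\n", prefix);
                return;
        }

        while (1) {
                const char *p = strchr(lines, '\n');
                size_t len = p ? (size_t) (p - lines) : strlen(lines);

                printf("%s%.*s\n", prefix, (int) len, lines);
                if (!p)
                        break;
                lines = p + 1;
        }
}

int main(void)
{
        print_string_as_lines("bcachefs: ", "first line\nsecond line");
        return 0;
}
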
/* time stats: */
|
||||
|
||||
static void bch2_time_stats_update_one(struct time_stats *stats,
|
||||
|
@ -355,6 +355,8 @@ u64 bch2_read_flag_list(char *, const char * const[]);
|
||||
|
||||
void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
|
||||
|
||||
void bch2_print_string_as_lines(const char *prefix, const char *lines);
|
||||
|
||||
#define NR_QUANTILES 15
|
||||
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
|
||||
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
|
||||
|
@ -350,17 +350,19 @@ err:
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
return buf.used;
|
||||
out:
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
static int bch2_xattr_get_handler(const struct xattr_handler *handler,
|
||||
|
@ -2,15 +2,15 @@
#include <stdio.h>
#include <linux/printbuf.h>

void prt_printf(struct printbuf *out, const char *fmt, ...)
void prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
{
va_list args;
int len;

do {
va_start(args, fmt);
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args);
va_end(args);
va_list args2;

va_copy(args2, args);
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
} while (len + 1 >= printbuf_remaining(out) &&
!printbuf_make_room(out, len + 1));

@ -18,3 +18,12 @@ void prt_printf(struct printbuf *out, const char *fmt, ...)
printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
out->pos += len;
}

void prt_printf(struct printbuf *out, const char *fmt, ...)
{
va_list args;

va_start(args, fmt);
prt_vprintf(out, fmt, args);
va_end(args);
}

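Splitting prt_printf() into prt_vprintf() plus a thin wrapper also makes the retry loop correct: a va_list can only be traversed once, so each vsnprintf() attempt after growing the buffer has to work on a fresh va_copy(). A small standalone sketch of that grow-and-retry pattern, using a hypothetical struct buf / append_fmt() rather than the printbuf API:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {
        char   *data;
        size_t  size;
        size_t  pos;
};

/* Append formatted text, growing the buffer until vsnprintf() fits.
 * va_copy() gives each vsnprintf() attempt its own copy of the arguments,
 * since a va_list may only be walked once. */
static int append_vfmt(struct buf *b, const char *fmt, va_list args)
{
        int len;

        for (;;) {
                va_list args2;

                va_copy(args2, args);
                len = vsnprintf(b->data + b->pos, b->size - b->pos, fmt, args2);
                va_end(args2);

                if (len < 0)
                        return -1;
                if ((size_t) len < b->size - b->pos)
                        break;

                /* Not enough room: grow, then retry with a fresh copy. */
                size_t new_size = b->pos + len + 1;
                char *p = realloc(b->data, new_size);
                if (!p)
                        return -1;
                b->data = p;
                b->size = new_size;
        }

        b->pos += len;
        return 0;
}

static int append_fmt(struct buf *b, const char *fmt, ...)
{
        va_list args;
        int ret;

        va_start(args, fmt);
        ret = append_vfmt(b, fmt, args);
        va_end(args);
        return ret;
}

int main(void)
{
        struct buf b = { .data = calloc(1, 8), .size = 8 };

        if (!b.data)
                return 1;
        append_fmt(&b, "seq %llu: %s", 42ULL, "journal checksum bad");
        puts(b.data);
        free(b.data);
        return 0;
}
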
linux/six.c
@ -6,6 +6,7 @@
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/six.h>
|
||||
#include <linux/slab.h>
|
||||
@ -16,7 +17,7 @@
|
||||
#define EBUG_ON(cond) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
|
||||
#define six_acquire(l, t, r) lock_acquire(l, 0, t, r, 1, NULL, _RET_IP_)
|
||||
#define six_release(l) lock_release(l, _RET_IP_)
|
||||
|
||||
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
|
||||
@ -124,7 +125,6 @@ static int __do_six_trylock_type(struct six_lock *lock,
|
||||
*/
|
||||
|
||||
if (type == SIX_LOCK_read && lock->readers) {
|
||||
retry:
|
||||
preempt_disable();
|
||||
this_cpu_inc(*lock->readers); /* signal that we own lock */
|
||||
|
||||
@ -136,27 +136,6 @@ retry:
|
||||
this_cpu_sub(*lock->readers, !ret);
|
||||
preempt_enable();
|
||||
|
||||
/*
|
||||
* If we failed from the lock path and the waiting bit wasn't
|
||||
* set, set it:
|
||||
*/
|
||||
if (!try && !ret) {
|
||||
v = old.v;
|
||||
|
||||
do {
|
||||
new.v = old.v = v;
|
||||
|
||||
if (!(old.v & l[type].lock_fail))
|
||||
goto retry;
|
||||
|
||||
if (new.waiters & (1 << type))
|
||||
break;
|
||||
|
||||
new.waiters |= 1 << type;
|
||||
} while ((v = atomic64_cmpxchg(&lock->state.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we failed because a writer was trying to take the
|
||||
* lock, issue a wakeup because we might have caused a
|
||||
@ -300,7 +279,7 @@ static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
|
||||
return false;
|
||||
|
||||
if (type != SIX_LOCK_write)
|
||||
six_acquire(&lock->dep_map, 1);
|
||||
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -337,7 +316,7 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
six_lock_wakeup(lock, old, SIX_LOCK_write);
|
||||
|
||||
if (ret)
|
||||
six_acquire(&lock->dep_map, 1);
|
||||
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -354,7 +333,7 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
|
||||
|
||||
six_set_owner(lock, type, old, current);
|
||||
if (type != SIX_LOCK_write)
|
||||
six_acquire(&lock->dep_map, 1);
|
||||
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -436,13 +415,27 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
|
||||
wait->lock_acquired = false;
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
if (!(lock->state.waiters & (1 << type)))
|
||||
set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
|
||||
/*
|
||||
* Retry taking the lock after taking waitlist lock, have raced with an
|
||||
* unlock:
|
||||
*/
|
||||
ret = __do_six_trylock_type(lock, type, current, false);
|
||||
if (ret <= 0)
|
||||
if (ret <= 0) {
|
||||
wait->start_time = local_clock();
|
||||
|
||||
if (!list_empty(&lock->wait_list)) {
|
||||
struct six_lock_waiter *last =
|
||||
list_last_entry(&lock->wait_list,
|
||||
struct six_lock_waiter, list);
|
||||
|
||||
if (time_before_eq64(wait->start_time, last->start_time))
|
||||
wait->start_time = last->start_time + 1;
|
||||
}
|
||||
|
||||
list_add_tail(&wait->list, &lock->wait_list);
|
||||
}
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
if (unlikely(ret > 0)) {
|
||||
@ -481,7 +474,7 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
out:
|
||||
if (ret && type == SIX_LOCK_write) {
|
||||
if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
|
||||
old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
|
||||
&lock->state.counter);
|
||||
six_lock_wakeup(lock, old, SIX_LOCK_read);
|
||||
@ -497,8 +490,10 @@ static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type
|
||||
{
|
||||
int ret;
|
||||
|
||||
wait->start_time = 0;
|
||||
|
||||
if (type != SIX_LOCK_write)
|
||||
six_acquire(&lock->dep_map, 0);
|
||||
six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read);
|
||||
|
||||
ret = do_six_trylock_type(lock, type, true) ? 0
|
||||
: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p);
|
||||
@ -668,7 +663,7 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
|
||||
{
|
||||
const struct six_lock_vals l[] = LOCK_VALS;
|
||||
|
||||
six_acquire(&lock->dep_map, 0);
|
||||
six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read);
|
||||
|
||||
/* XXX: assert already locked, and that we don't overflow: */
|
||||
|
||||
@ -695,8 +690,13 @@ EXPORT_SYMBOL_GPL(six_lock_increment);
|
||||
|
||||
void six_lock_wakeup_all(struct six_lock *lock)
|
||||
{
|
||||
union six_lock_state state = lock->state;
|
||||
struct six_lock_waiter *w;
|
||||
|
||||
six_lock_wakeup(lock, state, SIX_LOCK_read);
|
||||
six_lock_wakeup(lock, state, SIX_LOCK_intent);
|
||||
six_lock_wakeup(lock, state, SIX_LOCK_write);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
list_for_each_entry(w, &lock->wait_list, list)
|
||||
wake_up_process(w->task);
|
||||
|