Update bcachefs sources to f26267fc82 bcachefs: kill bset_tree->max_key

Kent Overstreet 2021-04-04 22:12:56 -04:00
parent f46437f06e
commit 209695dedf
35 changed files with 864 additions and 991 deletions

View File

@ -1 +1 @@
9922afc8b6d6227f4193feef6442f8c3d881f78c
f26267fc82539ef3390cf2bb2bc818436dd504c7

View File

@ -690,10 +690,11 @@ struct bch_fs {
struct bch_fs_usage *usage_base;
struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
struct bch_fs_usage __percpu *usage_gc;
u64 __percpu *online_reserved;
/* single element mempool: */
struct mutex usage_scratch_lock;
struct bch_fs_usage *usage_scratch;
struct bch_fs_usage_online *usage_scratch;
struct io_clock io_clock[2];
@ -804,6 +805,9 @@ struct bch_fs {
struct bio_set dio_write_bioset;
struct bio_set dio_read_bioset;
atomic64_t btree_writes_nr;
atomic64_t btree_writes_sectors;
struct bio_list btree_write_error_list;
struct work_struct btree_write_error_work;
spinlock_t btree_write_error_lock;
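Note on the bch_fs changes above: usage_scratch changes type to struct bch_fs_usage_online, a percpu online_reserved counter is split out, and two atomic64 counters for btree write statistics are added. Later hunks in this commit access fields as fs_usage->online_reserved and fs_usage->u.*, so the new type is presumably a thin wrapper around the existing struct bch_fs_usage; a minimal sketch of the assumed layout (the definition itself is not shown in this diff):

struct bch_fs_usage_online {
	u64			online_reserved; /* in-flight reservations, kept out of the persistent counters */
	struct bch_fs_usage	u;               /* the persistent usage counters, as before */
};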

View File

@ -1398,11 +1398,17 @@ enum bch_sb_feature {
BCH_FEATURE_NR,
};
#define BCH_SB_COMPAT() \
x(alloc_info, 0) \
x(alloc_metadata, 1) \
x(extents_above_btree_updates_done, 2) \
x(bformat_overflow_done, 3)
enum bch_sb_compat {
BCH_COMPAT_FEAT_ALLOC_INFO = 0,
BCH_COMPAT_FEAT_ALLOC_METADATA = 1,
BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE = 2,
BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE = 3,
#define x(f, n) BCH_COMPAT_##f,
BCH_SB_COMPAT()
#undef x
BCH_COMPAT_NR,
};
/* options: */
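The hand-written compat constants are replaced by an x-macro list, so the names live in one place; the enum expansion x(f, n) -> BCH_COMPAT_##f ignores the explicit number and relies on list order, which here matches 0-3. The same list can be reused to generate other tables; a minimal sketch of a name array built from it (the array name is illustrative, not part of this diff):

static const char * const compat_feature_names[] = {
#define x(f, n)	[n] = #f,
	BCH_SB_COMPAT()
#undef x
	NULL,
};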

View File

@ -698,7 +698,7 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
if (!bkey_pack_pos(max_key, b->data->max_key, b)) {
k = (void *) max_key;
bkey_init(&k->k);
k->k.p = t->max_key;
k->k.p = b->data->max_key;
}
}
@ -782,8 +782,6 @@ retry:
while (k != btree_bkey_last(b, t))
prev = k, k = bkey_next(k);
t->max_key = bkey_unpack_pos(b, prev);
if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) {
bkey_init(&min_key.k);
min_key.k.p = b->data->min_key;
@ -791,7 +789,7 @@ retry:
if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) {
bkey_init(&max_key.k);
max_key.k.p = t->max_key;
max_key.k.p = b->data->max_key;
}
/* Then we build the tree */
@ -970,8 +968,6 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
min_key.u64s = max_key.u64s = 0;
if (bkey_next(k) == btree_bkey_last(b, t)) {
t->max_key = bkey_unpack_pos(b, k);
for (j = 1; j < t->size; j = j * 2 + 1)
make_bfloat(b, t, j, &min_key, &max_key);
}
@ -1311,16 +1307,6 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
case BSET_RW_AUX_TREE:
return bset_search_write_set(b, t, search);
case BSET_RO_AUX_TREE:
/*
* Each node in the auxiliary search tree covers a certain range
* of bits, and keys above and below the set it covers might
* differ outside those bits - so we have to special case the
* start and end - handle that here:
*/
if (bpos_cmp(*search, t->max_key) > 0)
return btree_bkey_last(b, t);
return bset_search_tree(b, t, search, lossy_packed_search);
default:
unreachable();
@ -1357,23 +1343,6 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
return m;
}
/*
* Returns the first key greater than or equal to @search
*/
static __always_inline __flatten
struct bkey_packed *bch2_bset_search(struct btree *b,
struct bset_tree *t,
struct bpos *search,
struct bkey_packed *packed_search,
const struct bkey_packed *lossy_packed_search)
{
struct bkey_packed *m = __bch2_bset_search(b, t, search,
lossy_packed_search);
return bch2_bset_search_linear(b, t, search,
packed_search, lossy_packed_search, m);
}
/* Btree node iterator */
static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
@ -1469,6 +1438,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
unsigned i;
EBUG_ON(bpos_cmp(*search, b->data->min_key) < 0);
EBUG_ON(bpos_cmp(*search, b->data->max_key) > 0);
bset_aux_tree_verify(b);
memset(iter, 0, sizeof(*iter));
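With bset_tree->max_key gone, __bch2_bset_search no longer special-cases a search key lying past the range covered by the auxiliary search tree; instead bch2_btree_node_iter_init asserts (EBUG_ON) that the search key already falls inside [b->data->min_key, b->data->max_key]. A minimal illustrative helper, not part of this diff, showing the invariant callers now have to maintain:

static inline struct bpos clamp_search_to_node(struct bpos search,
					       const struct btree *b)
{
	/* callers of the node iterator must keep the search key inside the
	 * node's key range; clamp it if it could fall outside: */
	if (bpos_cmp(search, b->data->min_key) < 0)
		search = b->data->min_key;
	if (bpos_cmp(search, b->data->max_key) > 0)
		search = b->data->max_key;
	return search;
}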

View File

@ -906,136 +906,6 @@ out:
return b;
}
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_iter *iter,
struct btree *b,
enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
struct btree *parent;
struct btree_node_iter node_iter;
struct bkey_packed *k;
struct bkey_buf tmp;
struct btree *ret = NULL;
unsigned level = b->c.level;
bch2_bkey_buf_init(&tmp);
parent = btree_iter_node(iter, level + 1);
if (!parent)
return NULL;
/*
* There's a corner case where a btree_iter might have a node locked
* that is just outside its current pos - when
* bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
*
* But the lock ordering checks in __bch2_btree_node_lock() go off of
* iter->pos, not the node's key: so if the iterator is marked as
* needing to be traversed, we risk deadlock if we don't bail out here:
*/
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
return ERR_PTR(-EINTR);
if (!bch2_btree_node_relock(iter, level + 1)) {
ret = ERR_PTR(-EINTR);
goto out;
}
node_iter = iter->l[parent->c.level].iter;
k = bch2_btree_node_iter_peek_all(&node_iter, parent);
BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));
k = sib == btree_prev_sib
? bch2_btree_node_iter_prev(&node_iter, parent)
: (bch2_btree_node_iter_advance(&node_iter, parent),
bch2_btree_node_iter_peek(&node_iter, parent));
if (!k)
goto out;
bch2_bkey_buf_unpack(&tmp, c, parent, k);
ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
struct btree_iter *linked;
if (!bch2_btree_node_relock(iter, level + 1))
goto out;
/*
* We might have got -EINTR because trylock failed, and we're
* holding other locks that would cause us to deadlock:
*/
trans_for_each_iter(trans, linked)
if (btree_iter_lock_cmp(iter, linked) < 0)
__bch2_btree_iter_unlock(linked);
if (sib == btree_prev_sib)
btree_node_unlock(iter, level);
ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
/*
* before btree_iter_relock() calls btree_iter_verify_locks():
*/
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level + 1);
if (!bch2_btree_node_relock(iter, level)) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
if (!IS_ERR(ret)) {
six_unlock_intent(&ret->c.lock);
ret = ERR_PTR(-EINTR);
}
}
bch2_trans_relock(trans);
}
out:
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level + 1);
if (PTR_ERR_OR_ZERO(ret) == -EINTR)
bch2_btree_iter_upgrade(iter, level + 2);
BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
if (!IS_ERR_OR_NULL(ret)) {
struct btree *n1 = ret, *n2 = b;
if (sib != btree_prev_sib)
swap(n1, n2);
if (bpos_cmp(bpos_successor(n1->key.k.p),
n2->data->min_key)) {
char buf1[200], buf2[200];
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
"prev: %s\n"
"next: %s\n",
bch2_btree_ids[iter->btree_id], level,
buf1, buf2);
six_unlock_intent(&ret->c.lock);
ret = NULL;
}
}
bch2_btree_trans_verify_locks(trans);
bch2_bkey_buf_exit(&tmp, c);
return ret;
}
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k,
enum btree_id btree_id, unsigned level)
@ -1075,7 +945,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
" format: u64s %u fields %u %u %u %u %u\n"
" unpack fn len: %u\n"
" bytes used %zu/%zu (%zu%% full)\n"
" sib u64s: %u, %u (merge threshold %zu)\n"
" sib u64s: %u, %u (merge threshold %u)\n"
" nr packed keys %u\n"
" nr unpacked keys %u\n"
" floats %zu\n"
@ -1092,7 +962,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
b->nr.live_u64s * 100 / btree_max_u64s(c),
b->sib_u64s[0],
b->sib_u64s[1],
BTREE_FOREGROUND_MERGE_THRESHOLD(c),
c->btree_foreground_merge_threshold,
b->nr.packed_keys,
b->nr.unpacked_keys,
stats.floats,

View File

@ -26,9 +26,6 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
enum btree_id, unsigned, bool);
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, enum btree_node_sibling);
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, enum btree_id, unsigned);
@ -92,7 +89,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) >> 2))
#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b)
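The hysteresis fix above changes a left shift to a right shift: BTREE_FOREGROUND_MERGE_THRESHOLD(c) is a third of btree_max_u64s(c), and adding the threshold shifted left by two made the hysteresis five times the threshold (more than a full node), whereas threshold plus threshold >> 2 gives 1.25x. A worked example, assuming btree_max_u64s(c) == 1500 purely for illustration:

unsigned threshold = 1500 * 1 / 3;			/* 500 */
unsigned old_hyst  = threshold + (threshold << 2);	/* 500 + 2000 = 2500 */
unsigned new_hyst  = threshold + (threshold >> 2);	/* 500 +  125 =  625 */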

View File

@ -779,7 +779,7 @@ static int bch2_gc_done(struct bch_fs *c,
{
struct bch_dev *ca;
bool verify = (!initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
unsigned i, dev;
int ret = 0;
@ -1297,11 +1297,10 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
return;
}
as = bch2_btree_update_start(iter->trans, iter->btree_id,
as = bch2_btree_update_start(iter, old_nodes[0]->c.level,
btree_update_reserve_required(c, parent) + nr_old_nodes,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
NULL);
BTREE_INSERT_USE_RESERVE);
if (IS_ERR(as)) {
trace_btree_gc_coalesce_fail(c,
BTREE_GC_COALESCE_FAIL_RESERVE_GET);

View File

@ -1547,6 +1547,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
b->written += sectors_to_write;
atomic64_inc(&c->btree_writes_nr);
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
/* XXX: submitting IO with btree locks held: */
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
bch2_bkey_buf_exit(&k, c);
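The two counters incremented above track how many btree node writes were issued and how many sectors they covered, presumably so they can be exported as statistics. An illustrative read of them (not part of this diff), e.g. to compute the average btree write size:

u64 nr      = atomic64_read(&c->btree_writes_nr);
u64 sectors = atomic64_read(&c->btree_writes_sectors);
u64 avg_write_sectors = nr ? sectors / nr : 0;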

View File

@ -12,6 +12,7 @@
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "replicas.h"
#include <linux/prefetch.h>
#include <trace/events/bcachefs.h>
@ -238,6 +239,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
struct btree_iter *linked, *deadlock_iter = NULL;
u64 start_time = local_clock();
unsigned reason = 9;
bool ret;
/* Check if it's safe to block: */
trans_for_each_iter(trans, linked) {
@ -258,17 +260,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
if (!(trans->nounlock)) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
if (!btree_iter_get_locks(linked, true, false)) {
deadlock_iter = linked;
reason = 1;
}
} else {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
if (!btree_iter_get_locks(linked, true, false)) {
deadlock_iter = linked;
reason = 2;
reason = 1;
}
}
@ -298,18 +295,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
* we're about to lock, it must have the ancestors locked too:
*/
if (level > __fls(linked->nodes_locked)) {
if (!(trans->nounlock)) {
linked->locks_want =
max(level + 1, max_t(unsigned,
linked->locks_want,
iter->locks_want));
if (!btree_iter_get_locks(linked, true, false)) {
deadlock_iter = linked;
reason = 5;
}
} else {
linked->locks_want =
max(level + 1, max_t(unsigned,
linked->locks_want,
iter->locks_want));
if (!btree_iter_get_locks(linked, true, false)) {
deadlock_iter = linked;
reason = 6;
reason = 5;
}
}
@ -346,12 +338,23 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
if (six_trylock_type(&b->c.lock, type))
return true;
if (six_lock_type(&b->c.lock, type, should_sleep_fn, p))
return false;
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking_iter_idx = iter->idx;
trans->locking_pos = pos;
trans->locking_btree_id = iter->btree_id;
trans->locking_level = level;
trans->locking = b;
#endif
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
start_time);
return true;
ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = NULL;
#endif
if (ret)
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
start_time);
return ret;
}
/* Btree iterator locking: */
@ -421,50 +424,25 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
return false;
}
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter,
unsigned new_locks_want)
void __bch2_btree_iter_downgrade(struct btree_iter *iter,
unsigned new_locks_want)
{
unsigned l = iter->level;
unsigned l;
EBUG_ON(iter->locks_want >= new_locks_want);
EBUG_ON(iter->locks_want < new_locks_want);
iter->locks_want = new_locks_want;
do {
if (!btree_iter_node(iter, l))
break;
if (!bch2_btree_node_upgrade(iter, l)) {
iter->locks_want = l;
return false;
}
l++;
} while (l < iter->locks_want);
return true;
}
void __bch2_btree_iter_downgrade(struct btree_iter *iter,
unsigned downgrade_to)
{
unsigned l, new_locks_want = downgrade_to ?:
(iter->flags & BTREE_ITER_INTENT ? 1 : 0);
if (iter->locks_want < downgrade_to) {
iter->locks_want = new_locks_want;
while (iter->nodes_locked &&
(l = __fls(iter->nodes_locked)) >= iter->locks_want) {
if (l > iter->level) {
btree_node_unlock(iter, l);
} else {
if (btree_node_intent_locked(iter, l)) {
six_lock_downgrade(&iter->l[l].b->c.lock);
iter->nodes_intent_locked ^= 1 << l;
}
break;
while (iter->nodes_locked &&
(l = __fls(iter->nodes_locked)) >= iter->locks_want) {
if (l > iter->level) {
btree_node_unlock(iter, l);
} else {
if (btree_node_intent_locked(iter, l)) {
six_lock_downgrade(&iter->l[l].b->c.lock);
iter->nodes_intent_locked ^= 1 << l;
}
break;
}
}
@ -484,13 +462,12 @@ void bch2_trans_downgrade(struct btree_trans *trans)
bool bch2_trans_relock(struct btree_trans *trans)
{
struct btree_iter *iter;
bool ret = true;
trans_for_each_iter(trans, iter)
if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
ret &= bch2_btree_iter_relock(iter, true);
return ret;
if (btree_iter_keep(trans, iter) &&
!bch2_btree_iter_relock(iter, true))
return false;
return true;
}
void bch2_trans_unlock(struct btree_trans *trans)
@ -1027,7 +1004,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b) {
__btree_node_unlock(linked, level);
btree_node_unlock(linked, level);
linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
}
}
@ -2008,6 +1985,8 @@ static inline void btree_iter_copy(struct btree_iter *dst,
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
unsigned btree_id, struct bpos pos,
unsigned locks_want,
unsigned depth,
unsigned flags)
{
struct btree_iter *iter, *best = NULL;
@ -2020,10 +1999,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
pos.snapshot = btree_type_has_snapshots(btree_id)
? U32_MAX : 0;
/* We always want a fresh iterator for node iterators: */
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
goto alloc_iter;
trans_for_each_iter(trans, iter) {
if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
continue;
@ -2038,7 +2013,7 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
best = iter;
}
alloc_iter:
if (!best) {
iter = btree_trans_iter_alloc(trans);
bch2_btree_iter_init(trans, iter, btree_id);
@ -2062,10 +2037,25 @@ alloc_iter:
iter->snapshot = pos.snapshot;
if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want)
__bch2_btree_iter_upgrade_nounlock(iter, 1);
locks_want = min(locks_want, BTREE_MAX_DEPTH);
if (locks_want > iter->locks_want) {
iter->locks_want = locks_want;
btree_iter_get_locks(iter, true, false);
} else if (locks_want < iter->locks_want) {
__bch2_btree_iter_downgrade(iter, locks_want);
}
while (iter->level < depth) {
btree_node_unlock(iter, iter->level);
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
iter->level++;
}
while (iter->level > depth)
iter->l[--iter->level].b = BTREE_ITER_NO_NODE_INIT;
iter->min_depth = depth;
bch2_btree_iter_set_pos(iter, pos);
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
@ -2082,21 +2072,16 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
{
struct btree_iter *iter =
__bch2_trans_get_iter(trans, btree_id, pos,
BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
flags);
unsigned i;
locks_want, depth,
BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
flags);
BUG_ON(bkey_cmp(iter->pos, pos));
iter->locks_want = locks_want;
iter->level = depth;
iter->min_depth = depth;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
BUG_ON(iter->locks_want != min(locks_want, BTREE_MAX_DEPTH));
BUG_ON(iter->level != depth);
BUG_ON(iter->min_depth != depth);
iter->ip_allocated = _RET_IP_;
return iter;
@ -2304,11 +2289,24 @@ bch2_btree_iter_node_to_text(struct printbuf *out,
struct btree_bkey_cached_common *_b,
enum btree_iter_type type)
{
pr_buf(out, " %px l=%u %s:",
_b, _b->level, bch2_btree_ids[_b->btree_id]);
pr_buf(out, " l=%u %s:",
_b->level, bch2_btree_ids[_b->btree_id]);
bch2_bpos_to_text(out, btree_node_pos(_b, type));
}
#ifdef CONFIG_BCACHEFS_DEBUG
static bool trans_has_btree_nodes_locked(struct btree_trans *trans)
{
struct btree_iter *iter;
trans_for_each_iter(trans, iter)
if (btree_iter_type(iter) != BTREE_ITER_CACHED &&
iter->nodes_locked)
return true;
return false;
}
#endif
void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
{
#ifdef CONFIG_BCACHEFS_DEBUG
@ -2319,14 +2317,18 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip);
if (!trans_has_btree_nodes_locked(trans))
continue;
pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
trans_for_each_iter(trans, iter) {
if (!iter->nodes_locked)
continue;
pr_buf(out, " iter %u %s:",
pr_buf(out, " iter %u %c %s:",
iter->idx,
btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b',
bch2_btree_ids[iter->btree_id]);
bch2_bpos_to_text(out, iter->pos);
pr_buf(out, "\n");
@ -2345,17 +2347,18 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
b = READ_ONCE(trans->locking);
if (b) {
pr_buf(out, " locking iter %u l=%u %s:",
iter = &trans->iters[trans->locking_iter_idx];
pr_buf(out, " locking iter %u %c l=%u %s:",
trans->locking_iter_idx,
btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b',
trans->locking_level,
bch2_btree_ids[trans->locking_btree_id]);
bch2_bpos_to_text(out, trans->locking_pos);
pr_buf(out, " node ");
bch2_btree_iter_node_to_text(out,
(void *) b,
btree_iter_type(&trans->iters[trans->locking_iter_idx]));
btree_iter_type(iter));
pr_buf(out, "\n");
}
}

View File

@ -116,7 +116,6 @@ bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
unsigned new_locks_want)
@ -124,9 +123,7 @@ static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
return iter->locks_want < new_locks_want
? (!iter->trans->nounlock
? __bch2_btree_iter_upgrade(iter, new_locks_want)
: __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
? __bch2_btree_iter_upgrade(iter, new_locks_want)
: iter->uptodate <= BTREE_ITER_NEED_PEEK;
}
@ -134,8 +131,10 @@ void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);
static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
{
if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT) ? 1 : 0)
__bch2_btree_iter_downgrade(iter, 0);
unsigned new_locks_want = (iter->flags & BTREE_ITER_INTENT ? 1 : 0);
if (iter->locks_want > new_locks_want)
__bch2_btree_iter_downgrade(iter, new_locks_want);
}
void bch2_trans_downgrade(struct btree_trans *);
@ -175,8 +174,11 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
new_pos.snapshot = iter->snapshot;
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
iter->k.type = KEY_TYPE_deleted;
iter->k.p.inode = iter->pos.inode = new_pos.inode;
iter->k.p.offset = iter->pos.offset = new_pos.offset;
iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot;
iter->k.size = 0;
}
/* Sort order for locking btree iterators: */
@ -261,14 +263,17 @@ int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
void bch2_trans_unlink_iters(struct btree_trans *);
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
struct bpos, unsigned);
struct bpos, unsigned,
unsigned, unsigned);
static inline struct btree_iter *
bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
struct bpos pos, unsigned flags)
{
struct btree_iter *iter =
__bch2_trans_get_iter(trans, btree_id, pos, flags);
__bch2_trans_get_iter(trans, btree_id, pos,
(flags & BTREE_ITER_INTENT) != 0, 0,
flags);
iter->ip_allocated = _THIS_IP_;
return iter;
}
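__bch2_trans_get_iter now takes locks_want and depth explicitly, and bch2_trans_get_node_iter becomes a thin wrapper instead of always forcing a fresh iterator. A minimal usage sketch under the new signatures, assuming an initialized transaction; pos and level stand in for the caller's position and depth, and error handling is omitted:

struct btree_iter *iter, *node_iter;

/* leaf iterator: locks_want derived from BTREE_ITER_INTENT, depth 0 */
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, pos,
			   BTREE_ITER_INTENT);

/* node iterator: explicit locks_want and starting depth */
node_iter = bch2_trans_get_node_iter(&trans, BTREE_ID_extents, pos,
				     U8_MAX, level, BTREE_ITER_INTENT);

/* ... use the iterators ... */

bch2_trans_iter_put(&trans, node_iter);
bch2_trans_iter_put(&trans, iter);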

View File

@ -352,6 +352,7 @@ err:
static int btree_key_cache_flush_pos(struct btree_trans *trans,
struct bkey_cached_key key,
u64 journal_seq,
unsigned commit_flags,
bool evict)
{
struct bch_fs *c = trans->c;
@ -390,12 +391,17 @@ retry:
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
BTREE_INSERT_JOURNAL_RECLAIM);
(ck->journal.seq == journal_last_seq(j)
? BTREE_INSERT_JOURNAL_RESERVED
: 0)|
commit_flags);
err:
if (ret == -EINTR)
goto retry;
if (ret == -EAGAIN)
goto out;
if (ret) {
bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
"error flushing key cache: %i", ret);
@ -438,15 +444,15 @@ out:
return ret;
}
static void btree_key_cache_journal_flush(struct journal *j,
struct journal_entry_pin *pin,
u64 seq)
int bch2_btree_key_cache_journal_flush(struct journal *j,
struct journal_entry_pin *pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
struct btree_trans trans;
int ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@ -461,10 +467,13 @@ static void btree_key_cache_journal_flush(struct journal *j,
six_unlock_read(&ck->c.lock);
bch2_trans_init(&trans, c, 0, 0);
btree_key_cache_flush_pos(&trans, key, seq, false);
ret = btree_key_cache_flush_pos(&trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false);
bch2_trans_exit(&trans);
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
return ret;
}
/*
@ -480,7 +489,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
if (!bch2_btree_key_cache_find(c, id, pos))
return 0;
return btree_key_cache_flush_pos(trans, key, 0, true);
return btree_key_cache_flush_pos(trans, key, 0, 0, true);
}
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
@ -517,7 +526,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
}
bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
&ck->journal, btree_key_cache_journal_flush);
&ck->journal, bch2_btree_key_cache_journal_flush);
if (kick_reclaim)
journal_reclaim_kick(&c->journal);
@ -581,9 +590,14 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
do {
struct rhash_head *pos, *next;
rht_for_each_entry_safe(ck, pos, next, tbl, bc->shrink_iter, hash) {
pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
while (!rht_is_a_nulls(pos)) {
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
ck = container_of(pos, struct bkey_cached, hash);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
continue;
goto next;
if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
@ -595,6 +609,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
scanned++;
if (scanned >= nr)
break;
next:
pos = next;
}
bc->shrink_iter++;
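Making bch2_btree_key_cache_journal_flush return int (like the btree node flush functions later in this commit) implies the journal-pin flush callback type used by bch2_journal_pin_update/bch2_journal_pin_add now returns an int as well; a sketch of the assumed callback shape, not shown in this diff:

typedef int (*journal_pin_flush_fn)(struct journal *j,
				    struct journal_entry_pin *pin, u64 seq);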

View File

@ -1,15 +1,6 @@
#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
#define _BCACHEFS_BTREE_KEY_CACHE_H
static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
size_t max_dirty = nr_keys / 4;
return max_t(ssize_t, 0, nr_dirty - max_dirty);
}
static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
@ -29,6 +20,9 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
}
int bch2_btree_key_cache_journal_flush(struct journal *,
struct journal_entry_pin *, u64);
struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);

View File

@ -95,7 +95,7 @@ btree_lock_want(struct btree_iter *iter, int level)
return BTREE_NODE_UNLOCKED;
}
static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
int lock_type = btree_node_locked_type(iter, level);
@ -106,13 +106,6 @@ static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
mark_btree_node_unlocked(iter, level);
}
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
EBUG_ON(!level && iter->trans->nounlock);
__btree_node_unlock(iter, level);
}
static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
{
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
@ -187,27 +180,14 @@ static inline bool btree_node_lock(struct btree *b,
unsigned long ip)
{
struct btree_trans *trans = iter->trans;
bool ret;
EBUG_ON(level >= BTREE_MAX_DEPTH);
EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = b;
trans->locking_iter_idx = iter->idx;
trans->locking_pos = pos;
trans->locking_btree_id = iter->btree_id;
trans->locking_level = level;
#endif
ret = likely(six_trylock_type(&b->c.lock, type)) ||
return likely(six_trylock_type(&b->c.lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
__bch2_btree_node_lock(b, pos, level, iter, type,
should_sleep_fn, p, ip);
#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = NULL;
#endif
return ret;
}
bool __bch2_btree_node_relock(struct btree_iter *, unsigned);

View File

@ -47,8 +47,6 @@ struct bset_tree {
u16 data_offset;
u16 aux_data_offset;
u16 end_offset;
struct bpos max_key;
};
struct btree_write {
@ -98,6 +96,11 @@ struct btree {
u8 byte_order;
u8 unpack_fn_len;
struct btree_write writes[2];
/* Key/pointer for this btree node */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
/*
* XXX: add a delete sequence number, so when bch2_btree_node_relock()
* fails because the lock sequence number has changed - i.e. the
@ -128,11 +131,6 @@ struct btree {
/* lru list */
struct list_head list;
struct btree_write writes[2];
/* Key/pointer for this btree node */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
struct btree_cache {
@ -372,7 +370,6 @@ struct btree_trans {
u8 nr_updates2;
unsigned used_mempool:1;
unsigned error:1;
unsigned nounlock:1;
unsigned in_traverse_all:1;
u64 iters_linked;

View File

@ -437,10 +437,6 @@ static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
goto err_free;
}
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
if (ret)
goto err_free;
as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
}
@ -458,6 +454,10 @@ static void bch2_btree_update_free(struct btree_update *as)
{
struct bch_fs *c = as->c;
if (as->took_gc_lock)
up_read(&c->gc_lock);
as->took_gc_lock = false;
bch2_journal_preres_put(&c->journal, &as->journal_preres);
bch2_journal_pin_drop(&c->journal, &as->journal);
@ -893,24 +893,33 @@ void bch2_btree_update_done(struct btree_update *as)
{
BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
as->took_gc_lock = false;
bch2_btree_reserve_put(as);
continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
}
struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
unsigned nr_nodes, unsigned flags,
struct closure *cl)
bch2_btree_update_start(struct btree_iter *iter, unsigned level,
unsigned nr_nodes, unsigned flags)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
struct btree_update *as;
struct closure cl;
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
? BCH_DISK_RESERVATION_NOFAIL : 0;
int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
? JOURNAL_RES_GET_RECLAIM : 0;
int journal_flags = 0;
int ret = 0;
if (flags & BTREE_INSERT_JOURNAL_RESERVED)
journal_flags |= JOURNAL_RES_GET_RESERVED;
closure_init_stack(&cl);
retry:
/*
* This check isn't necessary for correctness - it's just to potentially
* prevent us from doing a lot of work that'll end up being wasted:
@ -919,12 +928,36 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
if (ret)
return ERR_PTR(ret);
/*
* XXX: figure out how far we might need to split,
* instead of locking/reserving all the way to the root:
*/
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
trace_trans_restart_iter_upgrade(trans->ip);
return ERR_PTR(-EINTR);
}
if (flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock);
else if (!down_read_trylock(&c->gc_lock)) {
if (flags & BTREE_INSERT_NOUNLOCK)
return ERR_PTR(-EINTR);
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
if (!bch2_trans_relock(trans)) {
up_read(&c->gc_lock);
return ERR_PTR(-EINTR);
}
}
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
as->c = c;
as->mode = BTREE_INTERIOR_NO_UPDATE;
as->btree_id = id;
as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
as->btree_id = iter->btree_id;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@ -936,16 +969,25 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
BTREE_UPDATE_JOURNAL_RES,
journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret == -EAGAIN) {
if (flags & BTREE_INSERT_NOUNLOCK)
return ERR_PTR(-EINTR);
/*
* this would be cleaner if bch2_journal_preres_get() took a
* closure argument
*/
if (flags & BTREE_INSERT_NOUNLOCK) {
ret = -EINTR;
goto err;
}
bch2_trans_unlock(trans);
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
goto err;
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
if (ret)
return ERR_PTR(ret);
goto err;
if (!bch2_trans_relock(trans)) {
ret = -EINTR;
@ -960,7 +1002,8 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
if (ret)
goto err;
ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl);
ret = bch2_btree_reserve_get(as, nr_nodes, flags,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (ret)
goto err;
@ -975,6 +1018,18 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
return as;
err:
bch2_btree_update_free(as);
if (ret == -EAGAIN) {
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_trans_unlock(trans);
closure_sync(&cl);
ret = -EINTR;
}
if (ret == -EINTR && bch2_trans_relock(trans))
goto retry;
return ERR_PTR(ret);
}
@ -1419,6 +1474,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
lockdep_assert_held(&c->gc_lock);
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
@ -1450,14 +1506,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
bch2_btree_node_unlock_write(b, iter);
btree_node_interior_verify(c, b);
/*
* when called from the btree_split path the new nodes aren't added to
* the btree iterator yet, so the merge path's unlock/wait/relock dance
* won't work:
*/
bch2_foreground_maybe_merge(c, iter, b->c.level,
flags|BTREE_INSERT_NOUNLOCK);
return;
split:
btree_split(as, b, iter, keys, flags);
@ -1466,109 +1514,73 @@ split:
int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
unsigned flags)
{
struct btree_trans *trans = iter->trans;
struct btree *b = iter_l(iter)->b;
struct btree_update *as;
struct closure cl;
unsigned l;
int ret = 0;
closure_init_stack(&cl);
/* Hack, because gc and splitting nodes doesn't mix yet: */
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
!down_read_trylock(&c->gc_lock)) {
if (flags & BTREE_INSERT_NOUNLOCK) {
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
return -EINTR;
}
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
if (!bch2_trans_relock(trans))
ret = -EINTR;
}
/*
* XXX: figure out how far we might need to split,
* instead of locking/reserving all the way to the root:
*/
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
trace_trans_restart_iter_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
as = bch2_btree_update_start(trans, iter->btree_id,
btree_update_reserve_required(c, b), flags,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
if (ret == -EAGAIN) {
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_trans_unlock(trans);
ret = -EINTR;
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
}
goto out;
}
as = bch2_btree_update_start(iter, iter->level,
btree_update_reserve_required(c, b), flags);
if (IS_ERR(as))
return PTR_ERR(as);
btree_split(as, b, iter, NULL, flags);
bch2_btree_update_done(as);
/*
* We haven't successfully inserted yet, so don't downgrade all the way
* back to read locks;
*/
__bch2_btree_iter_downgrade(iter, 1);
out:
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
closure_sync(&cl);
for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++)
ret = bch2_foreground_maybe_merge(c, iter, l, flags);
return ret;
}
void __bch2_foreground_maybe_merge(struct bch_fs *c,
struct btree_iter *iter,
unsigned level,
unsigned flags,
enum btree_node_sibling sib)
int __bch2_foreground_maybe_merge(struct bch_fs *c,
struct btree_iter *iter,
unsigned level,
unsigned flags,
enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
struct btree_iter *sib_iter = NULL;
struct btree_update *as;
struct bkey_format_state new_s;
struct bkey_format new_f;
struct bkey_i delete;
struct btree *b, *m, *n, *prev, *next, *parent;
struct closure cl;
struct bpos sib_pos;
size_t sib_u64s;
int ret = 0;
int ret = 0, ret2 = 0;
BUG_ON(!btree_node_locked(iter, level));
closure_init_stack(&cl);
retry:
ret = bch2_btree_iter_traverse(iter);
if (ret)
goto err;
BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b;
parent = btree_node_parent(iter, b);
if (!parent)
if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) ||
(sib == btree_next_sib && !bpos_cmp(b->data->max_key, POS_MAX))) {
b->sib_u64s[sib] = U16_MAX;
goto out;
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
goto out;
/* XXX: can't be holding read locks */
m = bch2_btree_node_get_sibling(c, iter, b, sib);
if (IS_ERR(m)) {
ret = PTR_ERR(m);
goto err;
}
/* NULL means no sibling: */
if (!m) {
sib_pos = sib == btree_prev_sib
? bpos_predecessor(b->data->min_key)
: bpos_successor(b->data->max_key);
sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id,
sib_pos, U8_MAX, level,
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(sib_iter);
if (ret)
goto err;
m = sib_iter->l[level].b;
if (btree_node_parent(iter, b) !=
btree_node_parent(sib_iter, m)) {
b->sib_u64s[sib] = U16_MAX;
goto out;
}
@ -1581,6 +1593,8 @@ retry:
next = m;
}
BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
bch2_bkey_format_init(&new_s);
bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
__bch2_btree_calc_format(&new_s, prev);
@ -1598,33 +1612,21 @@ retry:
}
sib_u64s = min(sib_u64s, btree_max_u64s(c));
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
six_unlock_intent(&m->c.lock);
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
goto out;
}
/* We're changing btree topology, doesn't mix with gc: */
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
!down_read_trylock(&c->gc_lock))
goto err_cycle_gc_lock;
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
ret = -EINTR;
goto err_unlock;
}
as = bch2_btree_update_start(trans, iter->btree_id,
parent = btree_node_parent(iter, b);
as = bch2_btree_update_start(iter, level,
btree_update_reserve_required(c, parent) + 1,
flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
goto err_unlock;
}
BTREE_INSERT_USE_RESERVE);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto err;
trace_btree_merge(c, b);
@ -1658,6 +1660,7 @@ retry:
bch2_btree_update_get_open_buckets(as, n);
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
six_lock_increment(&m->c.lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_drop(iter, m);
@ -1671,11 +1674,9 @@ retry:
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
out:
bch2_btree_trans_verify_locks(trans);
bch2_trans_iter_free(trans, sib_iter);
/*
* Don't downgrade locks here: we're called after successful insert,
@ -1686,58 +1687,56 @@ out:
* split path, and downgrading to read locks in there is potentially
* confusing:
*/
closure_sync(&cl);
return;
err_cycle_gc_lock:
six_unlock_intent(&m->c.lock);
if (flags & BTREE_INSERT_NOUNLOCK)
goto out;
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
up_read(&c->gc_lock);
ret = -EINTR;
goto err;
err_unlock:
six_unlock_intent(&m->c.lock);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
return ret ?: ret2;
err:
BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
if ((ret == -EAGAIN || ret == -EINTR) &&
!(flags & BTREE_INSERT_NOUNLOCK)) {
bch2_trans_unlock(trans);
closure_sync(&cl);
ret = bch2_btree_iter_traverse(iter);
if (ret)
goto out;
bch2_trans_iter_put(trans, sib_iter);
sib_iter = NULL;
if (ret == -EINTR && bch2_trans_relock(trans))
goto retry;
if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
ret2 = ret;
ret = bch2_btree_iter_traverse_all(trans);
if (!ret)
goto retry;
}
goto out;
}
static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
struct btree *b, unsigned flags,
struct closure *cl)
/**
* bch_btree_node_rewrite - Rewrite/move a btree node
*/
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
__le64 seq, unsigned flags)
{
struct btree *n, *parent = btree_node_parent(iter, b);
struct btree *b, *n, *parent;
struct btree_update *as;
int ret;
as = bch2_btree_update_start(iter->trans, iter->btree_id,
flags |= BTREE_INSERT_NOFAIL;
retry:
ret = bch2_btree_iter_traverse(iter);
if (ret)
goto out;
b = bch2_btree_iter_peek_node(iter);
if (!b || b->data->keys.seq != seq)
goto out;
parent = btree_node_parent(iter, b);
as = bch2_btree_update_start(iter, b->c.level,
(parent
? btree_update_reserve_required(c, parent)
: 0) + 1,
flags, cl);
if (IS_ERR(as)) {
flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret == -EINTR)
goto retry;
if (ret) {
trace_btree_gc_rewrite_node_fail(c, b);
return PTR_ERR(as);
goto out;
}
bch2_btree_interior_update_will_free_node(as, b);
@ -1768,60 +1767,8 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
return 0;
}
/**
* bch_btree_node_rewrite - Rewrite/move a btree node
*
* Returns 0 on success, -EINTR or -EAGAIN on failure (i.e.
* btree_check_reserve() has to wait)
*/
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
__le64 seq, unsigned flags)
{
struct btree_trans *trans = iter->trans;
struct closure cl;
struct btree *b;
int ret;
flags |= BTREE_INSERT_NOFAIL;
closure_init_stack(&cl);
bch2_btree_iter_upgrade(iter, U8_MAX);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
if (!down_read_trylock(&c->gc_lock)) {
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
}
}
while (1) {
ret = bch2_btree_iter_traverse(iter);
if (ret)
break;
b = bch2_btree_iter_peek_node(iter);
if (!b || b->data->keys.seq != seq)
break;
ret = __btree_node_rewrite(c, iter, b, flags, &cl);
if (ret != -EAGAIN &&
ret != -EINTR)
break;
bch2_trans_unlock(trans);
closure_sync(&cl);
}
out:
bch2_btree_iter_downgrade(iter);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
closure_sync(&cl);
return ret;
}
@ -1892,71 +1839,34 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
struct btree_update *as = NULL;
struct btree *new_hash = NULL;
struct closure cl;
int ret;
int ret = 0;
closure_init_stack(&cl);
if (!bch2_btree_iter_upgrade(iter, U8_MAX))
return -EINTR;
if (!down_read_trylock(&c->gc_lock)) {
bch2_trans_unlock(iter->trans);
down_read(&c->gc_lock);
if (!bch2_trans_relock(iter->trans)) {
ret = -EINTR;
goto err;
}
}
/*
* check btree_ptr_hash_val() after @b is locked by
* btree_iter_traverse():
*/
if (btree_ptr_hash_val(new_key) != b->hash_val) {
/* bch2_btree_reserve_get will unlock */
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
if (ret) {
bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);
if (!bch2_trans_relock(iter->trans)) {
ret = -EINTR;
goto err;
}
if (!bch2_trans_relock(iter->trans))
return -EINTR;
}
new_hash = bch2_btree_node_mem_alloc(c);
}
retry:
as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL, &cl);
as = bch2_btree_update_start(iter, b->c.level,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
if (ret == -EAGAIN)
ret = -EINTR;
if (ret == -EINTR) {
bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);
if (bch2_trans_relock(iter->trans))
goto retry;
}
goto err;
}
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
if (ret)
goto err_free_update;
__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
bch2_btree_iter_downgrade(iter);
@ -1969,12 +1879,9 @@ err:
six_unlock_write(&new_hash->c.lock);
six_unlock_intent(&new_hash->c.lock);
}
up_read(&c->gc_lock);
closure_sync(&cl);
bch2_btree_cache_cannibalize_unlock(c);
return ret;
err_free_update:
bch2_btree_update_free(as);
goto err;
}
/* Init code: */
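bch2_btree_update_start now takes the iterator and level and internally handles the gc_lock, the iterator upgrade to U8_MAX, and the unlock/closure-wait/relock retries, so its callers collapse to a common pattern. A condensed sketch of the new calling convention, mirroring bch2_btree_node_rewrite above (error handling trimmed):

retry:
	ret = bch2_btree_iter_traverse(iter);
	if (ret)
		return ret;

	as = bch2_btree_update_start(iter, iter->level,
				     btree_update_reserve_required(c, b),
				     BTREE_INSERT_NOFAIL);
	ret = PTR_ERR_OR_ZERO(as);
	if (ret == -EINTR)
		goto retry;		/* transaction restart */
	if (ret)
		return ret;

	/* ... allocate and write the new node(s) ... */

	bch2_btree_update_done(as);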

View File

@ -48,6 +48,7 @@ struct btree_update {
} mode;
unsigned nodes_written:1;
unsigned took_gc_lock:1;
enum btree_id btree_id;
@ -120,8 +121,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
void bch2_btree_update_done(struct btree_update *);
struct btree_update *
bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
unsigned, struct closure *);
bch2_btree_update_start(struct btree_iter *, unsigned, unsigned, unsigned);
void bch2_btree_interior_update_will_free_node(struct btree_update *,
struct btree *);
@ -132,10 +132,10 @@ void bch2_btree_insert_node(struct btree_update *, struct btree *,
unsigned);
int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned);
void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
unsigned, unsigned, enum btree_node_sibling);
int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
unsigned, unsigned, enum btree_node_sibling);
static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
struct btree_iter *iter,
unsigned level, unsigned flags,
enum btree_node_sibling sib)
@ -143,27 +143,27 @@ static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
struct btree *b;
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
return;
return 0;
if (!bch2_btree_node_relock(iter, level))
return;
return 0;
b = iter->l[level].b;
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
return;
return 0;
__bch2_foreground_maybe_merge(c, iter, level, flags, sib);
return __bch2_foreground_maybe_merge(c, iter, level, flags, sib);
}
static inline void bch2_foreground_maybe_merge(struct bch_fs *c,
static inline int bch2_foreground_maybe_merge(struct bch_fs *c,
struct btree_iter *iter,
unsigned level,
unsigned flags)
{
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_prev_sib);
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_next_sib);
return bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_prev_sib) ?:
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_next_sib);
}
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);

View File

@ -134,7 +134,7 @@ fix_iter:
return true;
}
static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
unsigned i, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
@ -145,14 +145,15 @@ static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
six_unlock_read(&b->c.lock);
return 0;
}
static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
@ -375,7 +376,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
struct btree_trans_commit_hook *h;
unsigned u64s = 0;
@ -423,7 +423,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (marking) {
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
}
/* Must be called under mark_lock: */
if (marking && trans->fs_usage_deltas &&
!bch2_replicas_delta_list_marked(c, trans->fs_usage_deltas)) {
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
goto err;
}
/*
@ -462,21 +468,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
i->k->k.version = MAX_VERSION;
}
/* Must be called under mark_lock: */
if (marking && trans->fs_usage_deltas &&
bch2_replicas_delta_list_apply(c, fs_usage,
trans->fs_usage_deltas)) {
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
goto err;
}
trans_for_each_update(trans, i)
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
bch2_mark_update(trans, i->iter, i->k,
fs_usage, i->trigger_flags);
NULL, i->trigger_flags);
if (marking)
bch2_trans_fs_usage_apply(trans, fs_usage);
if (marking && trans->fs_usage_deltas)
bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas);
if (unlikely(c->gc_pos.phase))
bch2_trans_mark_gc(trans);
@ -485,31 +483,85 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
do_btree_insert_one(trans, i->iter, i->k);
err:
if (marking) {
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
}
return ret;
}
static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_insert_entry *i;
struct btree *b = iter_l(iter)->b;
struct bkey_s_c old;
int u64s_delta = 0;
int ret;
/*
* Inserting directly into interior nodes is an uncommon operation with
* various weird edge cases: also, a lot of things about
* BTREE_ITER_NODES iters need to be audited
*/
if (unlikely(btree_iter_type(iter) != BTREE_ITER_KEYS))
return 0;
BUG_ON(iter->level);
trans_for_each_update2(trans, i) {
if (iter_l(i->iter)->b != b)
continue;
old = bch2_btree_iter_peek_slot(i->iter);
ret = bkey_err(old);
if (ret)
return ret;
u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
}
return u64s_delta <= 0
? (bch2_foreground_maybe_merge(trans->c, iter, iter->level,
trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
: 0;
}
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
static inline int do_bch2_trans_commit(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct btree_iter *iter;
int ret;
trans_for_each_update2(trans, i)
BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level));
trans_for_each_update2(trans, i) {
struct btree *b;
ret = bch2_journal_preres_get(&trans->c->journal,
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED)
continue;
b = iter_l(i->iter)->b;
if (b->sib_u64s[0] < c->btree_foreground_merge_threshold ||
b->sib_u64s[1] < c->btree_foreground_merge_threshold) {
ret = maybe_do_btree_merge(trans, i->iter);
if (unlikely(ret))
return ret;
}
}
trans_for_each_update2(trans, i)
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, trans->journal_preres_u64s,
JOURNAL_RES_GET_NONBLOCK|
((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
? JOURNAL_RES_GET_RECLAIM : 0));
((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
? JOURNAL_RES_GET_RESERVED : 0));
if (unlikely(ret == -EAGAIN))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s);
@ -547,7 +599,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
trans_for_each_update2(trans, i)
if (!same_leaf_as_prev(trans, i))
bch2_btree_node_lock_for_insert(trans->c,
bch2_btree_node_lock_for_insert(c,
iter_l(i->iter)->b, i->iter);
ret = bch2_trans_commit_write_locked(trans, stopped_at);
@ -558,35 +610,45 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
i->iter);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
trans->journal_pin, NULL);
/*
* Drop journal reservation after dropping write locks, since dropping
* the journal reservation may kick off a journal write:
*/
bch2_journal_res_put(&trans->c->journal, &trans->journal_res);
bch2_journal_res_put(&c->journal, &trans->journal_res);
if (unlikely(ret))
return ret;
if (trans->flags & BTREE_INSERT_NOUNLOCK)
trans->nounlock = true;
if (!(trans->flags & BTREE_INSERT_NOUNLOCK))
trans_for_each_update2(trans, i)
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
!same_leaf_as_prev(trans, i))
bch2_foreground_maybe_merge(trans->c, i->iter,
0, trans->flags);
trans->nounlock = false;
bch2_trans_downgrade(trans);
return 0;
}
static int journal_reclaim_wait_done(struct bch_fs *c)
{
int ret;
ret = bch2_journal_error(&c->journal);
if (ret)
return ret;
ret = !bch2_btree_key_cache_must_wait(c);
if (ret)
return ret;
if (mutex_trylock(&c->journal.reclaim_lock)) {
ret = bch2_journal_reclaim(&c->journal);
mutex_unlock(&c->journal.reclaim_lock);
}
if (!ret)
ret = !bch2_btree_key_cache_must_wait(c);
return ret;
}
static noinline
int bch2_trans_commit_error(struct btree_trans *trans,
struct btree_insert_entry *i,
@ -641,11 +703,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
case BTREE_INSERT_NEED_MARK_REPLICAS:
bch2_trans_unlock(trans);
trans_for_each_update(trans, i) {
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
if (ret)
return ret;
}
ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
if (ret)
return ret;
if (bch2_trans_relock(trans))
return 0;
@ -656,6 +716,10 @@ int bch2_trans_commit_error(struct btree_trans *trans,
case BTREE_INSERT_NEED_JOURNAL_RES:
bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
return -EAGAIN;
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret)
return ret;
@ -669,11 +733,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
bch2_trans_unlock(trans);
do {
mutex_lock(&c->journal.reclaim_lock);
ret = bch2_journal_reclaim(&c->journal);
mutex_unlock(&c->journal.reclaim_lock);
} while (!ret && bch2_btree_key_cache_must_wait(c));
wait_event(c->journal.reclaim_wait,
(ret = journal_reclaim_wait_done(c)));
if (!ret && bch2_trans_relock(trans))
return 0;
@ -920,17 +981,14 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto out;
}
/*
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
u64s = jset_u64s(i->k->k.u64s);
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))

View File

@ -167,37 +167,6 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
if (fs_usage == c->usage_scratch)
mutex_unlock(&c->usage_scratch_lock);
else
kfree(fs_usage);
}
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
{
struct bch_fs_usage *ret;
unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN);
if (ret)
return ret;
if (mutex_trylock(&c->usage_scratch_lock))
goto out_pool;
ret = kzalloc(bytes, GFP_NOFS);
if (ret)
return ret;
mutex_lock(&c->usage_scratch_lock);
out_pool:
ret = c->usage_scratch;
memset(ret, 0, bytes);
return ret;
}
static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
unsigned journal_seq,
bool gc)
@ -252,30 +221,28 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
return ret;
}
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
{
struct bch_fs_usage *ret;
unsigned seq, i, v, u64s = fs_usage_u64s(c);
retry:
ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (unlikely(!ret))
return NULL;
struct bch_fs_usage_online *ret;
unsigned seq, i, u64s;
percpu_down_read(&c->mark_lock);
v = fs_usage_u64s(c);
if (unlikely(u64s != v)) {
u64s = v;
ret = kmalloc(sizeof(struct bch_fs_usage_online) +
sizeof(u64) + c->replicas.nr, GFP_NOFS);
if (unlikely(!ret)) {
percpu_up_read(&c->mark_lock);
kfree(ret);
goto retry;
return NULL;
}
ret->online_reserved = percpu_u64_get(c->online_reserved);
u64s = fs_usage_u64s(c);
do {
seq = read_seqcount_begin(&c->usage_lock);
memcpy(ret, c->usage_base, u64s * sizeof(u64));
memcpy(&ret->u, c->usage_base, u64s * sizeof(u64));
for (i = 0; i < ARRAY_SIZE(c->usage); i++)
acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[i], u64s);
acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
} while (read_seqcount_retry(&c->usage_lock, seq));
return ret;
@ -311,31 +278,31 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
void bch2_fs_usage_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_fs_usage *fs_usage)
struct bch_fs_usage_online *fs_usage)
{
unsigned i;
pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
pr_buf(out, "hidden:\t\t\t\t%llu\n",
fs_usage->hidden);
fs_usage->u.hidden);
pr_buf(out, "data:\t\t\t\t%llu\n",
fs_usage->data);
fs_usage->u.data);
pr_buf(out, "cached:\t\t\t\t%llu\n",
fs_usage->cached);
fs_usage->u.cached);
pr_buf(out, "reserved:\t\t\t%llu\n",
fs_usage->reserved);
fs_usage->u.reserved);
pr_buf(out, "nr_inodes:\t\t\t%llu\n",
fs_usage->nr_inodes);
fs_usage->u.nr_inodes);
pr_buf(out, "online reserved:\t\t%llu\n",
fs_usage->online_reserved);
for (i = 0;
i < ARRAY_SIZE(fs_usage->persistent_reserved);
i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
i++) {
pr_buf(out, "%u replicas:\n", i + 1);
pr_buf(out, "\treserved:\t\t%llu\n",
fs_usage->persistent_reserved[i]);
fs_usage->u.persistent_reserved[i]);
}
for (i = 0; i < c->replicas.nr; i++) {
@ -344,7 +311,7 @@ void bch2_fs_usage_to_text(struct printbuf *out,
pr_buf(out, "\t");
bch2_replicas_entry_to_text(out, e);
pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]);
pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
}
}
@ -360,12 +327,12 @@ static u64 avail_factor(u64 r)
return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
}
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
{
return min(fs_usage->hidden +
fs_usage->btree +
fs_usage->data +
reserve_factor(fs_usage->reserved +
return min(fs_usage->u.hidden +
fs_usage->u.btree +
fs_usage->u.data +
reserve_factor(fs_usage->u.reserved +
fs_usage->online_reserved),
c->capacity);
}
@ -382,7 +349,7 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
bch2_fs_usage_read_one(c, &c->usage_base->btree);
reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
bch2_fs_usage_read_one(c, &c->usage_base->online_reserved);
percpu_u64_get(c->online_reserved);
ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;
@ -436,43 +403,6 @@ static bool bucket_became_unavailable(struct bucket_mark old,
!is_available_bucket(new);
}
int bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct disk_reservation *disk_res,
unsigned journal_seq)
{
s64 added = fs_usage->data + fs_usage->reserved;
s64 should_not_have_added;
int ret = 0;
percpu_rwsem_assert_held(&c->mark_lock);
/*
* Not allowed to reduce sectors_available except by getting a
* reservation:
*/
should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
if (WARN_ONCE(should_not_have_added > 0,
"disk usage increased by %lli more than reservation of %llu",
added, disk_res ? disk_res->sectors : 0)) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
ret = -1;
}
if (added > 0) {
disk_res->sectors -= added;
fs_usage->online_reserved -= added;
}
preempt_disable();
acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false),
(u64 *) fs_usage, fs_usage_u64s(c));
preempt_enable();
return ret;
}
static inline void account_bucket(struct bch_fs_usage *fs_usage,
struct bch_dev_usage *dev_usage,
enum bch_data_type type,
@ -494,6 +424,8 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
percpu_rwsem_assert_held(&c->mark_lock);
preempt_disable();
if (!fs_usage)
fs_usage = fs_usage_ptr(c, journal_seq, gc);
u = dev_usage_ptr(ca, journal_seq, gc);
if (bucket_type(old))
@ -504,8 +436,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
account_bucket(fs_usage, u, bucket_type(new),
1, ca->mi.bucket_size);
u->buckets_alloc +=
(int) new.owned_by_allocator - (int) old.owned_by_allocator;
u->buckets_ec += (int) new.stripe - (int) old.stripe;
u->buckets_unavailable +=
is_unavailable_bucket(new) - is_unavailable_bucket(old);
@ -524,22 +454,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
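
bch2_dev_usage_update() derives every per-device counter change from the (old, new) bucket state pair, adding the signed difference of a predicate rather than tracking increments at each call site. A standalone sketch of that old/new delta style follows; struct bucket_state, the predicates and dev_usage_update() here are illustrative stand-ins, not the kernel types.

	#include <stdbool.h>
	#include <stdint.h>

	struct bucket_state {
		bool stripe;        /* bucket belongs to an erasure-coded stripe */
		bool dirty;
	};

	struct dev_usage {
		int64_t buckets_ec;
		int64_t buckets_unavailable;
	};

	static bool is_unavailable(struct bucket_state s)
	{
		return s.dirty || s.stripe;
	}

	/*
	 * Update counters from a state transition: each counter moves by the
	 * signed difference of its predicate evaluated on new vs old, so the
	 * same helper handles any old->new transition.
	 */
	static void dev_usage_update(struct dev_usage *u,
				     struct bucket_state old,
				     struct bucket_state new)
	{
		u->buckets_ec          += (int) new.stripe - (int) old.stripe;
		u->buckets_unavailable += (int) is_unavailable(new) -
					  (int) is_unavailable(old);
	}
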
static inline int update_replicas(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct bch_replicas_entry *r,
s64 sectors)
static inline void update_replicas(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct bch_replicas_entry *r,
s64 sectors)
{
int idx = bch2_replicas_entry_idx(c, r);
if (idx < 0)
return -1;
if (!fs_usage)
return 0;
BUG_ON(idx < 0);
fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
fs_usage->replicas[idx] += sectors;
return 0;
}
static inline void update_cached_sectors(struct bch_fs *c,
@ -586,6 +511,7 @@ static inline void update_replicas_list(struct btree_trans *trans,
n = (void *) d->d + d->used;
n->delta = sectors;
memcpy(&n->r, r, replicas_entry_bytes(r));
bch2_replicas_entry_sort(&n->r);
d->used += b;
}
@ -599,43 +525,6 @@ static inline void update_cached_sectors_list(struct btree_trans *trans,
update_replicas_list(trans, &r.e, sectors);
}
static inline struct replicas_delta *
replicas_delta_next(struct replicas_delta *d)
{
return (void *) d + replicas_entry_bytes(&d->r) + 8;
}
int bch2_replicas_delta_list_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct replicas_delta_list *r)
{
struct replicas_delta *d = r->d;
struct replicas_delta *top = (void *) r->d + r->used;
unsigned i;
for (d = r->d; d != top; d = replicas_delta_next(d))
if (update_replicas(c, fs_usage, &d->r, d->delta)) {
top = d;
goto unwind;
}
if (!fs_usage)
return 0;
fs_usage->nr_inodes += r->nr_inodes;
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
fs_usage->reserved += r->persistent_reserved[i];
fs_usage->persistent_reserved[i] += r->persistent_reserved[i];
}
return 0;
unwind:
for (d = r->d; d != top; d = replicas_delta_next(d))
update_replicas(c, fs_usage, &d->r, -d->delta);
return -1;
}
#define do_mark_fn(fn, c, pos, flags, ...) \
({ \
int gc, ret = 0; \
@ -653,7 +542,6 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator,
bool gc)
{
struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
@ -661,13 +549,6 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
new.owned_by_allocator = owned_by_allocator;
}));
/*
* XXX: this is wrong, this means we'll be doing updates to the percpu
* buckets_alloc counter that don't have an open journal buffer and
* we'll race with the machinery that accumulates that to ca->usage_base
*/
bch2_dev_usage_update(c, ca, fs_usage, old, new, 0, gc);
BUG_ON(!gc &&
!owned_by_allocator && !old.owned_by_allocator);
@ -1416,22 +1297,15 @@ int bch2_mark_update(struct btree_trans *trans,
return ret;
}
void bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct bch_fs_usage *fs_usage)
static noinline __cold
void fs_usage_apply_warn(struct btree_trans *trans,
unsigned disk_res_sectors)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
static int warned_disk_usage = 0;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
char buf[200];
if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res,
trans->journal_res.seq) ||
warned_disk_usage ||
xchg(&warned_disk_usage, 1))
return;
bch_err(c, "disk usage increased more than %llu sectors reserved",
bch_err(c, "disk usage increased more than %u sectors reserved",
disk_res_sectors);
trans_for_each_update(trans, i) {
@ -1466,6 +1340,65 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
}
}
void bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct replicas_delta_list *deltas)
{
struct bch_fs *c = trans->c;
static int warned_disk_usage = 0;
bool warn = false;
unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
struct replicas_delta *d = deltas->d;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
s64 added = 0, should_not_have_added;
unsigned i;
percpu_rwsem_assert_held(&c->mark_lock);
preempt_disable();
dst = fs_usage_ptr(c, trans->journal_res.seq, false);
for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
switch (d->r.data_type) {
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
added += d->delta;
}
update_replicas(c, dst, &d->r, d->delta);
}
dst->nr_inodes += deltas->nr_inodes;
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
added += deltas->persistent_reserved[i];
dst->reserved += deltas->persistent_reserved[i];
dst->persistent_reserved[i] += deltas->persistent_reserved[i];
}
/*
* Not allowed to reduce sectors_available except by getting a
* reservation:
*/
should_not_have_added = added - (s64) disk_res_sectors;
if (unlikely(should_not_have_added > 0)) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
warn = true;
}
if (added > 0) {
trans->disk_res->sectors -= added;
this_cpu_sub(*c->online_reserved, added);
}
preempt_enable();
if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
fs_usage_apply_warn(trans, disk_res_sectors);
}
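
The accounting invariant enforced at the end of bch2_trans_fs_usage_apply() is that a transaction may consume at most what its disk reservation covers; anything beyond that is clawed back from sectors_available and warned about. A small standalone sketch of the same invariant, assuming a single global sectors_available pool; apply_usage_delta, struct reservation and the message text are illustrative, not the bcachefs API.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static _Atomic long long sectors_available;   /* pool reservations draw from */

	struct reservation {
		long long sectors;            /* sectors pre-reserved by this transaction */
	};

	/*
	 * Apply the sectors a transaction actually consumed.  Consumption up to
	 * the reservation is subtracted from it; consumption beyond the
	 * reservation is a bug, so claw it back from the global pool and warn.
	 */
	static bool apply_usage_delta(struct reservation *res, long long added)
	{
		long long excess = added - res->sectors;
		bool ok = true;

		if (excess > 0) {
			atomic_fetch_sub(&sectors_available, excess);
			added -= excess;
			ok = false;
			fprintf(stderr, "usage grew %lld sectors past the reservation\n",
				excess);
		}

		if (added > 0)
			res->sectors -= added;

		return ok;
	}
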
/* trans_mark: */
static struct btree_iter *trans_get_update(struct btree_trans *trans,
@ -2197,16 +2130,6 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c,
/* Disk reservations: */
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
percpu_down_read(&c->mark_lock);
this_cpu_sub(c->usage[0]->online_reserved,
res->sectors);
percpu_up_read(&c->mark_lock);
res->sectors = 0;
}
#define SECTORS_CACHE 1024
int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
@ -2240,7 +2163,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
out:
pcpu->sectors_available -= sectors;
this_cpu_add(c->usage[0]->online_reserved, sectors);
this_cpu_add(*c->online_reserved, sectors);
res->sectors += sectors;
preempt_enable();
@ -2257,7 +2180,7 @@ recalculate:
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
max_t(s64, 0, sectors_available - sectors));
this_cpu_add(c->usage[0]->online_reserved, sectors);
this_cpu_add(*c->online_reserved, sectors);
res->sectors += sectors;
ret = 0;
} else {

View File

@ -210,19 +210,16 @@ static inline unsigned dev_usage_u64s(void)
return sizeof(struct bch_dev_usage) / sizeof(u64);
}
void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *);
void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
void bch2_fs_usage_to_text(struct printbuf *,
struct bch_fs *, struct bch_fs_usage *);
struct bch_fs *, struct bch_fs_usage_online *);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
@ -240,20 +237,15 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned,
s64, struct bch_fs_usage *, u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, unsigned);
int bch2_mark_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct bch_fs_usage *, unsigned);
int bch2_replicas_delta_list_apply(struct bch_fs *,
struct bch_fs_usage *,
struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
unsigned, s64, unsigned);
int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
struct bkey_i *insert, unsigned);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *,
struct disk_reservation *, struct bch_dev *,
@ -263,13 +255,11 @@ int bch2_trans_mark_dev_sb(struct bch_fs *, struct disk_reservation *,
/* disk reservations: */
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
static inline void bch2_disk_reservation_put(struct bch_fs *c,
struct disk_reservation *res)
{
if (res->sectors)
__bch2_disk_reservation_put(c, res);
this_cpu_sub(*c->online_reserved, res->sectors);
res->sectors = 0;
}
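
With this change online_reserved is a bare percpu counter rather than a field of bch_fs_usage: reservations touch only the local CPU's slot on the hot path, and a filesystem-wide total is computed only when needed by summing every slot (percpu_u64_get() on the read side). A rough per-thread analogue in portable C, with illustrative names (reserve_add, reserve_total, MAX_CPUS):

	#include <stdatomic.h>

	#define MAX_CPUS 64

	/* One slot per CPU/thread; each writer only touches its own slot. */
	static _Atomic long long online_reserved[MAX_CPUS];

	static void reserve_add(unsigned cpu, long long sectors)
	{
		/* relaxed: only this CPU writes its slot, readers tolerate staleness */
		atomic_fetch_add_explicit(&online_reserved[cpu], sectors,
					  memory_order_relaxed);
	}

	static void reserve_sub(unsigned cpu, long long sectors)
	{
		atomic_fetch_sub_explicit(&online_reserved[cpu], sectors,
					  memory_order_relaxed);
	}

	/* Slow path: sum every slot to get the filesystem-wide total. */
	static long long reserve_total(void)
	{
		long long sum = 0;

		for (unsigned i = 0; i < MAX_CPUS; i++)
			sum += atomic_load_explicit(&online_reserved[i],
						    memory_order_relaxed);
		return sum;
	}
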
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)

View File

@ -53,7 +53,6 @@ struct bucket_array {
};
struct bch_dev_usage {
u64 buckets_alloc;
u64 buckets_ec;
u64 buckets_unavailable;
@ -66,12 +65,6 @@ struct bch_dev_usage {
struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
u64 online_reserved;
/* fields after online_reserved are cleared/recalculated by gc: */
u64 gc_start[0];
u64 hidden;
u64 btree;
u64 data;
@ -91,6 +84,11 @@ struct bch_fs_usage {
u64 replicas[];
};
struct bch_fs_usage_online {
u64 online_reserved;
struct bch_fs_usage u;
};
struct bch_fs_usage_short {
u64 capacity;
u64 used;
@ -98,22 +96,6 @@ struct bch_fs_usage_short {
u64 nr_inodes;
};
struct replicas_delta {
s64 delta;
struct bch_replicas_entry r;
} __packed;
struct replicas_delta_list {
unsigned size;
unsigned used;
struct {} memset_start;
u64 nr_inodes;
u64 persistent_reserved[BCH_REPLICAS_MAX];
struct {} memset_end;
struct replicas_delta d[0];
};
/*
* A reservation for space on disk:
*/

View File

@ -379,7 +379,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
{
struct bch_ioctl_fs_usage *arg = NULL;
struct bch_replicas_usage *dst_e, *dst_end;
struct bch_fs_usage *src;
struct bch_fs_usage_online *src;
u32 replica_entries_bytes;
unsigned i;
int ret = 0;
@ -405,7 +405,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
arg->online_reserved = src->online_reserved;
for (i = 0; i < BCH_REPLICAS_MAX; i++)
arg->persistent_reserved[i] = src->persistent_reserved[i];
arg->persistent_reserved[i] = src->u.persistent_reserved[i];
dst_e = arg->replicas;
dst_end = (void *) arg->replicas + replica_entries_bytes;
@ -419,7 +419,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
break;
}
dst_e->sectors = src->replicas[i];
dst_e->sectors = src->u.replicas[i];
dst_e->r = *src_e;
/* recheck after setting nr_devs: */

View File

@ -11,6 +11,7 @@
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@ -59,21 +60,23 @@ journal_seq_to_buf(struct journal *j, u64 seq)
return buf;
}
static void journal_pin_new_entry(struct journal *j, int count)
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
struct journal_entry_pin_list *p;
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->key_cache_list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, count);
p->devs.nr = 0;
}
static void journal_pin_new_entry(struct journal *j)
{
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
p = fifo_push_ref(&j->pin);
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, count);
p->devs.nr = 0;
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
}
static void bch2_journal_buf_init(struct journal *j)
@ -192,7 +195,7 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
/* Initialize new buffer: */
journal_pin_new_entry(j, 1);
journal_pin_new_entry(j);
bch2_journal_buf_init(j);
@ -450,6 +453,27 @@ unlock:
if (!ret)
goto retry;
if ((ret == cur_entry_journal_full ||
ret == cur_entry_journal_pin_full) &&
!can_discard &&
j->reservations.idx == j->reservations.unwritten_idx &&
(flags & JOURNAL_RES_GET_RESERVED)) {
char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
bch_err(c, "Journal stuck!");
if (journal_debug_buf) {
bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
bch_err(c, "%s", journal_debug_buf);
bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j);
bch_err(c, "Journal pins:\n%s", journal_debug_buf);
kfree(journal_debug_buf);
}
bch2_fatal_error(c);
dump_stack();
}
/*
* Journal is full - can't rely on reclaim from work item due to
* freezing:
@ -499,7 +523,7 @@ static bool journal_preres_available(struct journal *j,
unsigned new_u64s,
unsigned flags)
{
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
if (!ret && mutex_trylock(&j->reclaim_lock)) {
bch2_journal_reclaim(j);
@ -1009,12 +1033,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
fifo_for_each_entry_ptr(p, &j->pin, seq) {
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, 1);
p->devs.nr = 0;
}
fifo_for_each_entry_ptr(p, &j->pin, seq)
journal_pin_list_init(p, 1);
list_for_each_entry(i, journal_entries, list) {
unsigned ptr;
@ -1037,7 +1057,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
set_bit(JOURNAL_STARTED, &j->flags);
j->last_flush_write = jiffies;
journal_pin_new_entry(j, 1);
journal_pin_new_entry(j);
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
@ -1114,6 +1134,7 @@ int bch2_fs_journal_init(struct journal *j)
spin_lock_init(&j->err_lock);
init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
init_waitqueue_head(&j->reclaim_wait);
init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);
@ -1166,6 +1187,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"last_seq_ondisk:\t%llu\n"
"flushed_seq_ondisk:\t%llu\n"
"prereserved:\t\t%u/%u\n"
"each entry reserved:\t%u\n"
"nr flush writes:\t%llu\n"
"nr noflush writes:\t%llu\n"
"nr direct reclaim:\t%llu\n"
@ -1180,6 +1202,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->flushed_seq_ondisk,
j->prereserved.reserved,
j->prereserved.remaining,
j->entry_u64s_reserved,
j->nr_flush_writes,
j->nr_noflush_writes,
j->nr_direct_reclaim,

View File

@ -213,11 +213,13 @@ static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type
enum btree_id id, unsigned level,
const void *data, unsigned u64s)
{
memset(entry, 0, sizeof(*entry));
entry->u64s = cpu_to_le16(u64s);
entry->type = type;
entry->btree_id = id;
entry->level = level;
entry->type = type;
entry->pad[0] = 0;
entry->pad[1] = 0;
entry->pad[2] = 0;
memcpy_u64s_small(entry->_data, data, u64s);
return jset_u64s(u64s);
@ -306,7 +308,6 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
#define JOURNAL_RES_GET_CHECK (1 << 1)
#define JOURNAL_RES_GET_RESERVED (1 << 2)
#define JOURNAL_RES_GET_RECLAIM (1 << 3)
static inline int journal_res_get_fast(struct journal *j,
struct journal_res *res,
@ -410,7 +411,12 @@ static inline void bch2_journal_preres_put(struct journal *j,
s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
res->u64s = 0;
closure_wake_up(&j->preres_wait);
if (unlikely(s.waiting)) {
clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
(unsigned long *) &j->prereserved.v);
closure_wake_up(&j->preres_wait);
}
if (s.reserved <= s.remaining &&
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
@ -426,32 +432,32 @@ int __bch2_journal_preres_get(struct journal *,
static inline int bch2_journal_preres_get_fast(struct journal *j,
struct journal_preres *res,
unsigned new_u64s,
unsigned flags)
unsigned flags,
bool set_waiting)
{
int d = new_u64s - res->u64s;
union journal_preres_state old, new;
u64 v = atomic64_read(&j->prereserved.counter);
int ret;
do {
old.v = new.v = v;
ret = 0;
new.reserved += d;
/*
* If we're being called from the journal reclaim path, we have
* to unconditionally give out the pre-reservation, there's
* nothing else sensible we can do - otherwise we'd recurse back
* into the reclaim path and deadlock:
*/
if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
new.reserved > new.remaining)
if ((flags & JOURNAL_RES_GET_RESERVED) ||
new.reserved + d < new.remaining) {
new.reserved += d;
ret = 1;
} else if (set_waiting && !new.waiting)
new.waiting = true;
else
return 0;
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
old.v, new.v)) != old.v);
res->u64s += d;
return 1;
if (ret)
res->u64s += d;
return ret;
}
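
The fast path above is a lock-free read-modify-write of a single 64-bit word: {waiting, reserved, remaining} are packed into union journal_preres_state, the new state is computed from a snapshot, and the compare-and-swap is retried until it lands. Below is a standalone model of that loop with a simplified union; preres_get_fast and the force flag (standing in for JOURNAL_RES_GET_RESERVED) are illustrative names.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	union preres_state {
		uint64_t v;
		struct {
			uint64_t waiting:1;
			uint64_t reserved:31;
			uint64_t remaining:32;
		};
	};

	static _Atomic uint64_t preres;

	/*
	 * Try to reserve @u64s units: succeed if they fit under the remaining
	 * budget (or @force is set); otherwise set the waiting bit so whoever
	 * frees space knows to wake us, and tell the caller to block.
	 */
	static bool preres_get_fast(unsigned u64s, bool force)
	{
		union preres_state old, new;
		uint64_t v = atomic_load(&preres);
		bool ret;

		do {
			old.v = new.v = v;
			ret = false;

			if (force || new.reserved + u64s < new.remaining) {
				new.reserved += u64s;
				ret = true;
			} else if (!new.waiting) {
				new.waiting = 1;
			} else {
				return false;   /* nothing to change; caller must block */
			}
		} while (!atomic_compare_exchange_weak(&preres, &v, new.v));

		return ret;
	}

Packing the waiting bit into the same word is what lets the release side (bch2_journal_preres_put() above) detect waiters and wake them with a single clear_bit plus closure_wake_up.
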
static inline int bch2_journal_preres_get(struct journal *j,
@ -462,7 +468,7 @@ static inline int bch2_journal_preres_get(struct journal *j,
if (new_u64s <= res->u64s)
return 0;
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags))
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
return 0;
if (flags & JOURNAL_RES_GET_NONBLOCK)

View File

@ -239,7 +239,7 @@ void bch2_journal_space_available(struct journal *j)
u64s_remaining = (u64) clean << 6;
u64s_remaining -= (u64) total << 3;
u64s_remaining = max(0LL, u64s_remaining);
u64s_remaining /= 2;
u64s_remaining /= 4;
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
out:
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
@ -353,6 +353,9 @@ static inline void __journal_pin_drop(struct journal *j,
if (!journal_pin_active(pin))
return;
if (j->flush_in_progress == pin)
j->flush_in_progress_dropped = true;
pin_list = journal_seq_pin(j, pin->seq);
pin->seq = 0;
list_del_init(&pin->list);
@ -404,7 +407,12 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin->seq = seq;
pin->flush = flush_fn;
list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
if (flush_fn == bch2_btree_key_cache_journal_flush)
list_add(&pin->list, &pin_list->key_cache_list);
else if (flush_fn)
list_add(&pin->list, &pin_list->list);
else
list_add(&pin->list, &pin_list->flushed);
spin_unlock(&j->lock);
/*
@ -434,39 +442,49 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
*/
static struct journal_entry_pin *
journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
journal_get_next_pin(struct journal *j,
bool get_any,
bool get_key_cache,
u64 max_seq, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
return NULL;
spin_lock(&j->lock);
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
if (*seq > max_seq ||
(ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list)))
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > max_seq && !get_any && !get_key_cache)
break;
if (ret) {
list_move(&ret->list, &pin_list->flushed);
BUG_ON(j->flush_in_progress);
j->flush_in_progress = ret;
if (*seq <= max_seq || get_any) {
ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
if (*seq <= max_seq || get_any || get_key_cache) {
ret = list_first_entry_or_null(&pin_list->key_cache_list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
}
spin_unlock(&j->lock);
return ret;
return NULL;
}
/* returns true if we did work */
static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
unsigned min_nr)
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
unsigned min_any,
unsigned min_key_cache)
{
struct journal_entry_pin *pin;
u64 seq, ret = 0;
size_t nr_flushed = 0;
journal_pin_flush_fn flush_fn;
u64 seq;
int err;
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
return 0;
lockdep_assert_held(&j->reclaim_lock);
@ -475,23 +493,47 @@ static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
j->last_flushed = jiffies;
pin = journal_get_next_pin(j, min_nr
? U64_MAX : seq_to_flush, &seq);
spin_lock(&j->lock);
pin = journal_get_next_pin(j,
min_any != 0,
min_key_cache != 0,
seq_to_flush, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
j->flush_in_progress_dropped = false;
flush_fn = pin->flush;
}
spin_unlock(&j->lock);
if (!pin)
break;
if (min_nr)
min_nr--;
if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
min_key_cache--;
pin->flush(j, pin, seq);
if (min_any)
min_any--;
BUG_ON(j->flush_in_progress != pin);
err = flush_fn(j, pin, seq);
spin_lock(&j->lock);
/* Pin might have been dropped or rearmed: */
if (likely(!err && !j->flush_in_progress_dropped))
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
j->flush_in_progress = NULL;
j->flush_in_progress_dropped = false;
spin_unlock(&j->lock);
wake_up(&j->pin_flush_wait);
ret++;
if (err)
break;
nr_flushed++;
}
return ret;
return nr_flushed;
}
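
The core of the reworked journal_flush_pins() is a handshake that lets the flush callback run without j->lock held: the pin is marked in-progress under the lock, the callback runs unlocked, and on return the pin is only retired if nothing dropped or re-armed it in the meantime (the flush_in_progress_dropped flag). A compressed pthread model of that handshake, assuming a single flusher thread; struct flusher, pin_drop_locked and flush_one are illustrative, and list handling is elided.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct pin;

	struct flusher {
		pthread_mutex_t lock;
		struct pin     *flush_in_progress;
		bool            flush_in_progress_dropped;
	};

	struct pin {
		int  (*flush)(struct pin *);
		bool flushed;
	};

	/* Called with the lock held by anyone dropping a pin early. */
	static void pin_drop_locked(struct flusher *f, struct pin *pin)
	{
		if (f->flush_in_progress == pin)
			f->flush_in_progress_dropped = true;
		/* ...unlink pin from its list here... */
	}

	static int flush_one(struct flusher *f, struct pin *pin)
	{
		int err;

		pthread_mutex_lock(&f->lock);
		f->flush_in_progress = pin;
		f->flush_in_progress_dropped = false;
		pthread_mutex_unlock(&f->lock);

		err = pin->flush(pin);          /* runs without the lock held */

		pthread_mutex_lock(&f->lock);
		/* Only retire the pin if it wasn't dropped or re-armed meanwhile: */
		if (!err && !f->flush_in_progress_dropped)
			pin->flushed = true;    /* list_move(..., flushed) in the kernel */
		f->flush_in_progress = NULL;
		f->flush_in_progress_dropped = false;
		pthread_mutex_unlock(&f->lock);

		return err;
	}
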
static u64 journal_seq_to_flush(struct journal *j)
@ -556,8 +598,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool kthread = (current->flags & PF_KTHREAD) != 0;
u64 seq_to_flush, nr_flushed = 0;
size_t min_nr;
u64 seq_to_flush;
size_t min_nr, nr_flushed;
unsigned flags;
int ret = 0;
@ -595,15 +637,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (j->prereserved.reserved * 2 > j->prereserved.remaining)
min_nr = 1;
if (atomic_read(&c->btree_cache.dirty) * 4 >
c->btree_cache.used * 3)
min_nr = 1;
if (fifo_free(&j->pin) <= 32)
min_nr = 1;
min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
trace_journal_reclaim_start(c,
min_nr,
j->prereserved.reserved,
@ -613,14 +649,19 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys));
nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
nr_flushed = journal_flush_pins(j, seq_to_flush,
min_nr,
min(bch2_nr_btree_keys_need_flush(c), 128UL));
if (direct)
j->nr_direct_reclaim += nr_flushed;
else
j->nr_background_reclaim += nr_flushed;
trace_journal_reclaim_finish(c, nr_flushed);
} while (min_nr && nr_flushed);
if (nr_flushed)
wake_up(&j->reclaim_wait);
} while (min_nr && nr_flushed && !direct);
memalloc_noreclaim_restore(flags);
@ -713,7 +754,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
*did_work = journal_flush_pins(j, seq_to_flush, 0) != 0;
*did_work = journal_flush_pins(j, seq_to_flush, 0, 0) != 0;
spin_lock(&j->lock);
/*

View File

@ -43,6 +43,7 @@ struct journal_buf {
struct journal_entry_pin_list {
struct list_head list;
struct list_head key_cache_list;
struct list_head flushed;
atomic_t count;
struct bch_devs_list devs;
@ -50,7 +51,7 @@ struct journal_entry_pin_list {
struct journal;
struct journal_entry_pin;
typedef void (*journal_pin_flush_fn)(struct journal *j,
typedef int (*journal_pin_flush_fn)(struct journal *j,
struct journal_entry_pin *, u64);
struct journal_entry_pin {
@ -105,8 +106,9 @@ union journal_preres_state {
};
struct {
u32 reserved;
u32 remaining;
u64 waiting:1,
reserved:31,
remaining:32;
};
};
@ -243,6 +245,7 @@ struct journal {
spinlock_t err_lock;
struct mutex reclaim_lock;
wait_queue_head_t reclaim_wait;
struct task_struct *reclaim_thread;
bool reclaim_kicked;
u64 nr_direct_reclaim;
@ -250,6 +253,7 @@ struct journal {
unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress;
bool flush_in_progress_dropped;
wait_queue_head_t pin_flush_wait;
/* protects advancing ja->discard_idx: */

View File

@ -88,6 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
if (ret)
break;
}
bch2_trans_iter_put(&trans, iter);
ret = bch2_trans_exit(&trans) ?: ret;
bch2_bkey_buf_exit(&sk, c);
@ -135,20 +136,24 @@ retry:
dev_idx, flags, true);
if (ret) {
bch_err(c, "Cannot drop device without losing data");
goto err;
break;
}
ret = bch2_btree_node_update_key(c, iter, b, k.k);
if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(iter);
ret = 0;
goto retry;
}
if (ret) {
bch_err(c, "Error updating btree node key: %i", ret);
goto err;
break;
}
}
bch2_trans_iter_free(&trans, iter);
if (ret)
goto err;
}
/* flush relevant btree updates */

View File

@ -793,6 +793,9 @@ next:
out:
bch2_trans_exit(&trans);
if (ret)
bch_err(c, "error %i in bch2_move_btree", ret);
return ret;
}
@ -916,8 +919,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
rewrite_old_nodes_pred, c, stats);
if (!ret) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);

View File

@ -21,6 +21,11 @@ const char * const bch2_sb_features[] = {
NULL
};
const char * const bch2_sb_compat[] = {
BCH_SB_COMPAT()
NULL
};
const char * const bch2_btree_ids[] = {
BCH_BTREE_IDS()
NULL

View File

@ -10,6 +10,7 @@
extern const char * const bch2_error_actions[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const bch2_btree_ids[];
extern const char * const bch2_csum_opts[];
extern const char * const bch2_compression_opts[];

View File

@ -935,7 +935,7 @@ static int read_btree_roots(struct bch_fs *c)
if (i == BTREE_ID_alloc &&
c->opts.reconstruct_alloc) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
continue;
}
@ -945,7 +945,7 @@ static int read_btree_roots(struct bch_fs *c)
"invalid btree root %s",
bch2_btree_ids[i]);
if (i == BTREE_ID_alloc)
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
}
ret = bch2_btree_root_read(c, i, &r->key, r->level);
@ -955,7 +955,7 @@ static int read_btree_roots(struct bch_fs *c)
"error reading btree root %s",
bch2_btree_ids[i]);
if (i == BTREE_ID_alloc)
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
}
}
@ -998,7 +998,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE))) {
if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
ret = -EINVAL;
goto err;
@ -1041,7 +1041,7 @@ int bch2_fs_recovery(struct bch_fs *c)
last_journal_entry &&
!journal_entry_empty(last_journal_entry), c,
"filesystem marked clean but journal not empty")) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
}
@ -1075,7 +1075,7 @@ use_clean:
}
if (c->opts.reconstruct_alloc) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
drop_alloc_keys(&c->journal_keys);
}
@ -1128,8 +1128,8 @@ use_clean:
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_metadata)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_info(c, "starting mark and sweep");
err = "error in mark and sweep";
@ -1215,11 +1215,11 @@ use_clean:
bch_verbose(c, "quotas done");
}
if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE))) {
if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
struct bch_move_stats stats = { 0 };
bch_verbose(c, "scanning for old btree nodes");
bch_info(c, "scanning for old btree nodes");
ret = bch2_fs_read_write(c);
if (ret)
goto err;
@ -1227,7 +1227,7 @@ use_clean:
ret = bch2_scan_old_btree_nodes(c, &stats);
if (ret)
goto err;
bch_verbose(c, "scanning for old btree nodes done");
bch_info(c, "scanning for old btree nodes done");
}
mutex_lock(&c->sb_lock);
@ -1238,7 +1238,7 @@ use_clean:
}
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
write_sb = true;
}
@ -1289,8 +1289,8 @@ int bch2_fs_initialize(struct bch_fs *c)
bch_notice(c, "initializing new filesystem");
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
if (c->opts.version_upgrade) {
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);

View File

@ -271,11 +271,13 @@ static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r)
{
struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
struct bch_fs_usage *new_scratch = NULL;
struct bch_fs_usage_online *new_scratch = NULL;
struct bch_fs_usage __percpu *new_gc = NULL;
struct bch_fs_usage *new_base = NULL;
unsigned i, bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
sizeof(u64) * new_r->nr;
int ret = 0;
memset(new_usage, 0, sizeof(new_usage));
@ -286,7 +288,7 @@ static int replicas_table_update(struct bch_fs *c,
goto err;
if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
!(new_scratch = kmalloc(bytes, GFP_KERNEL)) ||
!(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
(c->usage_gc &&
!(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
goto err;
@ -462,6 +464,36 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
return 0;
}
/* replicas delta list: */
bool bch2_replicas_delta_list_marked(struct bch_fs *c,
struct replicas_delta_list *r)
{
struct replicas_delta *d = r->d;
struct replicas_delta *top = (void *) r->d + r->used;
percpu_rwsem_assert_held(&c->mark_lock);
for (d = r->d; d != top; d = replicas_delta_next(d))
if (bch2_replicas_entry_idx(c, &d->r) < 0)
return false;
return true;
}
int bch2_replicas_delta_list_mark(struct bch_fs *c,
struct replicas_delta_list *r)
{
struct replicas_delta *d = r->d;
struct replicas_delta *top = (void *) r->d + r->used;
int ret = 0;
for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
ret = bch2_mark_replicas(c, &d->r);
return ret;
}
/* bkey replicas: */
bool bch2_bkey_replicas_marked(struct bch_fs *c,
struct bkey_s_c k)
{
@ -473,6 +505,11 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
return __bch2_mark_bkey_replicas(c, k, false);
}
/*
* Old replicas_gc mechanism: only used for journal replicas entries now, should
* die at some point:
*/
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
unsigned i;
@ -566,6 +603,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
return 0;
}
/* New much simpler mechanism for clearing out unneeded replicas entries: */
int bch2_replicas_gc2(struct bch_fs *c)
{
struct bch_replicas_cpu new = { 0 };
@ -966,11 +1005,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
percpu_down_read(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
unsigned i, nr_online = 0, dflags = 0;
unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
for (i = 0; i < e->nr_devs; i++)
for (i = 0; i < e->nr_devs; i++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);
nr_online += test_bit(e->devs[i], devs.d);
nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
}
if (nr_failed == e->nr_devs)
continue;
if (nr_online < e->nr_required)
dflags |= metadata

View File

@ -26,6 +26,31 @@ bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
int bch2_mark_replicas(struct bch_fs *,
struct bch_replicas_entry *);
struct replicas_delta {
s64 delta;
struct bch_replicas_entry r;
} __packed;
struct replicas_delta_list {
unsigned size;
unsigned used;
struct {} memset_start;
u64 nr_inodes;
u64 persistent_reserved[BCH_REPLICAS_MAX];
struct {} memset_end;
struct replicas_delta d[0];
};
static inline struct replicas_delta *
replicas_delta_next(struct replicas_delta *d)
{
return (void *) d + replicas_entry_bytes(&d->r) + 8;
}
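
replicas_delta_list packs variable-size entries back to back: each entry is an 8-byte delta followed by a bch_replicas_entry whose size depends on its device count, and replicas_delta_next() steps forward by that entry's byte size. A standalone sketch of the same packed-buffer iteration, with a simplified entry layout and illustrative names (delta_entry, entry_next, sum_deltas):

	#include <stdint.h>

	/*
	 * Simplified variable-size entry: an 8-byte delta followed by a header
	 * whose total size depends on nr_devs (the real struct replicas_delta
	 * wraps struct bch_replicas_entry here).
	 */
	struct delta_entry {
		int64_t delta;
		uint8_t data_type;
		uint8_t nr_devs;
		uint8_t devs[];
	} __attribute__((packed));

	static inline unsigned entry_bytes(const struct delta_entry *e)
	{
		return sizeof(*e) + e->nr_devs;
	}

	static inline const struct delta_entry *entry_next(const struct delta_entry *e)
	{
		return (const struct delta_entry *)
			((const char *) e + entry_bytes(e));
	}

	/* Walk every entry in a packed buffer of @used bytes and sum the deltas. */
	static int64_t sum_deltas(const void *buf, unsigned used)
	{
		const struct delta_entry *d   = buf;
		const struct delta_entry *top =
			(const struct delta_entry *) ((const char *) buf + used);
		int64_t sum = 0;

		for (; d != top; d = entry_next(d))
			sum += d->delta;

		return sum;
	}

bch2_replicas_delta_list_marked() and bch2_replicas_delta_list_mark() above are exactly this walk, applied per entry instead of summed.
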
bool bch2_replicas_delta_list_marked(struct bch_fs *, struct replicas_delta_list *);
int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *);
void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);

View File

@ -377,7 +377,6 @@ static void bch2_sb_update(struct bch_fs *c)
ca->mi = bch2_mi_to_cpu(mi->members + i);
}
/* doesn't copy member info */
static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
struct bch_sb_field *src_f, *dst_f;
@ -996,7 +995,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
struct bch_dev *ca;
unsigned i, dev;
percpu_down_write(&c->mark_lock);
percpu_down_read(&c->mark_lock);
if (!journal_seq) {
for (i = 0; i < ARRAY_SIZE(c->usage); i++)
@ -1067,7 +1066,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
}
}
percpu_up_write(&c->mark_lock);
percpu_up_read(&c->mark_lock);
for (i = 0; i < 2; i++) {
struct jset_entry_clock *clock =
@ -1093,8 +1092,8 @@ void bch2_fs_mark_clean(struct bch_fs *c)
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_metadata;
c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);

View File

@ -153,6 +153,8 @@ read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
read_attribute(congested);
read_attribute(btree_avg_write_size);
read_attribute(bucket_quantiles_last_read);
read_attribute(bucket_quantiles_last_write);
read_attribute(bucket_quantiles_fragmentation);
@ -230,9 +232,17 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
return ret;
}
static size_t bch2_btree_avg_write_size(struct bch_fs *c)
{
u64 nr = atomic64_read(&c->btree_writes_nr);
u64 sectors = atomic64_read(&c->btree_writes_sectors);
return nr ? div64_u64(sectors, nr) : 0;
}
static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
struct bch_fs_usage_online *fs_usage = bch2_fs_usage_read(c);
if (!fs_usage)
return -ENOMEM;
@ -318,6 +328,7 @@ SHOW(bch2_fs)
sysfs_print(block_size, block_bytes(c));
sysfs_print(btree_node_size, btree_bytes(c));
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
sysfs_print(read_realloc_races,
atomic_long_read(&c->read_realloc_races));
@ -513,6 +524,7 @@ struct attribute *bch2_fs_files[] = {
&sysfs_block_size,
&sysfs_btree_node_size,
&sysfs_btree_cache_size,
&sysfs_btree_avg_write_size,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
@ -800,7 +812,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
pr_buf(out,
"ec\t%16llu\n"
"available%15llu\n"
"alloc\t%16llu\n"
"\n"
"free_inc\t\t%zu/%zu\n"
"free[RESERVE_MOVINGGC]\t%zu/%zu\n"
@ -813,7 +824,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
"btree reserve cache\t%u\n",
stats.buckets_ec,
__dev_buckets_available(ca, stats),
stats.buckets_alloc,
fifo_used(&ca->free_inc), ca->free_inc.size,
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,

View File

@ -252,12 +252,13 @@ retry:
old.v, new.v)) != old.v);
ret = !(old.v & l[type].lock_fail);
EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
}
if (ret)
six_set_owner(lock, type, old);
EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking));
return ret;