Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)

Update bcachefs sources to f26267fc82 bcachefs: kill bset_tree->max_key

parent f46437f06e
commit 209695dedf
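The change pulled in by this sync removes the cached max_key field from struct bset_tree: code that previously consulted t->max_key (see the make_bfloat and __bch2_bset_search hunks below) now reads the btree node's own b->data->max_key. A rough sketch of the shape of the change follows; the struct is trimmed to the fields visible in the diff, and btree_node_max_key() is a purely illustrative helper, not something defined in the bcachefs sources.

/*
 * Sketch only, assuming b->data points at the node header that carries
 * min_key/max_key, as the hunks below imply.
 */
struct bset_tree {
	u16		data_offset;
	u16		aux_data_offset;
	u16		end_offset;
	/* struct bpos max_key;   -- dropped by this update */
};

/* Before: bounds checks used the per-bset cached key, e.g.
 *	if (bpos_cmp(*search, t->max_key) > 0)
 *		return btree_bkey_last(b, t);
 * After: the node's own bound is authoritative.
 */
static inline struct bpos btree_node_max_key(const struct btree *b)
{
	return b->data->max_key;
}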
@@ -1 +1 @@
-9922afc8b6d6227f4193feef6442f8c3d881f78c
+f26267fc82539ef3390cf2bb2bc818436dd504c7
@@ -690,10 +690,11 @@ struct bch_fs {
struct bch_fs_usage *usage_base;
struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
struct bch_fs_usage __percpu *usage_gc;
u64 __percpu *online_reserved;

/* single element mempool: */
struct mutex usage_scratch_lock;
struct bch_fs_usage *usage_scratch;
struct bch_fs_usage_online *usage_scratch;

struct io_clock io_clock[2];

@@ -804,6 +805,9 @@ struct bch_fs {
struct bio_set dio_write_bioset;
struct bio_set dio_read_bioset;

atomic64_t btree_writes_nr;
atomic64_t btree_writes_sectors;
struct bio_list btree_write_error_list;
struct work_struct btree_write_error_work;
spinlock_t btree_write_error_lock;
@@ -1398,11 +1398,17 @@ enum bch_sb_feature {
BCH_FEATURE_NR,
};

#define BCH_SB_COMPAT() \
x(alloc_info, 0) \
x(alloc_metadata, 1) \
x(extents_above_btree_updates_done, 2) \
x(bformat_overflow_done, 3)

enum bch_sb_compat {
BCH_COMPAT_FEAT_ALLOC_INFO = 0,
BCH_COMPAT_FEAT_ALLOC_METADATA = 1,
BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE = 2,
BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE = 3,
#define x(f, n) BCH_COMPAT_##f,
BCH_SB_COMPAT()
#undef x
BCH_COMPAT_NR,
};

/* options: */
@@ -698,7 +698,7 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
if (!bkey_pack_pos(max_key, b->data->max_key, b)) {
k = (void *) max_key;
bkey_init(&k->k);
k->k.p = t->max_key;
k->k.p = b->data->max_key;
}
}

@@ -782,8 +782,6 @@ retry:
while (k != btree_bkey_last(b, t))
prev = k, k = bkey_next(k);

t->max_key = bkey_unpack_pos(b, prev);

if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) {
bkey_init(&min_key.k);
min_key.k.p = b->data->min_key;
@@ -791,7 +789,7 @@ retry:

if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) {
bkey_init(&max_key.k);
max_key.k.p = t->max_key;
max_key.k.p = b->data->max_key;
}

/* Then we build the tree */
@@ -970,8 +968,6 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
min_key.u64s = max_key.u64s = 0;

if (bkey_next(k) == btree_bkey_last(b, t)) {
t->max_key = bkey_unpack_pos(b, k);

for (j = 1; j < t->size; j = j * 2 + 1)
make_bfloat(b, t, j, &min_key, &max_key);
}
@@ -1311,16 +1307,6 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
case BSET_RW_AUX_TREE:
return bset_search_write_set(b, t, search);
case BSET_RO_AUX_TREE:
/*
* Each node in the auxiliary search tree covers a certain range
* of bits, and keys above and below the set it covers might
* differ outside those bits - so we have to special case the
* start and end - handle that here:
*/

if (bpos_cmp(*search, t->max_key) > 0)
return btree_bkey_last(b, t);

return bset_search_tree(b, t, search, lossy_packed_search);
default:
unreachable();
@@ -1357,23 +1343,6 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
return m;
}

/*
* Returns the first key greater than or equal to @search
*/
static __always_inline __flatten
struct bkey_packed *bch2_bset_search(struct btree *b,
struct bset_tree *t,
struct bpos *search,
struct bkey_packed *packed_search,
const struct bkey_packed *lossy_packed_search)
{
struct bkey_packed *m = __bch2_bset_search(b, t, search,
lossy_packed_search);

return bch2_bset_search_linear(b, t, search,
packed_search, lossy_packed_search, m);
}

/* Btree node iterator */

static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
@@ -1469,6 +1438,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
unsigned i;

EBUG_ON(bpos_cmp(*search, b->data->min_key) < 0);
EBUG_ON(bpos_cmp(*search, b->data->max_key) > 0);
bset_aux_tree_verify(b);

memset(iter, 0, sizeof(*iter));
@ -906,136 +906,6 @@ out:
|
||||
return b;
|
||||
}
|
||||
|
||||
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
|
||||
struct btree_iter *iter,
|
||||
struct btree *b,
|
||||
enum btree_node_sibling sib)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree *parent;
|
||||
struct btree_node_iter node_iter;
|
||||
struct bkey_packed *k;
|
||||
struct bkey_buf tmp;
|
||||
struct btree *ret = NULL;
|
||||
unsigned level = b->c.level;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
parent = btree_iter_node(iter, level + 1);
|
||||
if (!parent)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* There's a corner case where a btree_iter might have a node locked
|
||||
* that is just outside its current pos - when
|
||||
* bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
|
||||
*
|
||||
* But the lock ordering checks in __bch2_btree_node_lock() go off of
|
||||
* iter->pos, not the node's key: so if the iterator is marked as
|
||||
* needing to be traversed, we risk deadlock if we don't bail out here:
|
||||
*/
|
||||
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
|
||||
return ERR_PTR(-EINTR);
|
||||
|
||||
if (!bch2_btree_node_relock(iter, level + 1)) {
|
||||
ret = ERR_PTR(-EINTR);
|
||||
goto out;
|
||||
}
|
||||
|
||||
node_iter = iter->l[parent->c.level].iter;
|
||||
|
||||
k = bch2_btree_node_iter_peek_all(&node_iter, parent);
|
||||
BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));
|
||||
|
||||
k = sib == btree_prev_sib
|
||||
? bch2_btree_node_iter_prev(&node_iter, parent)
|
||||
: (bch2_btree_node_iter_advance(&node_iter, parent),
|
||||
bch2_btree_node_iter_peek(&node_iter, parent));
|
||||
if (!k)
|
||||
goto out;
|
||||
|
||||
bch2_bkey_buf_unpack(&tmp, c, parent, k);
|
||||
|
||||
ret = bch2_btree_node_get(c, iter, tmp.k, level,
|
||||
SIX_LOCK_intent, _THIS_IP_);
|
||||
|
||||
if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
|
||||
struct btree_iter *linked;
|
||||
|
||||
if (!bch2_btree_node_relock(iter, level + 1))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We might have got -EINTR because trylock failed, and we're
|
||||
* holding other locks that would cause us to deadlock:
|
||||
*/
|
||||
trans_for_each_iter(trans, linked)
|
||||
if (btree_iter_lock_cmp(iter, linked) < 0)
|
||||
__bch2_btree_iter_unlock(linked);
|
||||
|
||||
if (sib == btree_prev_sib)
|
||||
btree_node_unlock(iter, level);
|
||||
|
||||
ret = bch2_btree_node_get(c, iter, tmp.k, level,
|
||||
SIX_LOCK_intent, _THIS_IP_);
|
||||
|
||||
/*
|
||||
* before btree_iter_relock() calls btree_iter_verify_locks():
|
||||
*/
|
||||
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
|
||||
btree_node_unlock(iter, level + 1);
|
||||
|
||||
if (!bch2_btree_node_relock(iter, level)) {
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
if (!IS_ERR(ret)) {
|
||||
six_unlock_intent(&ret->c.lock);
|
||||
ret = ERR_PTR(-EINTR);
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_relock(trans);
|
||||
}
|
||||
out:
|
||||
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
|
||||
btree_node_unlock(iter, level + 1);
|
||||
|
||||
if (PTR_ERR_OR_ZERO(ret) == -EINTR)
|
||||
bch2_btree_iter_upgrade(iter, level + 2);
|
||||
|
||||
BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
|
||||
|
||||
if (!IS_ERR_OR_NULL(ret)) {
|
||||
struct btree *n1 = ret, *n2 = b;
|
||||
|
||||
if (sib != btree_prev_sib)
|
||||
swap(n1, n2);
|
||||
|
||||
if (bpos_cmp(bpos_successor(n1->key.k.p),
|
||||
n2->data->min_key)) {
|
||||
char buf1[200], buf2[200];
|
||||
|
||||
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
|
||||
bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
|
||||
|
||||
bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
|
||||
"prev: %s\n"
|
||||
"next: %s\n",
|
||||
bch2_btree_ids[iter->btree_id], level,
|
||||
buf1, buf2);
|
||||
|
||||
six_unlock_intent(&ret->c.lock);
|
||||
ret = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_btree_trans_verify_locks(trans);
|
||||
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
|
||||
const struct bkey_i *k,
|
||||
enum btree_id btree_id, unsigned level)
|
||||
@ -1075,7 +945,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
" format: u64s %u fields %u %u %u %u %u\n"
|
||||
" unpack fn len: %u\n"
|
||||
" bytes used %zu/%zu (%zu%% full)\n"
|
||||
" sib u64s: %u, %u (merge threshold %zu)\n"
|
||||
" sib u64s: %u, %u (merge threshold %u)\n"
|
||||
" nr packed keys %u\n"
|
||||
" nr unpacked keys %u\n"
|
||||
" floats %zu\n"
|
||||
@ -1092,7 +962,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
b->nr.live_u64s * 100 / btree_max_u64s(c),
|
||||
b->sib_u64s[0],
|
||||
b->sib_u64s[1],
|
||||
BTREE_FOREGROUND_MERGE_THRESHOLD(c),
|
||||
c->btree_foreground_merge_threshold,
|
||||
b->nr.packed_keys,
|
||||
b->nr.unpacked_keys,
|
||||
stats.floats,
|
||||
|
@ -26,9 +26,6 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
|
||||
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
|
||||
enum btree_id, unsigned, bool);
|
||||
|
||||
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
|
||||
struct btree *, enum btree_node_sibling);
|
||||
|
||||
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
|
||||
const struct bkey_i *, enum btree_id, unsigned);
|
||||
|
||||
@ -92,7 +89,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
|
||||
#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
|
||||
#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
|
||||
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
|
||||
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
|
||||
(BTREE_FOREGROUND_MERGE_THRESHOLD(c) >> 2))
|
||||
|
||||
#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->c.btree_id].b)
|
||||
|
||||
|
@@ -779,7 +779,7 @@ static int bch2_gc_done(struct bch_fs *c,
{
struct bch_dev *ca;
bool verify = (!initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
unsigned i, dev;
int ret = 0;

@@ -1297,11 +1297,10 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
return;
}

as = bch2_btree_update_start(iter->trans, iter->btree_id,
as = bch2_btree_update_start(iter, old_nodes[0]->c.level,
btree_update_reserve_required(c, parent) + nr_old_nodes,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
NULL);
BTREE_INSERT_USE_RESERVE);
if (IS_ERR(as)) {
trace_btree_gc_coalesce_fail(c,
BTREE_GC_COALESCE_FAIL_RESERVE_GET);
@@ -1547,6 +1547,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,

b->written += sectors_to_write;

atomic64_inc(&c->btree_writes_nr);
atomic64_add(sectors_to_write, &c->btree_writes_sectors);

/* XXX: submitting IO with btree locks held: */
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
bch2_bkey_buf_exit(&k, c);
@ -12,6 +12,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "replicas.h"
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
@ -238,6 +239,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
struct btree_iter *linked, *deadlock_iter = NULL;
|
||||
u64 start_time = local_clock();
|
||||
unsigned reason = 9;
|
||||
bool ret;
|
||||
|
||||
/* Check if it's safe to block: */
|
||||
trans_for_each_iter(trans, linked) {
|
||||
@ -258,17 +260,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
*/
|
||||
if (type == SIX_LOCK_intent &&
|
||||
linked->nodes_locked != linked->nodes_intent_locked) {
|
||||
if (!(trans->nounlock)) {
|
||||
linked->locks_want = max_t(unsigned,
|
||||
linked->locks_want,
|
||||
__fls(linked->nodes_locked) + 1);
|
||||
if (!btree_iter_get_locks(linked, true, false)) {
|
||||
deadlock_iter = linked;
|
||||
reason = 1;
|
||||
}
|
||||
} else {
|
||||
linked->locks_want = max_t(unsigned,
|
||||
linked->locks_want,
|
||||
__fls(linked->nodes_locked) + 1);
|
||||
if (!btree_iter_get_locks(linked, true, false)) {
|
||||
deadlock_iter = linked;
|
||||
reason = 2;
|
||||
reason = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -298,18 +295,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
* we're about to lock, it must have the ancestors locked too:
|
||||
*/
|
||||
if (level > __fls(linked->nodes_locked)) {
|
||||
if (!(trans->nounlock)) {
|
||||
linked->locks_want =
|
||||
max(level + 1, max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want));
|
||||
if (!btree_iter_get_locks(linked, true, false)) {
|
||||
deadlock_iter = linked;
|
||||
reason = 5;
|
||||
}
|
||||
} else {
|
||||
linked->locks_want =
|
||||
max(level + 1, max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want));
|
||||
if (!btree_iter_get_locks(linked, true, false)) {
|
||||
deadlock_iter = linked;
|
||||
reason = 6;
|
||||
reason = 5;
|
||||
}
|
||||
}
|
||||
|
||||
@ -346,12 +338,23 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
if (six_trylock_type(&b->c.lock, type))
|
||||
return true;
|
||||
|
||||
if (six_lock_type(&b->c.lock, type, should_sleep_fn, p))
|
||||
return false;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->locking_iter_idx = iter->idx;
|
||||
trans->locking_pos = pos;
|
||||
trans->locking_btree_id = iter->btree_id;
|
||||
trans->locking_level = level;
|
||||
trans->locking = b;
|
||||
#endif
|
||||
|
||||
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
|
||||
start_time);
|
||||
return true;
|
||||
ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->locking = NULL;
|
||||
#endif
|
||||
if (ret)
|
||||
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
|
||||
start_time);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Btree iterator locking: */
|
||||
@ -421,50 +424,25 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter,
|
||||
unsigned new_locks_want)
|
||||
void __bch2_btree_iter_downgrade(struct btree_iter *iter,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
unsigned l = iter->level;
|
||||
unsigned l;
|
||||
|
||||
EBUG_ON(iter->locks_want >= new_locks_want);
|
||||
EBUG_ON(iter->locks_want < new_locks_want);
|
||||
|
||||
iter->locks_want = new_locks_want;
|
||||
|
||||
do {
|
||||
if (!btree_iter_node(iter, l))
|
||||
break;
|
||||
|
||||
if (!bch2_btree_node_upgrade(iter, l)) {
|
||||
iter->locks_want = l;
|
||||
return false;
|
||||
}
|
||||
|
||||
l++;
|
||||
} while (l < iter->locks_want);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void __bch2_btree_iter_downgrade(struct btree_iter *iter,
|
||||
unsigned downgrade_to)
|
||||
{
|
||||
unsigned l, new_locks_want = downgrade_to ?:
|
||||
(iter->flags & BTREE_ITER_INTENT ? 1 : 0);
|
||||
|
||||
if (iter->locks_want < downgrade_to) {
|
||||
iter->locks_want = new_locks_want;
|
||||
|
||||
while (iter->nodes_locked &&
|
||||
(l = __fls(iter->nodes_locked)) >= iter->locks_want) {
|
||||
if (l > iter->level) {
|
||||
btree_node_unlock(iter, l);
|
||||
} else {
|
||||
if (btree_node_intent_locked(iter, l)) {
|
||||
six_lock_downgrade(&iter->l[l].b->c.lock);
|
||||
iter->nodes_intent_locked ^= 1 << l;
|
||||
}
|
||||
break;
|
||||
while (iter->nodes_locked &&
|
||||
(l = __fls(iter->nodes_locked)) >= iter->locks_want) {
|
||||
if (l > iter->level) {
|
||||
btree_node_unlock(iter, l);
|
||||
} else {
|
||||
if (btree_node_intent_locked(iter, l)) {
|
||||
six_lock_downgrade(&iter->l[l].b->c.lock);
|
||||
iter->nodes_intent_locked ^= 1 << l;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -484,13 +462,12 @@ void bch2_trans_downgrade(struct btree_trans *trans)
|
||||
bool bch2_trans_relock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
bool ret = true;
|
||||
|
||||
trans_for_each_iter(trans, iter)
|
||||
if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
|
||||
ret &= bch2_btree_iter_relock(iter, true);
|
||||
|
||||
return ret;
|
||||
if (btree_iter_keep(trans, iter) &&
|
||||
!bch2_btree_iter_relock(iter, true))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void bch2_trans_unlock(struct btree_trans *trans)
|
||||
@ -1027,7 +1004,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
|
||||
|
||||
trans_for_each_iter(iter->trans, linked)
|
||||
if (linked->l[level].b == b) {
|
||||
__btree_node_unlock(linked, level);
|
||||
btree_node_unlock(linked, level);
|
||||
linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
|
||||
}
|
||||
}
|
||||
@ -2008,6 +1985,8 @@ static inline void btree_iter_copy(struct btree_iter *dst,
|
||||
|
||||
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
|
||||
unsigned btree_id, struct bpos pos,
|
||||
unsigned locks_want,
|
||||
unsigned depth,
|
||||
unsigned flags)
|
||||
{
|
||||
struct btree_iter *iter, *best = NULL;
|
||||
@ -2020,10 +1999,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
|
||||
pos.snapshot = btree_type_has_snapshots(btree_id)
|
||||
? U32_MAX : 0;
|
||||
|
||||
/* We always want a fresh iterator for node iterators: */
|
||||
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
|
||||
goto alloc_iter;
|
||||
|
||||
trans_for_each_iter(trans, iter) {
|
||||
if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
|
||||
continue;
|
||||
@ -2038,7 +2013,7 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
|
||||
|
||||
best = iter;
|
||||
}
|
||||
alloc_iter:
|
||||
|
||||
if (!best) {
|
||||
iter = btree_trans_iter_alloc(trans);
|
||||
bch2_btree_iter_init(trans, iter, btree_id);
|
||||
@ -2062,10 +2037,25 @@ alloc_iter:
|
||||
|
||||
iter->snapshot = pos.snapshot;
|
||||
|
||||
if (!(iter->flags & BTREE_ITER_INTENT))
|
||||
bch2_btree_iter_downgrade(iter);
|
||||
else if (!iter->locks_want)
|
||||
__bch2_btree_iter_upgrade_nounlock(iter, 1);
|
||||
locks_want = min(locks_want, BTREE_MAX_DEPTH);
|
||||
|
||||
if (locks_want > iter->locks_want) {
|
||||
iter->locks_want = locks_want;
|
||||
btree_iter_get_locks(iter, true, false);
|
||||
} else if (locks_want < iter->locks_want) {
|
||||
__bch2_btree_iter_downgrade(iter, locks_want);
|
||||
}
|
||||
|
||||
while (iter->level < depth) {
|
||||
btree_node_unlock(iter, iter->level);
|
||||
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
|
||||
iter->level++;
|
||||
}
|
||||
|
||||
while (iter->level > depth)
|
||||
iter->l[--iter->level].b = BTREE_ITER_NO_NODE_INIT;
|
||||
|
||||
iter->min_depth = depth;
|
||||
|
||||
bch2_btree_iter_set_pos(iter, pos);
|
||||
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
|
||||
@ -2082,21 +2072,16 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
|
||||
{
|
||||
struct btree_iter *iter =
|
||||
__bch2_trans_get_iter(trans, btree_id, pos,
|
||||
BTREE_ITER_NODES|
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
flags);
|
||||
unsigned i;
|
||||
locks_want, depth,
|
||||
BTREE_ITER_NODES|
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
flags);
|
||||
|
||||
BUG_ON(bkey_cmp(iter->pos, pos));
|
||||
|
||||
iter->locks_want = locks_want;
|
||||
iter->level = depth;
|
||||
iter->min_depth = depth;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
|
||||
iter->l[i].b = NULL;
|
||||
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
|
||||
BUG_ON(iter->locks_want != min(locks_want, BTREE_MAX_DEPTH));
|
||||
BUG_ON(iter->level != depth);
|
||||
BUG_ON(iter->min_depth != depth);
|
||||
iter->ip_allocated = _RET_IP_;
|
||||
|
||||
return iter;
|
||||
@ -2304,11 +2289,24 @@ bch2_btree_iter_node_to_text(struct printbuf *out,
|
||||
struct btree_bkey_cached_common *_b,
|
||||
enum btree_iter_type type)
|
||||
{
|
||||
pr_buf(out, " %px l=%u %s:",
|
||||
_b, _b->level, bch2_btree_ids[_b->btree_id]);
|
||||
pr_buf(out, " l=%u %s:",
|
||||
_b->level, bch2_btree_ids[_b->btree_id]);
|
||||
bch2_bpos_to_text(out, btree_node_pos(_b, type));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
static bool trans_has_btree_nodes_locked(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
|
||||
trans_for_each_iter(trans, iter)
|
||||
if (btree_iter_type(iter) != BTREE_ITER_CACHED &&
|
||||
iter->nodes_locked)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
@ -2319,14 +2317,18 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip);
|
||||
if (!trans_has_btree_nodes_locked(trans))
|
||||
continue;
|
||||
|
||||
pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
|
||||
|
||||
trans_for_each_iter(trans, iter) {
|
||||
if (!iter->nodes_locked)
|
||||
continue;
|
||||
|
||||
pr_buf(out, " iter %u %s:",
|
||||
pr_buf(out, " iter %u %c %s:",
|
||||
iter->idx,
|
||||
btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b',
|
||||
bch2_btree_ids[iter->btree_id]);
|
||||
bch2_bpos_to_text(out, iter->pos);
|
||||
pr_buf(out, "\n");
|
||||
@ -2345,17 +2347,18 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
b = READ_ONCE(trans->locking);
|
||||
if (b) {
|
||||
pr_buf(out, " locking iter %u l=%u %s:",
|
||||
iter = &trans->iters[trans->locking_iter_idx];
|
||||
pr_buf(out, " locking iter %u %c l=%u %s:",
|
||||
trans->locking_iter_idx,
|
||||
btree_iter_type(iter) == BTREE_ITER_CACHED ? 'c' : 'b',
|
||||
trans->locking_level,
|
||||
bch2_btree_ids[trans->locking_btree_id]);
|
||||
bch2_bpos_to_text(out, trans->locking_pos);
|
||||
|
||||
|
||||
pr_buf(out, " node ");
|
||||
bch2_btree_iter_node_to_text(out,
|
||||
(void *) b,
|
||||
btree_iter_type(&trans->iters[trans->locking_iter_idx]));
|
||||
btree_iter_type(iter));
|
||||
pr_buf(out, "\n");
|
||||
}
|
||||
}
|
||||
|
@@ -116,7 +116,6 @@ bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);

bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);

static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
unsigned new_locks_want)
@@ -124,9 +123,7 @@ static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);

return iter->locks_want < new_locks_want
? (!iter->trans->nounlock
? __bch2_btree_iter_upgrade(iter, new_locks_want)
: __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
? __bch2_btree_iter_upgrade(iter, new_locks_want)
: iter->uptodate <= BTREE_ITER_NEED_PEEK;
}

@@ -134,8 +131,10 @@ void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);

static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
{
if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT) ? 1 : 0)
__bch2_btree_iter_downgrade(iter, 0);
unsigned new_locks_want = (iter->flags & BTREE_ITER_INTENT ? 1 : 0);

if (iter->locks_want > new_locks_want)
__bch2_btree_iter_downgrade(iter, new_locks_want);
}

void bch2_trans_downgrade(struct btree_trans *);
@@ -175,8 +174,11 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
new_pos.snapshot = iter->snapshot;

bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
iter->k.type = KEY_TYPE_deleted;
iter->k.p.inode = iter->pos.inode = new_pos.inode;
iter->k.p.offset = iter->pos.offset = new_pos.offset;
iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot;
iter->k.size = 0;
}

/* Sort order for locking btree iterators: */
@@ -261,14 +263,17 @@ int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
void bch2_trans_unlink_iters(struct btree_trans *);

struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
struct bpos, unsigned);
struct bpos, unsigned,
unsigned, unsigned);

static inline struct btree_iter *
bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
struct bpos pos, unsigned flags)
{
struct btree_iter *iter =
__bch2_trans_get_iter(trans, btree_id, pos, flags);
__bch2_trans_get_iter(trans, btree_id, pos,
(flags & BTREE_ITER_INTENT) != 0, 0,
flags);
iter->ip_allocated = _THIS_IP_;
return iter;
}
@ -352,6 +352,7 @@ err:
|
||||
static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
struct bkey_cached_key key,
|
||||
u64 journal_seq,
|
||||
unsigned commit_flags,
|
||||
bool evict)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -390,12 +391,17 @@ retry:
|
||||
BTREE_INSERT_NOUNLOCK|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RESERVED|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM);
|
||||
(ck->journal.seq == journal_last_seq(j)
|
||||
? BTREE_INSERT_JOURNAL_RESERVED
|
||||
: 0)|
|
||||
commit_flags);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
if (ret == -EAGAIN)
|
||||
goto out;
|
||||
|
||||
if (ret) {
|
||||
bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
|
||||
"error flushing key cache: %i", ret);
|
||||
@ -438,15 +444,15 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btree_key_cache_journal_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin, u64 seq)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bkey_cached *ck =
|
||||
container_of(pin, struct bkey_cached, journal);
|
||||
struct bkey_cached_key key;
|
||||
struct btree_trans trans;
|
||||
int ret = 0;
|
||||
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
|
||||
@ -461,10 +467,13 @@ static void btree_key_cache_journal_flush(struct journal *j,
|
||||
six_unlock_read(&ck->c.lock);
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
btree_key_cache_flush_pos(&trans, key, seq, false);
|
||||
ret = btree_key_cache_flush_pos(&trans, key, seq,
|
||||
BTREE_INSERT_JOURNAL_RECLAIM, false);
|
||||
bch2_trans_exit(&trans);
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -480,7 +489,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
|
||||
if (!bch2_btree_key_cache_find(c, id, pos))
|
||||
return 0;
|
||||
|
||||
return btree_key_cache_flush_pos(trans, key, 0, true);
|
||||
return btree_key_cache_flush_pos(trans, key, 0, 0, true);
|
||||
}
|
||||
|
||||
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
|
||||
@ -517,7 +526,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
|
||||
&ck->journal, btree_key_cache_journal_flush);
|
||||
&ck->journal, bch2_btree_key_cache_journal_flush);
|
||||
|
||||
if (kick_reclaim)
|
||||
journal_reclaim_kick(&c->journal);
|
||||
@ -581,9 +590,14 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
do {
|
||||
struct rhash_head *pos, *next;
|
||||
|
||||
rht_for_each_entry_safe(ck, pos, next, tbl, bc->shrink_iter, hash) {
|
||||
pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
|
||||
|
||||
while (!rht_is_a_nulls(pos)) {
|
||||
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
|
||||
ck = container_of(pos, struct bkey_cached, hash);
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
|
||||
continue;
|
||||
goto next;
|
||||
|
||||
if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
|
||||
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
|
||||
@ -595,6 +609,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
scanned++;
|
||||
if (scanned >= nr)
|
||||
break;
|
||||
next:
|
||||
pos = next;
|
||||
}
|
||||
|
||||
bc->shrink_iter++;
|
||||
|
@@ -1,15 +1,6 @@
#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
#define _BCACHEFS_BTREE_KEY_CACHE_H

static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
size_t max_dirty = nr_keys / 4;

return max_t(ssize_t, 0, nr_dirty - max_dirty);
}

static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
@@ -29,6 +20,9 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
}

int bch2_btree_key_cache_journal_flush(struct journal *,
struct journal_entry_pin *, u64);

struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
@@ -95,7 +95,7 @@ btree_lock_want(struct btree_iter *iter, int level)
return BTREE_NODE_UNLOCKED;
}

static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
int lock_type = btree_node_locked_type(iter, level);

@@ -106,13 +106,6 @@ static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
mark_btree_node_unlocked(iter, level);
}

static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
EBUG_ON(!level && iter->trans->nounlock);

__btree_node_unlock(iter, level);
}

static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
{
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
@@ -187,27 +180,14 @@ static inline bool btree_node_lock(struct btree *b,
unsigned long ip)
{
struct btree_trans *trans = iter->trans;
bool ret;

EBUG_ON(level >= BTREE_MAX_DEPTH);
EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));

#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = b;
trans->locking_iter_idx = iter->idx;
trans->locking_pos = pos;
trans->locking_btree_id = iter->btree_id;
trans->locking_level = level;
#endif
ret = likely(six_trylock_type(&b->c.lock, type)) ||
return likely(six_trylock_type(&b->c.lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
__bch2_btree_node_lock(b, pos, level, iter, type,
should_sleep_fn, p, ip);

#ifdef CONFIG_BCACHEFS_DEBUG
trans->locking = NULL;
#endif
return ret;
}

bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
@@ -47,8 +47,6 @@ struct bset_tree {
u16 data_offset;
u16 aux_data_offset;
u16 end_offset;

struct bpos max_key;
};

struct btree_write {
@@ -98,6 +96,11 @@ struct btree {
u8 byte_order;
u8 unpack_fn_len;

struct btree_write writes[2];

/* Key/pointer for this btree node */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);

/*
* XXX: add a delete sequence number, so when bch2_btree_node_relock()
* fails because the lock sequence number has changed - i.e. the
@@ -128,11 +131,6 @@ struct btree {

/* lru list */
struct list_head list;

struct btree_write writes[2];

/* Key/pointer for this btree node */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

struct btree_cache {
@@ -372,7 +370,6 @@ struct btree_trans {
u8 nr_updates2;
unsigned used_mempool:1;
unsigned error:1;
unsigned nounlock:1;
unsigned in_traverse_all:1;

u64 iters_linked;
@ -437,10 +437,6 @@ static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
|
||||
}
|
||||
|
||||
@ -458,6 +454,10 @@ static void bch2_btree_update_free(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
if (as->took_gc_lock)
|
||||
up_read(&c->gc_lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_journal_preres_put(&c->journal, &as->journal_preres);
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
@ -893,24 +893,33 @@ void bch2_btree_update_done(struct btree_update *as)
|
||||
{
|
||||
BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
|
||||
|
||||
if (as->took_gc_lock)
|
||||
up_read(&as->c->gc_lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_btree_reserve_put(as);
|
||||
|
||||
continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
|
||||
}
|
||||
|
||||
struct btree_update *
|
||||
bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
|
||||
unsigned nr_nodes, unsigned flags,
|
||||
struct closure *cl)
|
||||
bch2_btree_update_start(struct btree_iter *iter, unsigned level,
|
||||
unsigned nr_nodes, unsigned flags)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_update *as;
|
||||
struct closure cl;
|
||||
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
|
||||
? BCH_DISK_RESERVATION_NOFAIL : 0;
|
||||
int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
? JOURNAL_RES_GET_RECLAIM : 0;
|
||||
int journal_flags = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
journal_flags |= JOURNAL_RES_GET_RESERVED;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
retry:
|
||||
/*
|
||||
* This check isn't necessary for correctness - it's just to potentially
|
||||
* prevent us from doing a lot of work that'll end up being wasted:
|
||||
@ -919,12 +928,36 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
/*
|
||||
* XXX: figure out how far we might need to split,
|
||||
* instead of locking/reserving all the way to the root:
|
||||
*/
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
|
||||
trace_trans_restart_iter_upgrade(trans->ip);
|
||||
return ERR_PTR(-EINTR);
|
||||
}
|
||||
|
||||
if (flags & BTREE_INSERT_GC_LOCK_HELD)
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
else if (!down_read_trylock(&c->gc_lock)) {
|
||||
if (flags & BTREE_INSERT_NOUNLOCK)
|
||||
return ERR_PTR(-EINTR);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
down_read(&c->gc_lock);
|
||||
if (!bch2_trans_relock(trans)) {
|
||||
up_read(&c->gc_lock);
|
||||
return ERR_PTR(-EINTR);
|
||||
}
|
||||
}
|
||||
|
||||
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
|
||||
memset(as, 0, sizeof(*as));
|
||||
closure_init(&as->cl, NULL);
|
||||
as->c = c;
|
||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||
as->btree_id = id;
|
||||
as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
|
||||
as->btree_id = iter->btree_id;
|
||||
INIT_LIST_HEAD(&as->list);
|
||||
INIT_LIST_HEAD(&as->unwritten_list);
|
||||
INIT_LIST_HEAD(&as->write_blocked_list);
|
||||
@ -936,16 +969,25 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags|JOURNAL_RES_GET_NONBLOCK);
|
||||
if (ret == -EAGAIN) {
|
||||
if (flags & BTREE_INSERT_NOUNLOCK)
|
||||
return ERR_PTR(-EINTR);
|
||||
/*
|
||||
* this would be cleaner if bch2_journal_preres_get() took a
|
||||
* closure argument
|
||||
*/
|
||||
if (flags & BTREE_INSERT_NOUNLOCK) {
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
goto err;
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
goto err;
|
||||
|
||||
if (!bch2_trans_relock(trans)) {
|
||||
ret = -EINTR;
|
||||
@ -960,7 +1002,8 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl);
|
||||
ret = bch2_btree_reserve_get(as, nr_nodes, flags,
|
||||
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -975,6 +1018,18 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
|
||||
return as;
|
||||
err:
|
||||
bch2_btree_update_free(as);
|
||||
|
||||
if (ret == -EAGAIN) {
|
||||
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
closure_sync(&cl);
|
||||
ret = -EINTR;
|
||||
}
|
||||
|
||||
if (ret == -EINTR && bch2_trans_relock(trans))
|
||||
goto retry;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -1419,6 +1474,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
|
||||
int old_live_u64s = b->nr.live_u64s;
|
||||
int live_u64s_added, u64s_added;
|
||||
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
|
||||
BUG_ON(!b->c.level);
|
||||
BUG_ON(!as || as->b);
|
||||
@ -1450,14 +1506,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
|
||||
bch2_btree_node_unlock_write(b, iter);
|
||||
|
||||
btree_node_interior_verify(c, b);
|
||||
|
||||
/*
|
||||
* when called from the btree_split path the new nodes aren't added to
|
||||
* the btree iterator yet, so the merge path's unlock/wait/relock dance
|
||||
* won't work:
|
||||
*/
|
||||
bch2_foreground_maybe_merge(c, iter, b->c.level,
|
||||
flags|BTREE_INSERT_NOUNLOCK);
|
||||
return;
|
||||
split:
|
||||
btree_split(as, b, iter, keys, flags);
|
||||
@ -1466,109 +1514,73 @@ split:
|
||||
int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
|
||||
unsigned flags)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree *b = iter_l(iter)->b;
|
||||
struct btree_update *as;
|
||||
struct closure cl;
|
||||
unsigned l;
|
||||
int ret = 0;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
/* Hack, because gc and splitting nodes doesn't mix yet: */
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
|
||||
!down_read_trylock(&c->gc_lock)) {
|
||||
if (flags & BTREE_INSERT_NOUNLOCK) {
|
||||
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_trans_relock(trans))
|
||||
ret = -EINTR;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: figure out how far we might need to split,
|
||||
* instead of locking/reserving all the way to the root:
|
||||
*/
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
|
||||
trace_trans_restart_iter_upgrade(trans->ip);
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
as = bch2_btree_update_start(trans, iter->btree_id,
|
||||
btree_update_reserve_required(c, b), flags,
|
||||
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
|
||||
if (IS_ERR(as)) {
|
||||
ret = PTR_ERR(as);
|
||||
if (ret == -EAGAIN) {
|
||||
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
|
||||
bch2_trans_unlock(trans);
|
||||
ret = -EINTR;
|
||||
|
||||
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
as = bch2_btree_update_start(iter, iter->level,
|
||||
btree_update_reserve_required(c, b), flags);
|
||||
if (IS_ERR(as))
|
||||
return PTR_ERR(as);
|
||||
|
||||
btree_split(as, b, iter, NULL, flags);
|
||||
bch2_btree_update_done(as);
|
||||
|
||||
/*
|
||||
* We haven't successfully inserted yet, so don't downgrade all the way
|
||||
* back to read locks;
|
||||
*/
|
||||
__bch2_btree_iter_downgrade(iter, 1);
|
||||
out:
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++)
|
||||
ret = bch2_foreground_maybe_merge(c, iter, l, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __bch2_foreground_maybe_merge(struct bch_fs *c,
|
||||
struct btree_iter *iter,
|
||||
unsigned level,
|
||||
unsigned flags,
|
||||
enum btree_node_sibling sib)
|
||||
int __bch2_foreground_maybe_merge(struct bch_fs *c,
|
||||
struct btree_iter *iter,
|
||||
unsigned level,
|
||||
unsigned flags,
|
||||
enum btree_node_sibling sib)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree_iter *sib_iter = NULL;
|
||||
struct btree_update *as;
|
||||
struct bkey_format_state new_s;
|
||||
struct bkey_format new_f;
|
||||
struct bkey_i delete;
|
||||
struct btree *b, *m, *n, *prev, *next, *parent;
|
||||
struct closure cl;
|
||||
struct bpos sib_pos;
|
||||
size_t sib_u64s;
|
||||
int ret = 0;
|
||||
int ret = 0, ret2 = 0;
|
||||
|
||||
BUG_ON(!btree_node_locked(iter, level));
|
||||
|
||||
closure_init_stack(&cl);
|
||||
retry:
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
BUG_ON(!btree_node_locked(iter, level));
|
||||
|
||||
b = iter->l[level].b;
|
||||
|
||||
parent = btree_node_parent(iter, b);
|
||||
if (!parent)
|
||||
if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) ||
|
||||
(sib == btree_next_sib && !bpos_cmp(b->data->max_key, POS_MAX))) {
|
||||
b->sib_u64s[sib] = U16_MAX;
|
||||
goto out;
|
||||
|
||||
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
|
||||
goto out;
|
||||
|
||||
/* XXX: can't be holding read locks */
|
||||
m = bch2_btree_node_get_sibling(c, iter, b, sib);
|
||||
if (IS_ERR(m)) {
|
||||
ret = PTR_ERR(m);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* NULL means no sibling: */
|
||||
if (!m) {
|
||||
sib_pos = sib == btree_prev_sib
|
||||
? bpos_predecessor(b->data->min_key)
|
||||
: bpos_successor(b->data->max_key);
|
||||
|
||||
sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id,
|
||||
sib_pos, U8_MAX, level,
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(sib_iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
m = sib_iter->l[level].b;
|
||||
|
||||
if (btree_node_parent(iter, b) !=
|
||||
btree_node_parent(sib_iter, m)) {
|
||||
b->sib_u64s[sib] = U16_MAX;
|
||||
goto out;
|
||||
}
|
||||
@ -1581,6 +1593,8 @@ retry:
|
||||
next = m;
|
||||
}
|
||||
|
||||
BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
|
||||
|
||||
bch2_bkey_format_init(&new_s);
|
||||
bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
|
||||
__bch2_btree_calc_format(&new_s, prev);
|
||||
@ -1598,33 +1612,21 @@ retry:
|
||||
}
|
||||
|
||||
sib_u64s = min(sib_u64s, btree_max_u64s(c));
|
||||
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
|
||||
b->sib_u64s[sib] = sib_u64s;
|
||||
|
||||
if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
|
||||
six_unlock_intent(&m->c.lock);
|
||||
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* We're changing btree topology, doesn't mix with gc: */
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
|
||||
!down_read_trylock(&c->gc_lock))
|
||||
goto err_cycle_gc_lock;
|
||||
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
|
||||
ret = -EINTR;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
as = bch2_btree_update_start(trans, iter->btree_id,
|
||||
parent = btree_node_parent(iter, b);
|
||||
as = bch2_btree_update_start(iter, level,
|
||||
btree_update_reserve_required(c, parent) + 1,
|
||||
flags|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE,
|
||||
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
|
||||
if (IS_ERR(as)) {
|
||||
ret = PTR_ERR(as);
|
||||
goto err_unlock;
|
||||
}
|
||||
BTREE_INSERT_USE_RESERVE);
|
||||
ret = PTR_ERR_OR_ZERO(as);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
trace_btree_merge(c, b);
|
||||
|
||||
@ -1658,6 +1660,7 @@ retry:
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
|
||||
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
|
||||
six_lock_increment(&m->c.lock, SIX_LOCK_intent);
|
||||
bch2_btree_iter_node_drop(iter, b);
|
||||
bch2_btree_iter_node_drop(iter, m);
|
||||
|
||||
@ -1671,11 +1674,9 @@ retry:
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
out:
|
||||
bch2_btree_trans_verify_locks(trans);
|
||||
bch2_trans_iter_free(trans, sib_iter);
|
||||
|
||||
/*
|
||||
* Don't downgrade locks here: we're called after successful insert,
|
||||
@ -1686,58 +1687,56 @@ out:
|
||||
* split path, and downgrading to read locks in there is potentially
|
||||
* confusing:
|
||||
*/
|
||||
closure_sync(&cl);
|
||||
return;
|
||||
|
||||
err_cycle_gc_lock:
|
||||
six_unlock_intent(&m->c.lock);
|
||||
|
||||
if (flags & BTREE_INSERT_NOUNLOCK)
|
||||
goto out;
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
up_read(&c->gc_lock);
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
|
||||
err_unlock:
|
||||
six_unlock_intent(&m->c.lock);
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
return ret ?: ret2;
|
||||
err:
|
||||
BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
|
||||
|
||||
if ((ret == -EAGAIN || ret == -EINTR) &&
|
||||
!(flags & BTREE_INSERT_NOUNLOCK)) {
|
||||
bch2_trans_unlock(trans);
|
||||
closure_sync(&cl);
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
goto out;
|
||||
bch2_trans_iter_put(trans, sib_iter);
|
||||
sib_iter = NULL;
|
||||
|
||||
if (ret == -EINTR && bch2_trans_relock(trans))
|
||||
goto retry;
|
||||
|
||||
if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
|
||||
ret2 = ret;
|
||||
ret = bch2_btree_iter_traverse_all(trans);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
struct btree *b, unsigned flags,
|
||||
struct closure *cl)
|
||||
/**
|
||||
* bch_btree_node_rewrite - Rewrite/move a btree node
|
||||
*/
|
||||
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
__le64 seq, unsigned flags)
|
||||
{
|
||||
struct btree *n, *parent = btree_node_parent(iter, b);
|
||||
struct btree *b, *n, *parent;
|
||||
struct btree_update *as;
|
||||
int ret;
|
||||
|
||||
as = bch2_btree_update_start(iter->trans, iter->btree_id,
|
||||
flags |= BTREE_INSERT_NOFAIL;
|
||||
retry:
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
if (!b || b->data->keys.seq != seq)
|
||||
goto out;
|
||||
|
||||
parent = btree_node_parent(iter, b);
|
||||
as = bch2_btree_update_start(iter, b->c.level,
|
||||
(parent
|
||||
? btree_update_reserve_required(c, parent)
|
||||
: 0) + 1,
|
||||
flags, cl);
|
||||
if (IS_ERR(as)) {
|
||||
flags);
|
||||
ret = PTR_ERR_OR_ZERO(as);
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
if (ret) {
|
||||
trace_btree_gc_rewrite_node_fail(c, b);
|
||||
return PTR_ERR(as);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
@ -1768,60 +1767,8 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_btree_node_rewrite - Rewrite/move a btree node
|
||||
*
|
||||
* Returns 0 on success, -EINTR or -EAGAIN on failure (i.e.
|
||||
* btree_check_reserve() has to wait)
|
||||
*/
|
||||
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
__le64 seq, unsigned flags)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct closure cl;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
flags |= BTREE_INSERT_NOFAIL;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
bch2_btree_iter_upgrade(iter, U8_MAX);
|
||||
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
bch2_trans_unlock(trans);
|
||||
down_read(&c->gc_lock);
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
if (!b || b->data->keys.seq != seq)
|
||||
break;
|
||||
|
||||
ret = __btree_node_rewrite(c, iter, b, flags, &cl);
|
||||
if (ret != -EAGAIN &&
|
||||
ret != -EINTR)
|
||||
break;
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
closure_sync(&cl);
|
||||
}
|
||||
|
||||
out:
|
||||
bch2_btree_iter_downgrade(iter);
|
||||
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
closure_sync(&cl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1892,71 +1839,34 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
|
||||
struct btree_update *as = NULL;
|
||||
struct btree *new_hash = NULL;
|
||||
struct closure cl;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX))
|
||||
return -EINTR;
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
bch2_trans_unlock(iter->trans);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_trans_relock(iter->trans)) {
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check btree_ptr_hash_val() after @b is locked by
|
||||
* btree_iter_traverse():
|
||||
*/
|
||||
if (btree_ptr_hash_val(new_key) != b->hash_val) {
|
||||
/* bch2_btree_reserve_get will unlock */
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
if (ret) {
|
||||
bch2_trans_unlock(iter->trans);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_trans_relock(iter->trans)) {
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
}
|
||||
if (!bch2_trans_relock(iter->trans))
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
new_hash = bch2_btree_node_mem_alloc(c);
|
||||
}
|
||||
retry:
|
||||
as = bch2_btree_update_start(iter->trans, iter->btree_id,
|
||||
parent ? btree_update_reserve_required(c, parent) : 0,
|
||||
BTREE_INSERT_NOFAIL, &cl);
|
||||
|
||||
as = bch2_btree_update_start(iter, b->c.level,
|
||||
parent ? btree_update_reserve_required(c, parent) : 0,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
if (IS_ERR(as)) {
|
||||
ret = PTR_ERR(as);
|
||||
if (ret == -EAGAIN)
|
||||
ret = -EINTR;
|
||||
|
||||
if (ret == -EINTR) {
|
||||
bch2_trans_unlock(iter->trans);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (bch2_trans_relock(iter->trans))
|
||||
goto retry;
|
||||
}
|
||||
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
|
||||
if (ret)
|
||||
goto err_free_update;
|
||||
|
||||
__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
|
||||
|
||||
bch2_btree_iter_downgrade(iter);
|
||||
@ -1969,12 +1879,9 @@ err:
|
||||
six_unlock_write(&new_hash->c.lock);
|
||||
six_unlock_intent(&new_hash->c.lock);
|
||||
}
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
return ret;
|
||||
err_free_update:
|
||||
bch2_btree_update_free(as);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Init code: */
|
||||
|
@@ -48,6 +48,7 @@ struct btree_update {
} mode;

unsigned nodes_written:1;
unsigned took_gc_lock:1;

enum btree_id btree_id;

@@ -120,8 +121,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,

void bch2_btree_update_done(struct btree_update *);
struct btree_update *
bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
unsigned, struct closure *);
bch2_btree_update_start(struct btree_iter *, unsigned, unsigned, unsigned);

void bch2_btree_interior_update_will_free_node(struct btree_update *,
struct btree *);
@@ -132,10 +132,10 @@ void bch2_btree_insert_node(struct btree_update *, struct btree *,
unsigned);
int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned);

void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
unsigned, unsigned, enum btree_node_sibling);
int __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
unsigned, unsigned, enum btree_node_sibling);

static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
static inline int bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
struct btree_iter *iter,
unsigned level, unsigned flags,
enum btree_node_sibling sib)
@@ -143,27 +143,27 @@ static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
struct btree *b;

if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
return;
return 0;

if (!bch2_btree_node_relock(iter, level))
return;
return 0;

b = iter->l[level].b;
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
return;
return 0;

__bch2_foreground_maybe_merge(c, iter, level, flags, sib);
return __bch2_foreground_maybe_merge(c, iter, level, flags, sib);
}

static inline void bch2_foreground_maybe_merge(struct bch_fs *c,
static inline int bch2_foreground_maybe_merge(struct bch_fs *c,
struct btree_iter *iter,
unsigned level,
unsigned flags)
{
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_prev_sib);
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_next_sib);
return bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_prev_sib) ?:
bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
btree_next_sib);
}

void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
@ -134,7 +134,7 @@ fix_iter:
	return true;
}

static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
			      unsigned i, u64 seq)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
@ -145,14 +145,15 @@ static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
	bch2_btree_node_write_cond(c, b,
		(btree_current_write(b) == w && w->journal.seq == seq));
	six_unlock_read(&b->c.lock);
	return 0;
}

static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 0, seq);
}

static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
	return __btree_node_flush(j, pin, 1, seq);
}
@ -375,7 +376,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
			       struct btree_insert_entry **stopped_at)
{
	struct bch_fs *c = trans->c;
	struct bch_fs_usage *fs_usage = NULL;
	struct btree_insert_entry *i;
	struct btree_trans_commit_hook *h;
	unsigned u64s = 0;
@ -423,7 +423,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,

	if (marking) {
		percpu_down_read(&c->mark_lock);
		fs_usage = bch2_fs_usage_scratch_get(c);
	}

	/* Must be called under mark_lock: */
	if (marking && trans->fs_usage_deltas &&
	    !bch2_replicas_delta_list_marked(c, trans->fs_usage_deltas)) {
		ret = BTREE_INSERT_NEED_MARK_REPLICAS;
		goto err;
	}

	/*
@ -462,21 +468,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
			i->k->k.version = MAX_VERSION;
	}

	/* Must be called under mark_lock: */
	if (marking && trans->fs_usage_deltas &&
	    bch2_replicas_delta_list_apply(c, fs_usage,
					   trans->fs_usage_deltas)) {
		ret = BTREE_INSERT_NEED_MARK_REPLICAS;
		goto err;
	}

	trans_for_each_update(trans, i)
		if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
			bch2_mark_update(trans, i->iter, i->k,
					 fs_usage, i->trigger_flags);
					 NULL, i->trigger_flags);

	if (marking)
		bch2_trans_fs_usage_apply(trans, fs_usage);
	if (marking && trans->fs_usage_deltas)
		bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas);

	if (unlikely(c->gc_pos.phase))
		bch2_trans_mark_gc(trans);
@ -485,31 +483,85 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
		do_btree_insert_one(trans, i->iter, i->k);
err:
	if (marking) {
		bch2_fs_usage_scratch_put(c, fs_usage);
		percpu_up_read(&c->mark_lock);
	}

	return ret;
}
|
||||
static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree_iter *iter)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
struct btree *b = iter_l(iter)->b;
|
||||
struct bkey_s_c old;
|
||||
int u64s_delta = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Inserting directly into interior nodes is an uncommon operation with
|
||||
* various weird edge cases: also, a lot of things about
|
||||
* BTREE_ITER_NODES iters need to be audited
|
||||
*/
|
||||
if (unlikely(btree_iter_type(iter) != BTREE_ITER_KEYS))
|
||||
return 0;
|
||||
|
||||
BUG_ON(iter->level);
|
||||
|
||||
trans_for_each_update2(trans, i) {
|
||||
if (iter_l(i->iter)->b != b)
|
||||
continue;
|
||||
|
||||
old = bch2_btree_iter_peek_slot(i->iter);
|
||||
ret = bkey_err(old);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
|
||||
u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
|
||||
}
|
||||
|
||||
return u64s_delta <= 0
|
||||
? (bch2_foreground_maybe_merge(trans->c, iter, iter->level,
|
||||
trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
|
||||
: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get journal reservation, take write locks, and attempt to do btree update(s):
|
||||
*/
|
||||
static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
struct btree_insert_entry **stopped_at)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
struct btree_iter *iter;
|
||||
int ret;
|
||||
|
||||
trans_for_each_update2(trans, i)
|
||||
BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level));
|
||||
trans_for_each_update2(trans, i) {
|
||||
struct btree *b;
|
||||
|
||||
ret = bch2_journal_preres_get(&trans->c->journal,
|
||||
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
|
||||
|
||||
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED)
|
||||
continue;
|
||||
|
||||
b = iter_l(i->iter)->b;
|
||||
if (b->sib_u64s[0] < c->btree_foreground_merge_threshold ||
|
||||
b->sib_u64s[1] < c->btree_foreground_merge_threshold) {
|
||||
ret = maybe_do_btree_merge(trans, i->iter);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
trans_for_each_update2(trans, i)
|
||||
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal,
|
||||
&trans->journal_preres, trans->journal_preres_u64s,
|
||||
JOURNAL_RES_GET_NONBLOCK|
|
||||
((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
? JOURNAL_RES_GET_RECLAIM : 0));
|
||||
((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
|
||||
? JOURNAL_RES_GET_RESERVED : 0));
|
||||
if (unlikely(ret == -EAGAIN))
|
||||
ret = bch2_trans_journal_preres_get_cold(trans,
|
||||
trans->journal_preres_u64s);
|
||||
@ -547,7 +599,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
|
||||
trans_for_each_update2(trans, i)
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
bch2_btree_node_lock_for_insert(trans->c,
|
||||
bch2_btree_node_lock_for_insert(c,
|
||||
iter_l(i->iter)->b, i->iter);
|
||||
|
||||
ret = bch2_trans_commit_write_locked(trans, stopped_at);
|
||||
@ -558,35 +610,45 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
i->iter);
|
||||
|
||||
if (!ret && trans->journal_pin)
|
||||
bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
|
||||
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
|
||||
trans->journal_pin, NULL);
|
||||
|
||||
/*
|
||||
* Drop journal reservation after dropping write locks, since dropping
|
||||
* the journal reservation may kick off a journal write:
|
||||
*/
|
||||
bch2_journal_res_put(&trans->c->journal, &trans->journal_res);
|
||||
bch2_journal_res_put(&c->journal, &trans->journal_res);
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
if (trans->flags & BTREE_INSERT_NOUNLOCK)
|
||||
trans->nounlock = true;
|
||||
|
||||
if (!(trans->flags & BTREE_INSERT_NOUNLOCK))
|
||||
trans_for_each_update2(trans, i)
|
||||
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
|
||||
!same_leaf_as_prev(trans, i))
|
||||
bch2_foreground_maybe_merge(trans->c, i->iter,
|
||||
0, trans->flags);
|
||||
|
||||
trans->nounlock = false;
|
||||
|
||||
bch2_trans_downgrade(trans);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int journal_reclaim_wait_done(struct bch_fs *c)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = bch2_journal_error(&c->journal);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = !bch2_btree_key_cache_must_wait(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (mutex_trylock(&c->journal.reclaim_lock)) {
|
||||
ret = bch2_journal_reclaim(&c->journal);
|
||||
mutex_unlock(&c->journal.reclaim_lock);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = !bch2_btree_key_cache_must_wait(c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline
|
||||
int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i,
|
||||
@ -641,11 +703,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
case BTREE_INSERT_NEED_MARK_REPLICAS:
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (bch2_trans_relock(trans))
|
||||
return 0;
|
||||
@ -656,6 +716,10 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
case BTREE_INSERT_NEED_JOURNAL_RES:
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
|
||||
!(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
|
||||
return -EAGAIN;
|
||||
|
||||
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -669,11 +733,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
do {
|
||||
mutex_lock(&c->journal.reclaim_lock);
|
||||
ret = bch2_journal_reclaim(&c->journal);
|
||||
mutex_unlock(&c->journal.reclaim_lock);
|
||||
} while (!ret && bch2_btree_key_cache_must_wait(c));
|
||||
wait_event(c->journal.reclaim_wait,
|
||||
(ret = journal_reclaim_wait_done(c)));
|
||||
|
||||
if (!ret && bch2_trans_relock(trans))
|
||||
return 0;
|
||||
@ -920,17 +981,14 @@ int __bch2_trans_commit(struct btree_trans *trans)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're not using bch2_btree_iter_upgrade here because
|
||||
* we know trans->nounlock can't be set:
|
||||
*/
|
||||
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
|
||||
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
|
||||
if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
|
||||
trace_trans_restart_upgrade(trans->ip);
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
|
||||
|
||||
u64s = jset_u64s(i->k->k.u64s);
|
||||
if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
|
||||
likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
|
||||
|
@ -167,37 +167,6 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
percpu_up_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
||||
{
|
||||
if (fs_usage == c->usage_scratch)
|
||||
mutex_unlock(&c->usage_scratch_lock);
|
||||
else
|
||||
kfree(fs_usage);
|
||||
}
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage *ret;
|
||||
unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
|
||||
|
||||
ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (mutex_trylock(&c->usage_scratch_lock))
|
||||
goto out_pool;
|
||||
|
||||
ret = kzalloc(bytes, GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&c->usage_scratch_lock);
|
||||
out_pool:
|
||||
ret = c->usage_scratch;
|
||||
memset(ret, 0, bytes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
|
||||
unsigned journal_seq,
|
||||
bool gc)
|
||||
@ -252,30 +221,28 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
|
||||
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage *ret;
|
||||
unsigned seq, i, v, u64s = fs_usage_u64s(c);
|
||||
retry:
|
||||
ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
|
||||
if (unlikely(!ret))
|
||||
return NULL;
|
||||
struct bch_fs_usage_online *ret;
|
||||
unsigned seq, i, u64s;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
|
||||
v = fs_usage_u64s(c);
|
||||
if (unlikely(u64s != v)) {
|
||||
u64s = v;
|
||||
ret = kmalloc(sizeof(struct bch_fs_usage_online) +
|
||||
sizeof(u64) + c->replicas.nr, GFP_NOFS);
|
||||
if (unlikely(!ret)) {
|
||||
percpu_up_read(&c->mark_lock);
|
||||
kfree(ret);
|
||||
goto retry;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret->online_reserved = percpu_u64_get(c->online_reserved);
|
||||
|
||||
u64s = fs_usage_u64s(c);
|
||||
do {
|
||||
seq = read_seqcount_begin(&c->usage_lock);
|
||||
memcpy(ret, c->usage_base, u64s * sizeof(u64));
|
||||
memcpy(&ret->u, c->usage_base, u64s * sizeof(u64));
|
||||
for (i = 0; i < ARRAY_SIZE(c->usage); i++)
|
||||
acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[i], u64s);
|
||||
acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
|
||||
} while (read_seqcount_retry(&c->usage_lock, seq));
|
||||
|
||||
return ret;
|
||||
@ -311,31 +278,31 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
|
||||
|
||||
void bch2_fs_usage_to_text(struct printbuf *out,
|
||||
struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage)
|
||||
struct bch_fs_usage_online *fs_usage)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
|
||||
|
||||
pr_buf(out, "hidden:\t\t\t\t%llu\n",
|
||||
fs_usage->hidden);
|
||||
fs_usage->u.hidden);
|
||||
pr_buf(out, "data:\t\t\t\t%llu\n",
|
||||
fs_usage->data);
|
||||
fs_usage->u.data);
|
||||
pr_buf(out, "cached:\t\t\t\t%llu\n",
|
||||
fs_usage->cached);
|
||||
fs_usage->u.cached);
|
||||
pr_buf(out, "reserved:\t\t\t%llu\n",
|
||||
fs_usage->reserved);
|
||||
fs_usage->u.reserved);
|
||||
pr_buf(out, "nr_inodes:\t\t\t%llu\n",
|
||||
fs_usage->nr_inodes);
|
||||
fs_usage->u.nr_inodes);
|
||||
pr_buf(out, "online reserved:\t\t%llu\n",
|
||||
fs_usage->online_reserved);
|
||||
|
||||
for (i = 0;
|
||||
i < ARRAY_SIZE(fs_usage->persistent_reserved);
|
||||
i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
|
||||
i++) {
|
||||
pr_buf(out, "%u replicas:\n", i + 1);
|
||||
pr_buf(out, "\treserved:\t\t%llu\n",
|
||||
fs_usage->persistent_reserved[i]);
|
||||
fs_usage->u.persistent_reserved[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < c->replicas.nr; i++) {
|
||||
@ -344,7 +311,7 @@ void bch2_fs_usage_to_text(struct printbuf *out,
|
||||
|
||||
pr_buf(out, "\t");
|
||||
bch2_replicas_entry_to_text(out, e);
|
||||
pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]);
|
||||
pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -360,12 +327,12 @@ static u64 avail_factor(u64 r)
|
||||
return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
|
||||
{
|
||||
return min(fs_usage->hidden +
|
||||
fs_usage->btree +
|
||||
fs_usage->data +
|
||||
reserve_factor(fs_usage->reserved +
|
||||
return min(fs_usage->u.hidden +
|
||||
fs_usage->u.btree +
|
||||
fs_usage->u.data +
|
||||
reserve_factor(fs_usage->u.reserved +
|
||||
fs_usage->online_reserved),
|
||||
c->capacity);
|
||||
}
|
||||
@ -382,7 +349,7 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
|
||||
bch2_fs_usage_read_one(c, &c->usage_base->btree);
|
||||
reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
|
||||
bch2_fs_usage_read_one(c, &c->usage_base->online_reserved);
|
||||
percpu_u64_get(c->online_reserved);
|
||||
|
||||
ret.used = min(ret.capacity, data + reserve_factor(reserved));
|
||||
ret.free = ret.capacity - ret.used;
|
||||
@ -436,43 +403,6 @@ static bool bucket_became_unavailable(struct bucket_mark old,
|
||||
!is_available_bucket(new);
|
||||
}
|
||||
|
||||
int bch2_fs_usage_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct disk_reservation *disk_res,
|
||||
unsigned journal_seq)
|
||||
{
|
||||
s64 added = fs_usage->data + fs_usage->reserved;
|
||||
s64 should_not_have_added;
|
||||
int ret = 0;
|
||||
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
/*
|
||||
* Not allowed to reduce sectors_available except by getting a
|
||||
* reservation:
|
||||
*/
|
||||
should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
|
||||
if (WARN_ONCE(should_not_have_added > 0,
|
||||
"disk usage increased by %lli more than reservation of %llu",
|
||||
added, disk_res ? disk_res->sectors : 0)) {
|
||||
atomic64_sub(should_not_have_added, &c->sectors_available);
|
||||
added -= should_not_have_added;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
if (added > 0) {
|
||||
disk_res->sectors -= added;
|
||||
fs_usage->online_reserved -= added;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false),
|
||||
(u64 *) fs_usage, fs_usage_u64s(c));
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
struct bch_dev_usage *dev_usage,
|
||||
enum bch_data_type type,
|
||||
@ -494,6 +424,8 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
preempt_disable();
|
||||
if (!fs_usage)
|
||||
fs_usage = fs_usage_ptr(c, journal_seq, gc);
|
||||
u = dev_usage_ptr(ca, journal_seq, gc);
|
||||
|
||||
if (bucket_type(old))
|
||||
@ -504,8 +436,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
account_bucket(fs_usage, u, bucket_type(new),
|
||||
1, ca->mi.bucket_size);
|
||||
|
||||
u->buckets_alloc +=
|
||||
(int) new.owned_by_allocator - (int) old.owned_by_allocator;
|
||||
u->buckets_ec += (int) new.stripe - (int) old.stripe;
|
||||
u->buckets_unavailable +=
|
||||
is_unavailable_bucket(new) - is_unavailable_bucket(old);
|
||||
@ -524,22 +454,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch2_wake_allocator(ca);
|
||||
}
|
||||
|
||||
static inline int update_replicas(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct bch_replicas_entry *r,
|
||||
s64 sectors)
|
||||
static inline void update_replicas(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct bch_replicas_entry *r,
|
||||
s64 sectors)
|
||||
{
|
||||
int idx = bch2_replicas_entry_idx(c, r);
|
||||
|
||||
if (idx < 0)
|
||||
return -1;
|
||||
|
||||
if (!fs_usage)
|
||||
return 0;
|
||||
BUG_ON(idx < 0);
|
||||
|
||||
fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
|
||||
fs_usage->replicas[idx] += sectors;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void update_cached_sectors(struct bch_fs *c,
|
||||
@ -586,6 +511,7 @@ static inline void update_replicas_list(struct btree_trans *trans,
|
||||
n = (void *) d->d + d->used;
|
||||
n->delta = sectors;
|
||||
memcpy(&n->r, r, replicas_entry_bytes(r));
|
||||
bch2_replicas_entry_sort(&n->r);
|
||||
d->used += b;
|
||||
}
|
||||
|
||||
@ -599,43 +525,6 @@ static inline void update_cached_sectors_list(struct btree_trans *trans,
|
||||
update_replicas_list(trans, &r.e, sectors);
|
||||
}
|
||||
|
||||
static inline struct replicas_delta *
|
||||
replicas_delta_next(struct replicas_delta *d)
|
||||
{
|
||||
return (void *) d + replicas_entry_bytes(&d->r) + 8;
|
||||
}
|
||||
|
||||
int bch2_replicas_delta_list_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct replicas_delta_list *r)
|
||||
{
|
||||
struct replicas_delta *d = r->d;
|
||||
struct replicas_delta *top = (void *) r->d + r->used;
|
||||
unsigned i;
|
||||
|
||||
for (d = r->d; d != top; d = replicas_delta_next(d))
|
||||
if (update_replicas(c, fs_usage, &d->r, d->delta)) {
|
||||
top = d;
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
if (!fs_usage)
|
||||
return 0;
|
||||
|
||||
fs_usage->nr_inodes += r->nr_inodes;
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
fs_usage->reserved += r->persistent_reserved[i];
|
||||
fs_usage->persistent_reserved[i] += r->persistent_reserved[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
unwind:
|
||||
for (d = r->d; d != top; d = replicas_delta_next(d))
|
||||
update_replicas(c, fs_usage, &d->r, -d->delta);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define do_mark_fn(fn, c, pos, flags, ...) \
|
||||
({ \
|
||||
int gc, ret = 0; \
|
||||
@ -653,7 +542,6 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator,
|
||||
bool gc)
|
||||
{
|
||||
struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
struct bucket_mark old, new;
|
||||
|
||||
@ -661,13 +549,6 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
new.owned_by_allocator = owned_by_allocator;
|
||||
}));
|
||||
|
||||
/*
|
||||
* XXX: this is wrong, this means we'll be doing updates to the percpu
|
||||
* buckets_alloc counter that don't have an open journal buffer and
|
||||
* we'll race with the machinery that accumulates that to ca->usage_base
|
||||
*/
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new, 0, gc);
|
||||
|
||||
BUG_ON(!gc &&
|
||||
!owned_by_allocator && !old.owned_by_allocator);
|
||||
|
||||
@ -1416,22 +1297,15 @@ int bch2_mark_update(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *trans,
|
||||
struct bch_fs_usage *fs_usage)
|
||||
static noinline __cold
|
||||
void fs_usage_apply_warn(struct btree_trans *trans,
|
||||
unsigned disk_res_sectors)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
static int warned_disk_usage = 0;
|
||||
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
|
||||
char buf[200];
|
||||
|
||||
if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res,
|
||||
trans->journal_res.seq) ||
|
||||
warned_disk_usage ||
|
||||
xchg(&warned_disk_usage, 1))
|
||||
return;
|
||||
|
||||
bch_err(c, "disk usage increased more than %llu sectors reserved",
|
||||
bch_err(c, "disk usage increased more than %u sectors reserved",
|
||||
disk_res_sectors);
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
@ -1466,6 +1340,65 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *trans,
|
||||
struct replicas_delta_list *deltas)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
static int warned_disk_usage = 0;
|
||||
bool warn = false;
|
||||
unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
|
||||
struct replicas_delta *d = deltas->d;
|
||||
struct replicas_delta *top = (void *) deltas->d + deltas->used;
|
||||
struct bch_fs_usage *dst;
|
||||
s64 added = 0, should_not_have_added;
|
||||
unsigned i;
|
||||
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
preempt_disable();
|
||||
dst = fs_usage_ptr(c, trans->journal_res.seq, false);
|
||||
|
||||
for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
|
||||
switch (d->r.data_type) {
|
||||
case BCH_DATA_btree:
|
||||
case BCH_DATA_user:
|
||||
case BCH_DATA_parity:
|
||||
added += d->delta;
|
||||
}
|
||||
|
||||
update_replicas(c, dst, &d->r, d->delta);
|
||||
}
|
||||
|
||||
dst->nr_inodes += deltas->nr_inodes;
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
added += deltas->persistent_reserved[i];
|
||||
dst->reserved += deltas->persistent_reserved[i];
|
||||
dst->persistent_reserved[i] += deltas->persistent_reserved[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Not allowed to reduce sectors_available except by getting a
|
||||
* reservation:
|
||||
*/
|
||||
should_not_have_added = added - (s64) disk_res_sectors;
|
||||
if (unlikely(should_not_have_added > 0)) {
|
||||
atomic64_sub(should_not_have_added, &c->sectors_available);
|
||||
added -= should_not_have_added;
|
||||
warn = true;
|
||||
}
|
||||
|
||||
if (added > 0) {
|
||||
trans->disk_res->sectors -= added;
|
||||
this_cpu_sub(*c->online_reserved, added);
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
|
||||
if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
|
||||
fs_usage_apply_warn(trans, disk_res_sectors);
|
||||
}
|
||||
|
||||
/* trans_mark: */
|
||||
|
||||
static struct btree_iter *trans_get_update(struct btree_trans *trans,
|
||||
@ -2197,16 +2130,6 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c,
|
||||
|
||||
/* Disk reservations: */
|
||||
|
||||
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
|
||||
{
|
||||
percpu_down_read(&c->mark_lock);
|
||||
this_cpu_sub(c->usage[0]->online_reserved,
|
||||
res->sectors);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
res->sectors = 0;
|
||||
}
|
||||
|
||||
#define SECTORS_CACHE 1024
|
||||
|
||||
int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
@ -2240,7 +2163,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
|
||||
out:
|
||||
pcpu->sectors_available -= sectors;
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
this_cpu_add(*c->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
|
||||
preempt_enable();
|
||||
@ -2257,7 +2180,7 @@ recalculate:
|
||||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
|
||||
atomic64_set(&c->sectors_available,
|
||||
max_t(s64, 0, sectors_available - sectors));
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
this_cpu_add(*c->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
ret = 0;
|
||||
} else {
|
||||
|
@ -210,19 +210,16 @@ static inline unsigned dev_usage_u64s(void)
|
||||
return sizeof(struct bch_dev_usage) / sizeof(u64);
|
||||
}
|
||||
|
||||
void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
|
||||
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
|
||||
|
||||
u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
|
||||
struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *);
|
||||
|
||||
void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
|
||||
|
||||
void bch2_fs_usage_to_text(struct printbuf *,
|
||||
struct bch_fs *, struct bch_fs_usage *);
|
||||
struct bch_fs *, struct bch_fs_usage_online *);
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
|
||||
|
||||
struct bch_fs_usage_short
|
||||
bch2_fs_usage_read_short(struct bch_fs *);
|
||||
@ -240,20 +237,15 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
||||
|
||||
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned,
|
||||
s64, struct bch_fs_usage *, u64, unsigned);
|
||||
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
struct disk_reservation *, unsigned);
|
||||
|
||||
int bch2_mark_update(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, struct bch_fs_usage *, unsigned);
|
||||
|
||||
int bch2_replicas_delta_list_apply(struct bch_fs *,
|
||||
struct bch_fs_usage *,
|
||||
struct replicas_delta_list *);
|
||||
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
|
||||
unsigned, s64, unsigned);
|
||||
int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
|
||||
struct bkey_i *insert, unsigned);
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
|
||||
|
||||
int bch2_trans_mark_metadata_bucket(struct btree_trans *,
|
||||
struct disk_reservation *, struct bch_dev *,
|
||||
@ -263,13 +255,11 @@ int bch2_trans_mark_dev_sb(struct bch_fs *, struct disk_reservation *,
|
||||
|
||||
/* disk reservations: */
|
||||
|
||||
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
|
||||
|
||||
static inline void bch2_disk_reservation_put(struct bch_fs *c,
|
||||
struct disk_reservation *res)
|
||||
{
|
||||
if (res->sectors)
|
||||
__bch2_disk_reservation_put(c, res);
|
||||
this_cpu_sub(*c->online_reserved, res->sectors);
|
||||
res->sectors = 0;
|
||||
}
|
||||
|
||||
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
|
||||
|
@ -53,7 +53,6 @@ struct bucket_array {
};

struct bch_dev_usage {
	u64			buckets_alloc;
	u64			buckets_ec;
	u64			buckets_unavailable;

@ -66,12 +65,6 @@ struct bch_dev_usage {

struct bch_fs_usage {
	/* all fields are in units of 512 byte sectors: */

	u64			online_reserved;

	/* fields after online_reserved are cleared/recalculated by gc: */
	u64			gc_start[0];

	u64			hidden;
	u64			btree;
	u64			data;
@ -91,6 +84,11 @@ struct bch_fs_usage {
	u64			replicas[];
};

struct bch_fs_usage_online {
	u64			online_reserved;
	struct bch_fs_usage	u;
};

struct bch_fs_usage_short {
	u64			capacity;
	u64			used;
@ -98,22 +96,6 @@ struct bch_fs_usage_short {
	u64			nr_inodes;
};

struct replicas_delta {
	s64			delta;
	struct bch_replicas_entry r;
} __packed;

struct replicas_delta_list {
	unsigned		size;
	unsigned		used;

	struct			{} memset_start;
	u64			nr_inodes;
	u64			persistent_reserved[BCH_REPLICAS_MAX];
	struct			{} memset_end;
	struct replicas_delta	d[0];
};

/*
 * A reservation for space on disk:
 */
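This patch moves online_reserved out of struct bch_fs_usage into a standalone percpu counter on struct bch_fs, and bch_fs_usage_online is the read-side wrapper that pairs the summed reservation with a snapshot of the base counters. A simplified userspace sketch of that read pattern, using a plain per-CPU array and hypothetical helper names rather than the kernel percpu API:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

struct fs_usage {			/* base counters, recalculated by gc */
	uint64_t	data;
	uint64_t	reserved;
};

struct fs_usage_online {		/* what readers get back */
	uint64_t	online_reserved;
	struct fs_usage	u;
};

/* stand-in for a percpu counter: one slot per cpu, summed on read */
static uint64_t online_reserved[NR_CPUS];

static uint64_t percpu_sum(const uint64_t *v, unsigned nr)
{
	uint64_t ret = 0;

	for (unsigned i = 0; i < nr; i++)
		ret += v[i];
	return ret;
}

static struct fs_usage_online *usage_read(const struct fs_usage *base)
{
	struct fs_usage_online *ret = calloc(1, sizeof(*ret));

	if (!ret)
		return NULL;

	ret->online_reserved = percpu_sum(online_reserved, NR_CPUS);
	memcpy(&ret->u, base, sizeof(*base));	/* snapshot of the base counters */
	return ret;
}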
@ -379,7 +379,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
{
|
||||
struct bch_ioctl_fs_usage *arg = NULL;
|
||||
struct bch_replicas_usage *dst_e, *dst_end;
|
||||
struct bch_fs_usage *src;
|
||||
struct bch_fs_usage_online *src;
|
||||
u32 replica_entries_bytes;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
@ -405,7 +405,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
arg->online_reserved = src->online_reserved;
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++)
|
||||
arg->persistent_reserved[i] = src->persistent_reserved[i];
|
||||
arg->persistent_reserved[i] = src->u.persistent_reserved[i];
|
||||
|
||||
dst_e = arg->replicas;
|
||||
dst_end = (void *) arg->replicas + replica_entries_bytes;
|
||||
@ -419,7 +419,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
break;
|
||||
}
|
||||
|
||||
dst_e->sectors = src->replicas[i];
|
||||
dst_e->sectors = src->u.replicas[i];
|
||||
dst_e->r = *src_e;
|
||||
|
||||
/* recheck after setting nr_devs: */
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "btree_gc.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "error.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
@ -59,21 +60,23 @@ journal_seq_to_buf(struct journal *j, u64 seq)
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void journal_pin_new_entry(struct journal *j, int count)
|
||||
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
|
||||
{
|
||||
struct journal_entry_pin_list *p;
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->key_cache_list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
atomic_set(&p->count, count);
|
||||
p->devs.nr = 0;
|
||||
}
|
||||
|
||||
static void journal_pin_new_entry(struct journal *j)
|
||||
{
|
||||
/*
|
||||
* The fifo_push() needs to happen at the same time as j->seq is
|
||||
* incremented for journal_last_seq() to be calculated correctly
|
||||
*/
|
||||
atomic64_inc(&j->seq);
|
||||
p = fifo_push_ref(&j->pin);
|
||||
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
atomic_set(&p->count, count);
|
||||
p->devs.nr = 0;
|
||||
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
|
||||
}
|
||||
|
||||
static void bch2_journal_buf_init(struct journal *j)
|
||||
@ -192,7 +195,7 @@ static bool __journal_entry_close(struct journal *j)
|
||||
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
|
||||
|
||||
/* Initialize new buffer: */
|
||||
journal_pin_new_entry(j, 1);
|
||||
journal_pin_new_entry(j);
|
||||
|
||||
bch2_journal_buf_init(j);
|
||||
|
||||
@ -450,6 +453,27 @@ unlock:
|
||||
if (!ret)
|
||||
goto retry;
|
||||
|
||||
if ((ret == cur_entry_journal_full ||
|
||||
ret == cur_entry_journal_pin_full) &&
|
||||
!can_discard &&
|
||||
j->reservations.idx == j->reservations.unwritten_idx &&
|
||||
(flags & JOURNAL_RES_GET_RESERVED)) {
|
||||
char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
|
||||
|
||||
bch_err(c, "Journal stuck!");
|
||||
if (journal_debug_buf) {
|
||||
bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
|
||||
bch_err(c, "%s", journal_debug_buf);
|
||||
|
||||
bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j);
|
||||
bch_err(c, "Journal pins:\n%s", journal_debug_buf);
|
||||
kfree(journal_debug_buf);
|
||||
}
|
||||
|
||||
bch2_fatal_error(c);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Journal is full - can't rely on reclaim from work item due to
|
||||
* freezing:
|
||||
@ -499,7 +523,7 @@ static bool journal_preres_available(struct journal *j,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
{
|
||||
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
|
||||
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
|
||||
|
||||
if (!ret && mutex_trylock(&j->reclaim_lock)) {
|
||||
bch2_journal_reclaim(j);
|
||||
@ -1009,12 +1033,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
|
||||
j->pin.back = cur_seq;
|
||||
atomic64_set(&j->seq, cur_seq - 1);
|
||||
|
||||
fifo_for_each_entry_ptr(p, &j->pin, seq) {
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
atomic_set(&p->count, 1);
|
||||
p->devs.nr = 0;
|
||||
}
|
||||
fifo_for_each_entry_ptr(p, &j->pin, seq)
|
||||
journal_pin_list_init(p, 1);
|
||||
|
||||
list_for_each_entry(i, journal_entries, list) {
|
||||
unsigned ptr;
|
||||
@ -1037,7 +1057,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
|
||||
set_bit(JOURNAL_STARTED, &j->flags);
|
||||
j->last_flush_write = jiffies;
|
||||
|
||||
journal_pin_new_entry(j, 1);
|
||||
journal_pin_new_entry(j);
|
||||
|
||||
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
|
||||
|
||||
@ -1114,6 +1134,7 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
spin_lock_init(&j->err_lock);
|
||||
init_waitqueue_head(&j->wait);
|
||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
||||
init_waitqueue_head(&j->reclaim_wait);
|
||||
init_waitqueue_head(&j->pin_flush_wait);
|
||||
mutex_init(&j->reclaim_lock);
|
||||
mutex_init(&j->discard_lock);
|
||||
@ -1166,6 +1187,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
"last_seq_ondisk:\t%llu\n"
|
||||
"flushed_seq_ondisk:\t%llu\n"
|
||||
"prereserved:\t\t%u/%u\n"
|
||||
"each entry reserved:\t%u\n"
|
||||
"nr flush writes:\t%llu\n"
|
||||
"nr noflush writes:\t%llu\n"
|
||||
"nr direct reclaim:\t%llu\n"
|
||||
@ -1180,6 +1202,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
j->flushed_seq_ondisk,
|
||||
j->prereserved.reserved,
|
||||
j->prereserved.remaining,
|
||||
j->entry_u64s_reserved,
|
||||
j->nr_flush_writes,
|
||||
j->nr_noflush_writes,
|
||||
j->nr_direct_reclaim,
|
||||
|
@ -213,11 +213,13 @@ static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type
|
||||
enum btree_id id, unsigned level,
|
||||
const void *data, unsigned u64s)
|
||||
{
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->u64s = cpu_to_le16(u64s);
|
||||
entry->type = type;
|
||||
entry->btree_id = id;
|
||||
entry->level = level;
|
||||
entry->type = type;
|
||||
entry->pad[0] = 0;
|
||||
entry->pad[1] = 0;
|
||||
entry->pad[2] = 0;
|
||||
memcpy_u64s_small(entry->_data, data, u64s);
|
||||
|
||||
return jset_u64s(u64s);
|
||||
@ -306,7 +308,6 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
|
||||
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
|
||||
#define JOURNAL_RES_GET_CHECK (1 << 1)
|
||||
#define JOURNAL_RES_GET_RESERVED (1 << 2)
|
||||
#define JOURNAL_RES_GET_RECLAIM (1 << 3)
|
||||
|
||||
static inline int journal_res_get_fast(struct journal *j,
|
||||
struct journal_res *res,
|
||||
@ -410,7 +411,12 @@ static inline void bch2_journal_preres_put(struct journal *j,
|
||||
|
||||
s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
|
||||
res->u64s = 0;
|
||||
closure_wake_up(&j->preres_wait);
|
||||
|
||||
if (unlikely(s.waiting)) {
|
||||
clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
|
||||
(unsigned long *) &j->prereserved.v);
|
||||
closure_wake_up(&j->preres_wait);
|
||||
}
|
||||
|
||||
if (s.reserved <= s.remaining &&
|
||||
!test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
|
||||
@ -426,32 +432,32 @@ int __bch2_journal_preres_get(struct journal *,
|
||||
static inline int bch2_journal_preres_get_fast(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
unsigned flags,
|
||||
bool set_waiting)
|
||||
{
|
||||
int d = new_u64s - res->u64s;
|
||||
union journal_preres_state old, new;
|
||||
u64 v = atomic64_read(&j->prereserved.counter);
|
||||
int ret;
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
ret = 0;
|
||||
|
||||
new.reserved += d;
|
||||
|
||||
/*
|
||||
* If we're being called from the journal reclaim path, we have
|
||||
* to unconditionally give out the pre-reservation, there's
|
||||
* nothing else sensible we can do - otherwise we'd recurse back
|
||||
* into the reclaim path and deadlock:
|
||||
*/
|
||||
|
||||
if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
|
||||
new.reserved > new.remaining)
|
||||
if ((flags & JOURNAL_RES_GET_RESERVED) ||
|
||||
new.reserved + d < new.remaining) {
|
||||
new.reserved += d;
|
||||
ret = 1;
|
||||
} else if (set_waiting && !new.waiting)
|
||||
new.waiting = true;
|
||||
else
|
||||
return 0;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
res->u64s += d;
|
||||
return 1;
|
||||
if (ret)
|
||||
res->u64s += d;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_journal_preres_get(struct journal *j,
|
||||
@ -462,7 +468,7 @@ static inline int bch2_journal_preres_get(struct journal *j,
|
||||
if (new_u64s <= res->u64s)
|
||||
return 0;
|
||||
|
||||
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags))
|
||||
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
|
||||
return 0;
|
||||
|
||||
if (flags & JOURNAL_RES_GET_NONBLOCK)
|
|
||||
u64s_remaining = (u64) clean << 6;
|
||||
u64s_remaining -= (u64) total << 3;
|
||||
u64s_remaining = max(0LL, u64s_remaining);
|
||||
u64s_remaining /= 2;
|
||||
u64s_remaining /= 4;
|
||||
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||
@ -353,6 +353,9 @@ static inline void __journal_pin_drop(struct journal *j,
|
||||
if (!journal_pin_active(pin))
|
||||
return;
|
||||
|
||||
if (j->flush_in_progress == pin)
|
||||
j->flush_in_progress_dropped = true;
|
||||
|
||||
pin_list = journal_seq_pin(j, pin->seq);
|
||||
pin->seq = 0;
|
||||
list_del_init(&pin->list);
|
||||
@ -404,7 +407,12 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
|
||||
pin->seq = seq;
|
||||
pin->flush = flush_fn;
|
||||
|
||||
list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
|
||||
if (flush_fn == bch2_btree_key_cache_journal_flush)
|
||||
list_add(&pin->list, &pin_list->key_cache_list);
|
||||
else if (flush_fn)
|
||||
list_add(&pin->list, &pin_list->list);
|
||||
else
|
||||
list_add(&pin->list, &pin_list->flushed);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
@ -434,39 +442,49 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
|
||||
*/
|
||||
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
|
||||
journal_get_next_pin(struct journal *j,
|
||||
bool get_any,
|
||||
bool get_key_cache,
|
||||
u64 max_seq, u64 *seq)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret = NULL;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return NULL;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
|
||||
if (*seq > max_seq ||
|
||||
(ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list)))
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
|
||||
if (*seq > max_seq && !get_any && !get_key_cache)
|
||||
break;
|
||||
|
||||
if (ret) {
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = ret;
|
||||
if (*seq <= max_seq || get_any) {
|
||||
ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (*seq <= max_seq || get_any || get_key_cache) {
|
||||
ret = list_first_entry_or_null(&pin_list->key_cache_list,
|
||||
struct journal_entry_pin, list);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* returns true if we did work */
|
||||
static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_nr)
|
||||
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_any,
|
||||
unsigned min_key_cache)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
u64 seq, ret = 0;
|
||||
size_t nr_flushed = 0;
|
||||
journal_pin_flush_fn flush_fn;
|
||||
u64 seq;
|
||||
int err;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return 0;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
@ -475,23 +493,47 @@ static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
|
||||
j->last_flushed = jiffies;
|
||||
|
||||
pin = journal_get_next_pin(j, min_nr
|
||||
? U64_MAX : seq_to_flush, &seq);
|
||||
spin_lock(&j->lock);
|
||||
pin = journal_get_next_pin(j,
|
||||
min_any != 0,
|
||||
min_key_cache != 0,
|
||||
seq_to_flush, &seq);
|
||||
if (pin) {
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = pin;
|
||||
j->flush_in_progress_dropped = false;
|
||||
flush_fn = pin->flush;
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (!pin)
|
||||
break;
|
||||
|
||||
if (min_nr)
|
||||
min_nr--;
|
||||
if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
|
||||
min_key_cache--;
|
||||
|
||||
pin->flush(j, pin, seq);
|
||||
if (min_any)
|
||||
min_any--;
|
||||
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
err = flush_fn(j, pin, seq);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/* Pin might have been dropped or rearmed: */
|
||||
if (likely(!err && !j->flush_in_progress_dropped))
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
|
||||
j->flush_in_progress = NULL;
|
||||
j->flush_in_progress_dropped = false;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
wake_up(&j->pin_flush_wait);
|
||||
ret++;
|
||||
|
||||
if (err)
|
||||
break;
|
||||
|
||||
nr_flushed++;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return nr_flushed;
|
||||
}
|
||||
|
||||
static u64 journal_seq_to_flush(struct journal *j)
|
||||
@ -556,8 +598,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
u64 seq_to_flush, nr_flushed = 0;
|
||||
size_t min_nr;
|
||||
u64 seq_to_flush;
|
||||
size_t min_nr, nr_flushed;
|
||||
unsigned flags;
|
||||
int ret = 0;
|
||||
|
||||
@ -595,15 +637,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
|
||||
if (j->prereserved.reserved * 2 > j->prereserved.remaining)
|
||||
min_nr = 1;
|
||||
|
||||
if (atomic_read(&c->btree_cache.dirty) * 4 >
|
||||
c->btree_cache.used * 3)
|
||||
min_nr = 1;
|
||||
|
||||
if (fifo_free(&j->pin) <= 32)
|
||||
min_nr = 1;
|
||||
|
||||
min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
|
||||
|
||||
trace_journal_reclaim_start(c,
|
||||
min_nr,
|
||||
j->prereserved.reserved,
|
||||
@ -613,14 +649,19 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
|
||||
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
||||
atomic_long_read(&c->btree_key_cache.nr_keys));
|
||||
|
||||
nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
|
||||
nr_flushed = journal_flush_pins(j, seq_to_flush,
|
||||
min_nr,
|
||||
min(bch2_nr_btree_keys_need_flush(c), 128UL));
|
||||
|
||||
if (direct)
|
||||
j->nr_direct_reclaim += nr_flushed;
|
||||
else
|
||||
j->nr_background_reclaim += nr_flushed;
|
||||
trace_journal_reclaim_finish(c, nr_flushed);
|
||||
} while (min_nr && nr_flushed);
|
||||
|
||||
if (nr_flushed)
|
||||
wake_up(&j->reclaim_wait);
|
||||
} while (min_nr && nr_flushed && !direct);
|
||||
|
||||
memalloc_noreclaim_restore(flags);
|
||||
|
||||
@ -713,7 +754,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
*did_work = journal_flush_pins(j, seq_to_flush, 0) != 0;
|
||||
*did_work = journal_flush_pins(j, seq_to_flush, 0, 0) != 0;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/*
|
||||
|
@ -43,6 +43,7 @@ struct journal_buf {
|
||||
|
||||
struct journal_entry_pin_list {
|
||||
struct list_head list;
|
||||
struct list_head key_cache_list;
|
||||
struct list_head flushed;
|
||||
atomic_t count;
|
||||
struct bch_devs_list devs;
|
||||
@ -50,7 +51,7 @@ struct journal_entry_pin_list {
|
||||
|
||||
struct journal;
|
||||
struct journal_entry_pin;
|
||||
typedef void (*journal_pin_flush_fn)(struct journal *j,
|
||||
typedef int (*journal_pin_flush_fn)(struct journal *j,
|
||||
struct journal_entry_pin *, u64);
|
||||
|
||||
struct journal_entry_pin {
|
||||
@ -105,8 +106,9 @@ union journal_preres_state {
	};

	struct {
		u32		reserved;
		u32		remaining;
		u64		waiting:1,
				reserved:31,
				remaining:32;
	};
};
|
||||
|
||||
@ -243,6 +245,7 @@ struct journal {
|
||||
spinlock_t err_lock;
|
||||
|
||||
struct mutex reclaim_lock;
|
||||
wait_queue_head_t reclaim_wait;
|
||||
struct task_struct *reclaim_thread;
|
||||
bool reclaim_kicked;
|
||||
u64 nr_direct_reclaim;
|
||||
@ -250,6 +253,7 @@ struct journal {
|
||||
|
||||
unsigned long last_flushed;
|
||||
struct journal_entry_pin *flush_in_progress;
|
||||
bool flush_in_progress_dropped;
|
||||
wait_queue_head_t pin_flush_wait;
|
||||
|
||||
/* protects advancing ja->discard_idx: */
|
||||
|
@ -88,6 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_put(&trans, iter);
|
||||
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
@ -135,20 +136,24 @@ retry:
|
||||
dev_idx, flags, true);
|
||||
if (ret) {
|
||||
bch_err(c, "Cannot drop device without losing data");
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = bch2_btree_node_update_key(c, iter, b, k.k);
|
||||
if (ret == -EINTR) {
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
ret = 0;
|
||||
goto retry;
|
||||
}
|
||||
if (ret) {
|
||||
bch_err(c, "Error updating btree node key: %i", ret);
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_free(&trans, iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* flush relevant btree updates */
|
||||
|
@ -793,6 +793,9 @@ next:
|
||||
out:
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error %i in bch2_move_btree", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -916,8 +919,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
|
||||
rewrite_old_nodes_pred, c, stats);
|
||||
if (!ret) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
|
||||
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
@ -21,6 +21,11 @@ const char * const bch2_sb_features[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
const char * const bch2_sb_compat[] = {
|
||||
BCH_SB_COMPAT()
|
||||
NULL
|
||||
};
|
||||
|
||||
const char * const bch2_btree_ids[] = {
|
||||
BCH_BTREE_IDS()
|
||||
NULL
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
extern const char * const bch2_error_actions[];
|
||||
extern const char * const bch2_sb_features[];
|
||||
extern const char * const bch2_sb_compat[];
|
||||
extern const char * const bch2_btree_ids[];
|
||||
extern const char * const bch2_csum_opts[];
|
||||
extern const char * const bch2_compression_opts[];
|
||||
|
@ -935,7 +935,7 @@ static int read_btree_roots(struct bch_fs *c)
|
||||
|
||||
if (i == BTREE_ID_alloc &&
|
||||
c->opts.reconstruct_alloc) {
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -945,7 +945,7 @@ static int read_btree_roots(struct bch_fs *c)
|
||||
"invalid btree root %s",
|
||||
bch2_btree_ids[i]);
|
||||
if (i == BTREE_ID_alloc)
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
}
|
||||
|
||||
ret = bch2_btree_root_read(c, i, &r->key, r->level);
|
||||
@ -955,7 +955,7 @@ static int read_btree_roots(struct bch_fs *c)
|
||||
"error reading btree root %s",
|
||||
bch2_btree_ids[i]);
|
||||
if (i == BTREE_ID_alloc)
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
}
|
||||
}
|
||||
|
||||
@ -998,7 +998,7 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE))) {
|
||||
if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
|
||||
bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
@ -1041,7 +1041,7 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
last_journal_entry &&
|
||||
!journal_entry_empty(last_journal_entry), c,
|
||||
"filesystem marked clean but journal not empty")) {
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
||||
c->sb.clean = false;
|
||||
}
|
||||
@ -1075,7 +1075,7 @@ use_clean:
|
||||
}
|
||||
|
||||
if (c->opts.reconstruct_alloc) {
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
drop_alloc_keys(&c->journal_keys);
|
||||
}
|
||||
|
||||
@ -1128,8 +1128,8 @@ use_clean:
|
||||
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
|
||||
|
||||
if (c->opts.fsck ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA)) ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)) ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_metadata)) ||
|
||||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
|
||||
bch_info(c, "starting mark and sweep");
|
||||
err = "error in mark and sweep";
|
||||
@ -1215,11 +1215,11 @@ use_clean:
|
||||
bch_verbose(c, "quotas done");
|
||||
}
|
||||
|
||||
if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE)) ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE))) {
|
||||
if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
|
||||
struct bch_move_stats stats = { 0 };
|
||||
|
||||
bch_verbose(c, "scanning for old btree nodes");
|
||||
bch_info(c, "scanning for old btree nodes");
|
||||
ret = bch2_fs_read_write(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1227,7 +1227,7 @@ use_clean:
|
||||
ret = bch2_scan_old_btree_nodes(c, &stats);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "scanning for old btree nodes done");
|
||||
bch_info(c, "scanning for old btree nodes done");
|
||||
}
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
@ -1238,7 +1238,7 @@ use_clean:
|
||||
}
|
||||
|
||||
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
@ -1289,8 +1289,8 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
bch_notice(c, "initializing new filesystem");
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_EXTENTS_ABOVE_BTREE_UPDATES_DONE;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
|
||||
|
||||
if (c->opts.version_upgrade) {
|
||||
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
|
||||
|
@ -271,11 +271,13 @@ static int replicas_table_update(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *new_r)
|
||||
{
|
||||
struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
|
||||
struct bch_fs_usage *new_scratch = NULL;
|
||||
struct bch_fs_usage_online *new_scratch = NULL;
|
||||
struct bch_fs_usage __percpu *new_gc = NULL;
|
||||
struct bch_fs_usage *new_base = NULL;
|
||||
unsigned i, bytes = sizeof(struct bch_fs_usage) +
|
||||
sizeof(u64) * new_r->nr;
|
||||
unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
|
||||
sizeof(u64) * new_r->nr;
|
||||
int ret = 0;
|
||||
|
||||
memset(new_usage, 0, sizeof(new_usage));
|
||||
@ -286,7 +288,7 @@ static int replicas_table_update(struct bch_fs *c,
|
||||
goto err;
|
||||
|
||||
if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
|
||||
!(new_scratch = kmalloc(bytes, GFP_KERNEL)) ||
|
||||
!(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
|
||||
(c->usage_gc &&
|
||||
!(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
|
||||
goto err;
|
||||
@ -462,6 +464,36 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* replicas delta list: */
|
||||
|
||||
bool bch2_replicas_delta_list_marked(struct bch_fs *c,
|
||||
struct replicas_delta_list *r)
|
||||
{
|
||||
struct replicas_delta *d = r->d;
|
||||
struct replicas_delta *top = (void *) r->d + r->used;
|
||||
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
for (d = r->d; d != top; d = replicas_delta_next(d))
|
||||
if (bch2_replicas_entry_idx(c, &d->r) < 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
int bch2_replicas_delta_list_mark(struct bch_fs *c,
|
||||
struct replicas_delta_list *r)
|
||||
{
|
||||
struct replicas_delta *d = r->d;
|
||||
struct replicas_delta *top = (void *) r->d + r->used;
|
||||
int ret = 0;
|
||||
|
||||
for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
|
||||
ret = bch2_mark_replicas(c, &d->r);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bkey replicas: */
|
||||
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
@ -473,6 +505,11 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
return __bch2_mark_bkey_replicas(c, k, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Old replicas_gc mechanism: only used for journal replicas entries now, should
|
||||
* die at some point:
|
||||
*/
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
{
|
||||
unsigned i;
|
||||
@ -566,6 +603,8 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* New much simpler mechanism for clearing out unneeded replicas entries: */
|
||||
|
||||
int bch2_replicas_gc2(struct bch_fs *c)
|
||||
{
|
||||
struct bch_replicas_cpu new = { 0 };
|
||||
@ -966,11 +1005,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
for_each_cpu_replicas_entry(&c->replicas, e) {
|
||||
unsigned i, nr_online = 0, dflags = 0;
|
||||
unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
|
||||
bool metadata = e->data_type < BCH_DATA_user;
|
||||
|
||||
for (i = 0; i < e->nr_devs; i++)
|
||||
for (i = 0; i < e->nr_devs; i++) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);
|
||||
|
||||
nr_online += test_bit(e->devs[i], devs.d);
|
||||
nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
|
||||
}
|
||||
|
||||
if (nr_failed == e->nr_devs)
|
||||
continue;
|
||||
|
||||
if (nr_online < e->nr_required)
|
||||
dflags |= metadata
|
||||
|
@ -26,6 +26,31 @@ bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
int bch2_mark_replicas(struct bch_fs *,
		       struct bch_replicas_entry *);

struct replicas_delta {
	s64			delta;
	struct bch_replicas_entry r;
} __packed;

struct replicas_delta_list {
	unsigned		size;
	unsigned		used;

	struct			{} memset_start;
	u64			nr_inodes;
	u64			persistent_reserved[BCH_REPLICAS_MAX];
	struct			{} memset_end;
	struct replicas_delta	d[0];
};

static inline struct replicas_delta *
replicas_delta_next(struct replicas_delta *d)
{
	return (void *) d + replicas_entry_bytes(&d->r) + 8;
}

bool bch2_replicas_delta_list_marked(struct bch_fs *, struct replicas_delta_list *);
int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *);

void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
|
||||
|
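
Note on replicas_delta_next() above: struct replicas_delta is a fixed 8-byte s64 delta followed by a variable-size bch_replicas_entry, so the iterator advances by replicas_entry_bytes(&d->r) + 8, and bch2_replicas_delta_list_marked()/_mark() walk the list from r->d up to r->d + r->used. The following standalone sketch illustrates that byte-offset walk over packed variable-size entries, with simplified hypothetical types (not the bcachefs definitions):

/* Illustrative only: simplified stand-ins for replicas_delta{,_list}. */
#include <stdio.h>
#include <string.h>

struct delta {
	long long	delta;		/* fixed-size header, like replicas_delta.delta */
	unsigned char	nr_devs;	/* variable-size tail follows */
	unsigned char	devs[];
} __attribute__((packed));

static size_t delta_bytes(const struct delta *d)
{
	return sizeof(*d) + d->nr_devs;
}

/* same idea as replicas_delta_next(): advance by the entry's byte size */
static const struct delta *delta_next(const struct delta *d)
{
	return (const void *) d + delta_bytes(d);
}

int main(void)
{
	unsigned char buf[64];
	size_t used = 0;

	/* append two entries of different sizes */
	struct delta a = { .delta = 1, .nr_devs = 2 };
	memcpy(buf + used, &a, sizeof(a));
	memcpy(buf + used + sizeof(a), (unsigned char[]){ 0, 1 }, 2);
	used += delta_bytes(&a);

	struct delta b = { .delta = -1, .nr_devs = 1 };
	memcpy(buf + used, &b, sizeof(b));
	memcpy(buf + used + sizeof(b), (unsigned char[]){ 3 }, 1);
	used += delta_bytes(&b);

	/* walk from d to top, like bch2_replicas_delta_list_mark() */
	const struct delta *d = (const void *) buf;
	const struct delta *top = (const void *) buf + used;

	for (; d != top; d = delta_next(d))
		printf("delta %lld, %d devs\n", d->delta, d->nr_devs);

	return 0;
}
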
@ -377,7 +377,6 @@ static void bch2_sb_update(struct bch_fs *c)
		ca->mi = bch2_mi_to_cpu(mi->members + i);
}

/* doesn't copy member info */
static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
	struct bch_sb_field *src_f, *dst_f;
@ -996,7 +995,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
	struct bch_dev *ca;
	unsigned i, dev;

	percpu_down_write(&c->mark_lock);
	percpu_down_read(&c->mark_lock);

	if (!journal_seq) {
		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
@ -1067,7 +1066,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
		}
	}

	percpu_up_write(&c->mark_lock);
	percpu_up_read(&c->mark_lock);

	for (i = 0; i < 2; i++) {
		struct jset_entry_clock *clock =
@ -1093,8 +1092,8 @@ void bch2_fs_mark_clean(struct bch_fs *c)

	SET_BCH_SB_CLEAN(c->disk_sb.sb, true);

	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_metadata;
	c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
	c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);

@ -153,6 +153,8 @@ read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
read_attribute(congested);

read_attribute(btree_avg_write_size);

read_attribute(bucket_quantiles_last_read);
read_attribute(bucket_quantiles_last_write);
read_attribute(bucket_quantiles_fragmentation);
@ -230,9 +232,17 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
	return ret;
}

static size_t bch2_btree_avg_write_size(struct bch_fs *c)
{
	u64 nr = atomic64_read(&c->btree_writes_nr);
	u64 sectors = atomic64_read(&c->btree_writes_sectors);

	return nr ? div64_u64(sectors, nr) : 0;
}

static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
	struct bch_fs_usage_online *fs_usage = bch2_fs_usage_read(c);

	if (!fs_usage)
		return -ENOMEM;
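
Note on bch2_btree_avg_write_size() above: the new sysfs attribute derives a running average from two monotonically increasing counters read with atomic64_read(), guarding against division by zero when no writes have happened yet. A minimal standalone sketch of the same pattern, producer and consumer side, using C11 atomics and hypothetical names (the update site for c->btree_writes_nr/_sectors is not shown in this hunk):

#include <stdatomic.h>
#include <stdio.h>

/* two monotonically increasing counters, analogous to btree_writes_nr/_sectors */
static atomic_ullong writes_nr;
static atomic_ullong writes_sectors;

/* producer: called once per completed write of 'sectors' sectors (hypothetical) */
static void note_write(unsigned long long sectors)
{
	atomic_fetch_add(&writes_nr, 1);
	atomic_fetch_add(&writes_sectors, sectors);
}

/* consumer: same computation as bch2_btree_avg_write_size() */
static unsigned long long avg_write_size(void)
{
	unsigned long long nr = atomic_load(&writes_nr);
	unsigned long long sectors = atomic_load(&writes_sectors);

	return nr ? sectors / nr : 0;	/* guard against division by zero */
}

int main(void)
{
	note_write(8);
	note_write(16);
	printf("average write size: %llu sectors\n", avg_write_size());
	return 0;
}
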
@ -318,6 +328,7 @@ SHOW(bch2_fs)
	sysfs_print(block_size, block_bytes(c));
	sysfs_print(btree_node_size, btree_bytes(c));
	sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
	sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));

	sysfs_print(read_realloc_races,
		    atomic_long_read(&c->read_realloc_races));
@ -513,6 +524,7 @@ struct attribute *bch2_fs_files[] = {
	&sysfs_block_size,
	&sysfs_btree_node_size,
	&sysfs_btree_cache_size,
	&sysfs_btree_avg_write_size,

	&sysfs_journal_write_delay_ms,
	&sysfs_journal_reclaim_delay_ms,
@ -800,7 +812,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
	pr_buf(out,
	       "ec\t%16llu\n"
	       "available%15llu\n"
	       "alloc\t%16llu\n"
	       "\n"
	       "free_inc\t\t%zu/%zu\n"
	       "free[RESERVE_MOVINGGC]\t%zu/%zu\n"
@ -813,7 +824,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
	       "btree reserve cache\t%u\n",
	       stats.buckets_ec,
	       __dev_buckets_available(ca, stats),
	       stats.buckets_alloc,
	       fifo_used(&ca->free_inc), ca->free_inc.size,
	       fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
	       fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
@ -252,12 +252,13 @@ retry:
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
ret = !(old.v & l[type].lock_fail);
|
||||
|
||||
EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
|
||||
}
|
||||
|
||||
if (ret)
|
||||
six_set_owner(lock, type, old);
|
||||
|
||||
EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
|
||||
EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking));
|
||||
|
||||
return ret;
|
||||
|