Update bcachefs sources to f38382c574 bcachefs: Improve key marking interface

Kent Overstreet 2019-05-24 11:57:29 -04:00
parent 93bdfcb210
commit 7e35389599
42 changed files with 1071 additions and 892 deletions

View File

@ -1 +1 @@
454bd4f82d85bb42a86b8eb0172b13e86e5788a7
f38382c5747090ac9160e6d5fa1386954cb1f23c

View File

@ -63,7 +63,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
struct btree_iter *iter;
struct btree *b;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_node(&trans, iter, i, POS_MIN, 0, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
@ -160,7 +160,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
char buf[512];
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, btree_id, start,
BTREE_ITER_PREFETCH, k, ret) {
@ -181,7 +181,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id,
struct btree *b;
char buf[4096];
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
if (bkey_cmp(b->key.k.p, end) > 0)
@ -204,7 +204,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
struct btree *b;
char buf[4096];
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
if (bkey_cmp(b->key.k.p, end) > 0)

View File

@ -499,16 +499,14 @@ TRACE_EVENT(copygc,
);
DECLARE_EVENT_CLASS(transaction_restart,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip),
TP_PROTO(unsigned long ip),
TP_ARGS(ip),
TP_STRUCT__entry(
__array(char, name, 16)
__field(unsigned long, ip )
),
TP_fast_assign(
memcpy(__entry->name, c->name, 16);
__entry->ip = ip;
),
@ -516,58 +514,130 @@ DECLARE_EVENT_CLASS(transaction_restart,
);
DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_iters_realloced,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TRACE_EVENT(trans_restart_iters_realloced,
TP_PROTO(unsigned long ip, unsigned nr),
TP_ARGS(ip, nr),
TP_STRUCT__entry(
__field(unsigned long, ip )
__field(unsigned, nr )
),
TP_fast_assign(
__entry->ip = ip;
__entry->nr = nr;
),
TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr)
);
DEFINE_EVENT(transaction_restart, trans_restart_mem_realloced,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TRACE_EVENT(trans_restart_mem_realloced,
TP_PROTO(unsigned long ip, unsigned long bytes),
TP_ARGS(ip, bytes),
TP_STRUCT__entry(
__field(unsigned long, ip )
__field(unsigned long, bytes )
),
TP_fast_assign(
__entry->ip = ip;
__entry->bytes = bytes;
),
TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes)
);
DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_fault_inject,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_mark,
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_upgrade,
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade,
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_traverse,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_restart_atomic,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
DECLARE_EVENT_CLASS(node_lock_fail,
TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(level, iter_seq, node, node_seq),
TP_STRUCT__entry(
__field(u32, level)
__field(u32, iter_seq)
__field(u32, node)
__field(u32, node_seq)
),
TP_fast_assign(
__entry->level = level;
__entry->iter_seq = iter_seq;
__entry->node = node;
__entry->node_seq = node_seq;
),
TP_printk("level %u iter seq %u node %u node seq %u",
__entry->level, __entry->iter_seq,
__entry->node, __entry->node_seq)
);
DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(level, iter_seq, node, node_seq)
);
DEFINE_EVENT(node_lock_fail, node_relock_fail,
TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(level, iter_seq, node, node_seq)
);
#endif /* _TRACE_BCACHE_H */
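For reference, the updated call sites elsewhere in this diff show how the slimmed-down tracepoints are invoked; a minimal sketch, with the surrounding variables assumed from those call sites:

	/* class events now take only the caller ip that triggered the restart: */
	if (!bch2_trans_relock(trans)) {
		trace_trans_restart_journal_preres_get(trans->ip);
		return -EINTR;
	}

	/* the two realloc events carry their own payload instead of the fs name: */
	trace_trans_restart_iters_realloced(trans->ip, trans->size);
	trace_trans_restart_mem_realloced(trans->ip, new_bytes);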

View File

@ -220,7 +220,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -301,7 +301,7 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
int ret;
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
if (type == ACL_TYPE_ACCESS && acl) {
ret = posix_acl_update_mode(&inode->v, &mode, &acl);

View File

@ -228,10 +228,12 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
unsigned i;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
bch2_mark_key(c, k, true, 0, NULL, 0, 0);
bch2_mark_key(c, k, 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
@ -241,8 +243,9 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
for_each_journal_key(*journal_keys, j)
if (j->btree_id == BTREE_ID_ALLOC)
bch2_mark_key(c, bkey_i_to_s_c(j->k),
true, 0, NULL, 0, 0);
bch2_mark_key(c, bkey_i_to_s_c(j->k), 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
percpu_down_write(&c->mark_lock);
bch2_dev_usage_from_buckets(c);
@ -283,7 +286,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
if (k->k.p.offset >= ca->mi.nbuckets)
return 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
BTREE_ITER_INTENT);
@ -328,7 +331,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@ -948,6 +951,7 @@ retry:
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_BUCKET_INVALIDATE|
flags);
if (ret == -EINTR)
goto retry;
@ -1027,7 +1031,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
u64 journal_seq = 0;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
POS(ca->dev_idx, 0),

View File

@ -842,4 +842,9 @@ static inline s64 bch2_current_time(struct bch_fs *c)
return timespec_to_bch2_time(c, now);
}
static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
{
return dev < c->sb.nr_devices && c->devs[dev];
}
#endif /* _BCACHEFS_H */

View File

@ -201,15 +201,20 @@ enum merge_result bch2_bkey_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r)
{
const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type];
enum merge_result ret;
if (!key_merging_disabled(c) &&
ops->key_merge &&
l->k.type == r->k.type &&
!bversion_cmp(l->k.version, r->k.version) &&
!bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
return ops->key_merge(c, l, r);
if (key_merging_disabled(c) ||
!ops->key_merge ||
l->k.type != r->k.type ||
bversion_cmp(l->k.version, r->k.version) ||
bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
return BCH_MERGE_NOMERGE;
return BCH_MERGE_NOMERGE;
ret = ops->key_merge(c, l, r);
if (ret != BCH_MERGE_NOMERGE)
l->k.needs_whiteout |= r->k.needs_whiteout;
return ret;
}
static const struct old_bkey_type {

View File

@ -652,8 +652,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
*/
struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type,
bool may_drop_locks)
enum six_lock_type lock_type)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@ -720,8 +719,7 @@ retry:
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
if (!btree_node_lock(b, k->k.p, level, iter,
lock_type, may_drop_locks))
if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
return ERR_PTR(-EINTR);
if (unlikely(PTR_HASH(&b->key) != PTR_HASH(k) ||
@ -731,9 +729,7 @@ retry:
if (bch2_btree_node_relock(iter, level + 1))
goto retry;
trans_restart();
trace_trans_restart_btree_node_reused(c,
iter->trans->ip);
trace_trans_restart_btree_node_reused(iter->trans->ip);
return ERR_PTR(-EINTR);
}
}
@ -770,9 +766,9 @@ retry:
struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_iter *iter,
struct btree *b,
bool may_drop_locks,
enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
struct btree *parent;
struct btree_node_iter node_iter;
struct bkey_packed *k;
@ -784,8 +780,10 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (!parent)
return NULL;
if (!bch2_btree_node_relock(iter, level + 1))
goto out_upgrade;
if (!bch2_btree_node_relock(iter, level + 1)) {
ret = ERR_PTR(-EINTR);
goto out;
}
node_iter = iter->l[parent->level].iter;
@ -802,19 +800,19 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
bch2_bkey_unpack(parent, &tmp.k, k);
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
SIX_LOCK_intent, may_drop_locks);
SIX_LOCK_intent);
if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
struct btree_iter *linked;
if (!bch2_btree_node_relock(iter, level + 1))
goto out_upgrade;
goto out;
/*
* We might have got -EINTR because trylock failed, and we're
* holding other locks that would cause us to deadlock:
*/
trans_for_each_iter(iter->trans, linked)
trans_for_each_iter(trans, linked)
if (btree_iter_cmp(iter, linked) < 0)
__bch2_btree_iter_unlock(linked);
@ -822,7 +820,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
btree_node_unlock(iter, level);
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
SIX_LOCK_intent, may_drop_locks);
SIX_LOCK_intent);
/*
* before btree_iter_relock() calls btree_iter_verify_locks():
@ -839,17 +837,16 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
}
}
bch2_btree_trans_relock(iter->trans);
bch2_trans_relock(trans);
}
out:
if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level + 1);
bch2_btree_trans_verify_locks(iter->trans);
if (PTR_ERR_OR_ZERO(ret) == -EINTR)
bch2_btree_iter_upgrade(iter, level + 2);
BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
(iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
!btree_node_locked(iter, level)));
BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
if (!IS_ERR_OR_NULL(ret)) {
struct btree *n1 = ret, *n2 = b;
@ -862,12 +859,9 @@ out:
n2->data->min_key));
}
bch2_btree_trans_verify_locks(trans);
return ret;
out_upgrade:
if (may_drop_locks)
bch2_btree_iter_upgrade(iter, level + 2, true);
ret = ERR_PTR(-EINTR);
goto out;
}
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,

View File

@ -22,11 +22,10 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned,
enum six_lock_type, bool);
enum six_lock_type);
struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, bool,
enum btree_node_sibling);
struct btree *, enum btree_node_sibling);
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned);

View File

@ -170,7 +170,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
bch2_mark_key(c, k, true, k.k->size, NULL, 0, flags);
bch2_mark_key(c, k, k.k->size, NULL, 0, flags);
fsck_err:
return ret;
}
@ -214,7 +214,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
u8 max_stale;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
@ -283,7 +283,7 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id,
if (ret)
return ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
BTREE_ITER_SLOTS, k, ret) {
@ -422,8 +422,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
true, 0, NULL, 0,
bch2_mark_key(c, bkey_i_to_s_c(&d->key), 0, NULL, 0,
BCH_BUCKET_MARK_GC);
mutex_unlock(&c->btree_interior_update_lock);
@ -1057,7 +1056,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
struct btree *merge[GC_MERGE_NODES];
u32 lock_seq[GC_MERGE_NODES];
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
/*
* XXX: We don't have a good way of positively matching on sibling nodes

View File

@ -1151,7 +1151,7 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
struct btree_iter *iter;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p,
BTREE_MAX_DEPTH, b->level, 0);

View File

@ -14,13 +14,18 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
struct btree_iter_level *,
struct bkey *);
#define BTREE_ITER_NOT_END ((struct btree *) 1)
#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return l < BTREE_MAX_DEPTH &&
iter->l[l].b &&
iter->l[l].b != BTREE_ITER_NOT_END;
(unsigned long) iter->l[l].b >= 128;
}
/* Returns < 0 if @k is before iter pos, > 0 if @k is after */
@ -105,19 +110,20 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
struct btree *b = btree_iter_node(iter, level);
int want = __btree_lock_want(iter, level);
if (!b || b == BTREE_ITER_NOT_END)
if (!is_btree_node(iter, level))
return false;
if (race_fault())
return false;
if (!six_relock_type(&b->lock, want, iter->l[level].lock_seq) &&
!(iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
btree_node_lock_increment(iter, b, level, want)))
if (six_relock_type(&b->lock, want, iter->l[level].lock_seq) ||
(btree_node_lock_seq_matches(iter, b, level) &&
btree_node_lock_increment(iter, b, level, want))) {
mark_btree_node_locked(iter, level, want);
return true;
} else {
return false;
mark_btree_node_locked(iter, level, want);
return true;
}
}
static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
@ -140,7 +146,7 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
: six_relock_type(&b->lock, SIX_LOCK_intent, iter->l[level].lock_seq))
goto success;
if (iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
if (btree_node_lock_seq_matches(iter, b, level) &&
btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
btree_node_unlock(iter, level);
goto success;
@ -153,7 +159,7 @@ success:
}
static inline bool btree_iter_get_locks(struct btree_iter *iter,
bool upgrade)
bool upgrade, bool trace)
{
unsigned l = iter->level;
int fail_idx = -1;
@ -165,6 +171,17 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
if (!(upgrade
? bch2_btree_node_upgrade(iter, l)
: bch2_btree_node_relock(iter, l))) {
if (trace)
(upgrade
? trace_node_upgrade_fail
: trace_node_relock_fail)(l, iter->l[l].lock_seq,
is_btree_node(iter, l)
? 0
: (unsigned long) iter->l[l].b,
is_btree_node(iter, l)
? iter->l[l].b->lock.state.seq
: 0);
fail_idx = l;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
@ -179,7 +196,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
*/
while (fail_idx >= 0) {
btree_node_unlock(iter, fail_idx);
iter->l[fail_idx].b = BTREE_ITER_NOT_END;
iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
--fail_idx;
}
@ -195,8 +212,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
unsigned level,
struct btree_iter *iter,
enum six_lock_type type,
bool may_drop_locks)
enum six_lock_type type)
{
struct btree_iter *linked;
bool ret = true;
@ -224,11 +240,11 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
if (may_drop_locks) {
if (!(iter->trans->nounlock)) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
btree_iter_get_locks(linked, true);
btree_iter_get_locks(linked, true, false);
}
ret = false;
}
@ -240,21 +256,19 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (linked->btree_id == iter->btree_id &&
level > __fls(linked->nodes_locked)) {
if (may_drop_locks) {
if (!(iter->trans->nounlock)) {
linked->locks_want =
max(level + 1, max_t(unsigned,
linked->locks_want,
iter->locks_want));
btree_iter_get_locks(linked, true);
btree_iter_get_locks(linked, true, false);
}
ret = false;
}
}
if (unlikely(!ret)) {
trans_restart();
trace_trans_restart_would_deadlock(iter->trans->c,
iter->trans->ip);
trace_trans_restart_would_deadlock(iter->trans->ip);
return false;
}
@ -269,9 +283,6 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;
BUG_ON((iter->flags & BTREE_ITER_NOUNLOCK) &&
!btree_node_locked(iter, 0));
for (l = 0; btree_iter_node(iter, l); l++) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
!btree_node_locked(iter, l))
@ -292,10 +303,10 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
#endif
__flatten
static bool bch2_btree_iter_relock(struct btree_iter *iter)
static bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
{
return iter->uptodate >= BTREE_ITER_NEED_RELOCK
? btree_iter_get_locks(iter, false)
? btree_iter_get_locks(iter, false, trace)
: true;
}
@ -308,7 +319,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
iter->locks_want = new_locks_want;
if (btree_iter_get_locks(iter, true))
if (btree_iter_get_locks(iter, true, true))
return true;
/*
@ -319,10 +330,9 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
trans_for_each_iter(iter->trans, linked)
if (linked != iter &&
linked->btree_id == iter->btree_id &&
btree_iter_cmp(linked, iter) <= 0 &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
btree_iter_get_locks(linked, true);
btree_iter_get_locks(linked, true, false);
}
return false;
@ -389,28 +399,21 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
bch2_btree_trans_verify_locks(iter->trans);
}
int bch2_btree_iter_unlock(struct btree_iter *iter)
{
struct btree_iter *linked;
/* Btree transaction locking: */
trans_for_each_iter(iter->trans, linked)
__bch2_btree_iter_unlock(linked);
return btree_iter_err(iter);
}
bool bch2_btree_trans_relock(struct btree_trans *trans)
bool bch2_trans_relock(struct btree_trans *trans)
{
struct btree_iter *iter;
bool ret = true;
trans_for_each_iter(trans, iter)
ret &= bch2_btree_iter_relock(iter);
if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
ret &= bch2_btree_iter_relock(iter, true);
return ret;
}
void bch2_btree_trans_unlock(struct btree_trans *trans)
void bch2_trans_unlock(struct btree_trans *trans)
{
struct btree_iter *iter;
@ -418,8 +421,6 @@ void bch2_btree_trans_unlock(struct btree_trans *trans)
__bch2_btree_iter_unlock(iter);
}
/* Btree transaction locking: */
/* Btree iterator: */
#ifdef CONFIG_BCACHEFS_DEBUG
@ -824,7 +825,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b) {
__btree_node_unlock(linked, level);
linked->l[level].b = BTREE_ITER_NOT_END;
linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
}
}
@ -862,26 +863,28 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
* that depth
*/
iter->level = depth_want;
iter->l[iter->level].b = NULL;
for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
iter->l[i].b = NULL;
return 1;
}
lock_type = __btree_lock_want(iter, iter->level);
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
iter, lock_type, true)))
iter, lock_type)))
return -EINTR;
if (likely(b == c->btree_roots[iter->btree_id].b &&
b->level == iter->level &&
!race_fault())) {
for (i = 0; i < iter->level; i++)
iter->l[i].b = BTREE_ITER_NOT_END;
iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
iter->l[iter->level].b = b;
for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
iter->l[i].b = NULL;
mark_btree_node_locked(iter, iter->level, lock_type);
btree_iter_node_set(iter, b);
return 0;
}
six_unlock_type(&b->lock, lock_type);
@ -932,7 +935,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
bch2_bkey_unpack(l->b, &tmp.k,
bch2_btree_node_iter_peek(&l->iter, l->b));
b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, true);
b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
if (unlikely(IS_ERR(b)))
return PTR_ERR(b);
@ -971,7 +974,7 @@ static int __btree_iter_traverse_all(struct btree_trans *trans,
#undef btree_iter_cmp_by_idx
retry_all:
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
if (unlikely(ret == -ENOMEM)) {
struct closure cl;
@ -987,7 +990,7 @@ retry_all:
if (unlikely(ret == -EIO)) {
trans->error = true;
iter->flags |= BTREE_ITER_ERROR;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->l[iter->level].b = BTREE_ITER_NO_NODE_ERROR;
goto out;
}
@ -1022,12 +1025,12 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
unsigned l = iter->level;
while (btree_iter_node(iter, l) &&
!(is_btree_node(iter, l) &&
bch2_btree_node_relock(iter, l) &&
(!check_pos ||
btree_iter_pos_in_node(iter, iter->l[l].b)))) {
(!is_btree_node(iter, l) ||
!bch2_btree_node_relock(iter, l) ||
(check_pos &&
!btree_iter_pos_in_node(iter, iter->l[l].b)))) {
btree_node_unlock(iter, l);
iter->l[l].b = BTREE_ITER_NOT_END;
iter->l[l].b = BTREE_ITER_NO_NODE_UP;
l++;
}
@ -1041,7 +1044,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
* Returns 0 on success, -EIO on error (error reading in a btree node).
*
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
* stashed in the iterator and returned from bch2_btree_iter_unlock().
* stashed in the iterator and returned from bch2_trans_exit().
*/
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
@ -1050,7 +1053,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
if (unlikely(iter->level >= BTREE_MAX_DEPTH))
return 0;
if (bch2_btree_iter_relock(iter))
if (bch2_btree_iter_relock(iter, false))
return 0;
/*
@ -1083,7 +1086,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return 0;
iter->level = depth_want;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN;
return ret;
}
}
@ -1099,7 +1102,8 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
ret = __bch2_btree_iter_traverse(iter);
ret = bch2_trans_cond_resched(iter->trans) ?:
__bch2_btree_iter_traverse(iter);
if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
@ -1111,7 +1115,7 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter,
{
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS &&
(btree_node_type_is_extents(iter->btree_id) &&
type != BTREE_ITER_NODES));
bch2_btree_trans_verify_locks(iter->trans);
@ -1291,9 +1295,11 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return btree_iter_peek_uptodate(iter);
while (1) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
}
k = __btree_iter_peek(iter, l);
if (likely(k.k))
@ -1345,10 +1351,17 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
if (IS_ERR_OR_NULL(k.k))
return k;
/*
* XXX: when we just need to relock we should be able to avoid
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
* for that to work
*/
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
return bch2_btree_iter_peek(iter);
}
do {
@ -1548,9 +1561,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
}
return __bch2_btree_iter_peek_slot(iter);
}
@ -1587,7 +1602,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
struct bch_fs *c = trans->c;
unsigned i;
if (btree_id == BTREE_ID_EXTENTS &&
if (btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NODES))
flags |= BTREE_ITER_IS_EXTENTS;
@ -1604,7 +1619,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
iter->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
prefetch(c->btree_roots[btree_id].b);
}
@ -1649,11 +1664,13 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
return ret;
}
static int btree_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
static int bch2_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
{
void *new_iters, *new_updates;
new_size = roundup_pow_of_two(new_size);
BUG_ON(new_size > BTREE_ITER_MAX);
if (new_size <= trans->size)
@ -1694,19 +1711,13 @@ success:
trans->size = new_size;
if (trans->iters_live) {
trans_restart();
trace_trans_restart_iters_realloced(trans->c, trans->ip);
trace_trans_restart_iters_realloced(trans->ip, trans->size);
return -EINTR;
}
return 0;
}
void bch2_trans_preload_iters(struct btree_trans *trans)
{
btree_trans_realloc_iters(trans, BTREE_ITER_MAX);
}
static int btree_trans_iter_alloc(struct btree_trans *trans)
{
unsigned idx = __ffs64(~trans->iters_linked);
@ -1715,7 +1726,7 @@ static int btree_trans_iter_alloc(struct btree_trans *trans)
goto got_slot;
if (trans->nr_iters == trans->size) {
int ret = btree_trans_realloc_iters(trans, trans->size * 2);
int ret = bch2_trans_realloc_iters(trans, trans->size * 2);
if (ret)
return ret;
}
@ -1812,7 +1823,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
return iter;
}
@ -1845,50 +1856,40 @@ struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans,
return &trans->iters[idx];
}
void *bch2_trans_kmalloc(struct btree_trans *trans,
size_t size)
static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size)
{
void *ret;
if (trans->mem_top + size > trans->mem_bytes) {
if (size > trans->mem_bytes) {
size_t old_bytes = trans->mem_bytes;
size_t new_bytes = roundup_pow_of_two(trans->mem_top + size);
size_t new_bytes = roundup_pow_of_two(size);
void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
if (!new_mem)
return ERR_PTR(-ENOMEM);
return -ENOMEM;
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
if (old_bytes) {
trans_restart();
trace_trans_restart_mem_realloced(trans->c, trans->ip);
return ERR_PTR(-EINTR);
trace_trans_restart_mem_realloced(trans->ip, new_bytes);
return -EINTR;
}
}
ret = trans->mem + trans->mem_top;
trans->mem_top += size;
return ret;
return 0;
}
int bch2_trans_unlock(struct btree_trans *trans)
void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
u64 iters = trans->iters_linked;
int ret = 0;
void *p;
int ret;
while (iters) {
unsigned idx = __ffs64(iters);
struct btree_iter *iter = &trans->iters[idx];
ret = bch2_trans_preload_mem(trans, trans->mem_top + size);
if (ret)
return ERR_PTR(ret);
ret = ret ?: btree_iter_err(iter);
__bch2_btree_iter_unlock(iter);
iters ^= 1ULL << idx;
}
return ret;
p = trans->mem + trans->mem_top;
trans->mem_top += size;
return p;
}
inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
@ -1904,7 +1905,7 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters)
}
}
void __bch2_trans_begin(struct btree_trans *trans)
void bch2_trans_begin(struct btree_trans *trans)
{
u64 iters_to_unlink;
@ -1935,7 +1936,9 @@ void __bch2_trans_begin(struct btree_trans *trans)
bch2_btree_iter_traverse_all(trans);
}
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
unsigned expected_nr_iters,
size_t expected_mem_bytes)
{
memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
@ -1944,12 +1947,20 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
trans->fs_usage_deltas = NULL;
if (expected_nr_iters > trans->size)
bch2_trans_realloc_iters(trans, expected_nr_iters);
if (expected_mem_bytes)
bch2_trans_preload_mem(trans, expected_mem_bytes);
}
int bch2_trans_exit(struct btree_trans *trans)
{
bch2_trans_unlock(trans);
kfree(trans->fs_usage_deltas);
kfree(trans->mem);
if (trans->used_mempool)
mempool_free(trans->iters, &trans->c->btree_iters_pool);

View File

@ -18,6 +18,19 @@ static inline struct btree *btree_iter_node(struct btree_iter *iter,
return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL;
}
static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter,
const struct btree *b, unsigned level)
{
/*
* We don't compare the low bits of the lock sequence numbers because
* @iter might have taken a write lock on @b, and we don't want to skip
* the linked iterator if the sequence numbers were equal before taking
* that write lock. The lock sequence number is incremented by taking
* and releasing write locks and is even when unlocked:
*/
return iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1;
}
static inline struct btree *btree_node_parent(struct btree_iter *iter,
struct btree *b)
{
@ -56,30 +69,20 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx)
static inline bool __iter_has_node(const struct btree_iter *iter,
const struct btree *b)
{
/*
* We don't compare the low bits of the lock sequence numbers because
* @iter might have taken a write lock on @b, and we don't want to skip
* the linked iterator if the sequence numbers were equal before taking
* that write lock. The lock sequence number is incremented by taking
* and releasing write locks and is even when unlocked:
*/
return iter->l[b->level].b == b &&
iter->l[b->level].lock_seq >> 1 == b->lock.state.seq >> 1;
btree_node_lock_seq_matches(iter, b, b->level);
}
static inline struct btree_iter *
__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
unsigned idx)
{
EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
struct btree_iter *iter = __trans_next_iter(trans, idx);
for (; idx < trans->nr_iters; idx++)
if ((trans->iters_linked & (1ULL << idx)) &&
__iter_has_node(&trans->iters[idx], b))
return &trans->iters[idx];
while (iter && !__iter_has_node(iter, b))
iter = __trans_next_iter(trans, iter->idx + 1);
return NULL;
return iter;
}
#define trans_for_each_iter_with_node(_trans, _b, _iter) \
@ -101,22 +104,19 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_packed *,
unsigned, unsigned);
int bch2_btree_iter_unlock(struct btree_iter *);
bool bch2_btree_trans_relock(struct btree_trans *);
void bch2_btree_trans_unlock(struct btree_trans *);
bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
unsigned new_locks_want,
bool may_drop_locks)
unsigned new_locks_want)
{
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
return iter->locks_want < new_locks_want
? (may_drop_locks
? (!iter->trans->nounlock
? __bch2_btree_iter_upgrade(iter, new_locks_want)
: __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
: iter->uptodate <= BTREE_ITER_NEED_PEEK;
@ -157,7 +157,7 @@ static inline struct bpos btree_type_successor(enum btree_id id,
if (id == BTREE_ID_INODES) {
pos.inode++;
pos.offset = 0;
} else if (id != BTREE_ID_EXTENTS) {
} else if (!btree_node_type_is_extents(id)) {
pos = bkey_successor(pos);
}
@ -170,7 +170,7 @@ static inline struct bpos btree_type_predecessor(enum btree_id id,
if (id == BTREE_ID_INODES) {
--pos.inode;
pos.offset = 0;
} else /* if (id != BTREE_ID_EXTENTS) */ {
} else {
pos = bkey_predecessor(pos);
}
@ -192,19 +192,18 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
return __btree_iter_cmp(l->btree_id, l->pos, r);
}
int bch2_trans_unlock(struct btree_trans *);
/*
* Unlocks before scheduling
* Note: does not revalidate iterator
*/
static inline void bch2_trans_cond_resched(struct btree_trans *trans)
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
if (need_resched()) {
if (need_resched() || race_fault()) {
bch2_trans_unlock(trans);
schedule();
} else if (race_fault()) {
bch2_trans_unlock(trans);
return bch2_trans_relock(trans) ? 0 : -EINTR;
} else {
return 0;
}
}
@ -232,8 +231,6 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
unsigned flags)
{
bch2_trans_cond_resched(iter->trans);
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_next_slot(iter)
: bch2_btree_iter_next(iter);
@ -262,7 +259,6 @@ static inline int bkey_err(struct bkey_s_c k)
/* new multiple iterator interface: */
void bch2_trans_preload_iters(struct btree_trans *);
int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *);
@ -297,7 +293,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
enum btree_id, struct bpos,
unsigned, unsigned, unsigned);
void __bch2_trans_begin(struct btree_trans *);
void bch2_trans_begin(struct btree_trans *);
static inline void bch2_trans_begin_updates(struct btree_trans *trans)
{
@ -305,27 +301,7 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans)
}
void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
int bch2_trans_exit(struct btree_trans *);
#ifdef TRACE_TRANSACTION_RESTARTS
#define bch2_trans_begin(_trans) \
do { \
if (is_power_of_2((_trans)->nr_restarts) && \
(_trans)->nr_restarts >= 8) \
pr_info("nr restarts: %zu", (_trans)->nr_restarts); \
\
(_trans)->nr_restarts++; \
__bch2_trans_begin(_trans); \
} while (0)
#else
#define bch2_trans_begin(_trans) __bch2_trans_begin(_trans)
#endif
#ifdef TRACE_TRANSACTION_RESTARTS_ALL
#define trans_restart(...) pr_info("transaction restart" __VA_ARGS__)
#else
#define trans_restart(...) no_printk("transaction restart" __VA_ARGS__)
#endif
#endif /* _BCACHEFS_BTREE_ITER_H */
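A minimal usage sketch of the revised transaction setup, mirroring the callers updated throughout this diff: bch2_trans_preload_iters() is gone, and the expected iterator count and bump-allocator size are now passed to bch2_trans_init() directly (btree id and sizing hints below are assumptions for illustration):

	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	/* hint: up to BTREE_ITER_MAX iterators, 512 bytes of preallocated memory */
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);

	for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		/* ... use k ... */
	}

	ret = bch2_trans_exit(&trans) ?: ret;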

View File

@ -107,7 +107,7 @@ static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
BUG_ON(!level && iter->flags & BTREE_ITER_NOUNLOCK);
EBUG_ON(!level && iter->trans->nounlock);
__btree_node_unlock(iter, level);
}
@ -175,20 +175,18 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
}
bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
struct btree_iter *, enum six_lock_type, bool);
struct btree_iter *, enum six_lock_type);
static inline bool btree_node_lock(struct btree *b, struct bpos pos,
unsigned level,
struct btree_iter *iter,
enum six_lock_type type,
bool may_drop_locks)
enum six_lock_type type)
{
EBUG_ON(level >= BTREE_MAX_DEPTH);
return likely(six_trylock_type(&b->lock, type)) ||
btree_node_lock_increment(iter, b, level, type) ||
__bch2_btree_node_lock(b, pos, level, iter,
type, may_drop_locks);
__bch2_btree_node_lock(b, pos, level, iter, type);
}
bool __bch2_btree_node_relock(struct btree_iter *, unsigned);

View File

@ -193,7 +193,6 @@ enum btree_iter_type {
*/
#define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_ERROR (1 << 5)
#define BTREE_ITER_NOUNLOCK (1 << 6)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -269,7 +268,6 @@ struct btree_insert_entry {
struct btree_trans {
struct bch_fs *c;
unsigned long ip;
size_t nr_restarts;
u64 commit_start;
u64 iters_linked;
@ -283,6 +281,7 @@ struct btree_trans {
u8 size;
unsigned used_mempool:1;
unsigned error:1;
unsigned nounlock:1;
unsigned mem_top;
unsigned mem_bytes;
@ -297,11 +296,12 @@ struct btree_trans {
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned flags;
unsigned journal_u64s;
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
struct replicas_delta_list fs_usage_deltas;
struct replicas_delta_list *fs_usage_deltas;
};
#define BTREE_FLAG(flag) \

View File

@ -47,6 +47,7 @@ enum {
__BTREE_INSERT_NOMARK,
__BTREE_INSERT_MARK_INMEM,
__BTREE_INSERT_NO_CLEAR_REPLICAS,
__BTREE_INSERT_BUCKET_INVALIDATE,
__BTREE_INSERT_NOWAIT,
__BTREE_INSERT_GC_LOCK_HELD,
__BCH_HASH_SET_MUST_CREATE,
@ -93,6 +94,8 @@ enum {
#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
#define BTREE_INSERT_BUCKET_INVALIDATE (1 << __BTREE_INSERT_BUCKET_INVALIDATE)
/* Don't block on allocation failure (for new btree nodes: */
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
@ -105,6 +108,8 @@ int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
struct disk_reservation *, u64 *, int flags);
int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *,
struct bpos, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
struct bpos, struct bpos, u64 *);
@ -125,7 +130,7 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
struct btree_trans trans; \
int _ret; \
\
bch2_trans_init(&trans, (_c)); \
bch2_trans_init(&trans, (_c), 0, 0); \
\
do { \
bch2_trans_begin(&trans); \

View File

@ -193,7 +193,9 @@ found:
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key),
false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE|
BCH_BUCKET_MARK_GC);
}
static void __btree_node_free(struct bch_fs *c, struct btree *b)
@ -263,13 +265,13 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
{
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0,
NULL, 0, 0);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE);
if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE|
BCH_BUCKET_MARK_GC);
}
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
@ -1074,10 +1076,12 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0, fs_usage, 0, 0);
0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0, NULL, 0,
0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);
if (old && !btree_node_fake(old))
@ -1170,11 +1174,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0, fs_usage, 0, 0);
0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_node(b)))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0, NULL, 0, BCH_BUCKET_MARK_GC);
0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@ -1550,6 +1557,7 @@ split:
int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
unsigned flags)
{
struct btree_trans *trans = iter->trans;
struct btree *b = iter->l[0].b;
struct btree_update *as;
struct closure cl;
@ -1560,7 +1568,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
* We already have a disk reservation and open buckets pinned; this
* allocation must not block:
*/
trans_for_each_iter(iter->trans, linked)
trans_for_each_iter(trans, linked)
if (linked->btree_id == BTREE_ID_EXTENTS)
flags |= BTREE_INSERT_USE_RESERVE;
@ -1572,10 +1580,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
if (flags & BTREE_INSERT_NOUNLOCK)
return -EINTR;
bch2_btree_trans_unlock(iter->trans);
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
if (!bch2_btree_trans_relock(iter->trans))
if (!bch2_trans_relock(trans))
ret = -EINTR;
}
@ -1583,8 +1591,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
* XXX: figure out how far we might need to split,
* instead of locking/reserving all the way to the root:
*/
if (!bch2_btree_iter_upgrade(iter, U8_MAX,
!(flags & BTREE_INSERT_NOUNLOCK))) {
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
trace_trans_restart_iter_upgrade(trans->ip);
ret = -EINTR;
goto out;
}
@ -1596,7 +1604,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
ret = PTR_ERR(as);
if (ret == -EAGAIN) {
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_btree_iter_unlock(iter);
bch2_trans_unlock(trans);
ret = -EINTR;
}
goto out;
@ -1623,6 +1631,7 @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
unsigned flags,
enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
struct btree_update *as;
struct bkey_format_state new_s;
struct bkey_format new_f;
@ -1646,8 +1655,7 @@ retry:
goto out;
/* XXX: can't be holding read locks */
m = bch2_btree_node_get_sibling(c, iter, b,
!(flags & BTREE_INSERT_NOUNLOCK), sib);
m = bch2_btree_node_get_sibling(c, iter, b, sib);
if (IS_ERR(m)) {
ret = PTR_ERR(m);
goto err;
@ -1694,8 +1702,7 @@ retry:
!down_read_trylock(&c->gc_lock))
goto err_cycle_gc_lock;
if (!bch2_btree_iter_upgrade(iter, U8_MAX,
!(flags & BTREE_INSERT_NOUNLOCK))) {
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
ret = -EINTR;
goto err_unlock;
}
@ -1757,7 +1764,7 @@ retry:
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
out:
bch2_btree_trans_verify_locks(iter->trans);
bch2_btree_trans_verify_locks(trans);
/*
* Don't downgrade locks here: we're called after successful insert,
@ -1777,7 +1784,7 @@ err_cycle_gc_lock:
if (flags & BTREE_INSERT_NOUNLOCK)
goto out;
bch2_btree_iter_unlock(iter);
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
up_read(&c->gc_lock);
@ -1793,7 +1800,7 @@ err:
if ((ret == -EAGAIN || ret == -EINTR) &&
!(flags & BTREE_INSERT_NOUNLOCK)) {
bch2_btree_iter_unlock(iter);
bch2_trans_unlock(trans);
closure_sync(&cl);
ret = bch2_btree_iter_traverse(iter);
if (ret)
@ -1860,6 +1867,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
__le64 seq, unsigned flags)
{
struct btree_trans *trans = iter->trans;
struct closure cl;
struct btree *b;
int ret;
@ -1868,11 +1876,11 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
closure_init_stack(&cl);
bch2_btree_iter_upgrade(iter, U8_MAX, true);
bch2_btree_iter_upgrade(iter, U8_MAX);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
if (!down_read_trylock(&c->gc_lock)) {
bch2_btree_iter_unlock(iter);
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
}
}
@ -1891,7 +1899,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
ret != -EINTR)
break;
bch2_btree_iter_unlock(iter);
bch2_trans_unlock(trans);
closure_sync(&cl);
}
@ -1994,10 +2002,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0, fs_usage, 0, 0);
0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0, NULL, 0,
0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);
bch2_btree_node_free_index(as, NULL,
@ -2040,14 +2050,14 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
closure_init_stack(&cl);
if (!bch2_btree_iter_upgrade(iter, U8_MAX, true))
if (!bch2_btree_iter_upgrade(iter, U8_MAX))
return -EINTR;
if (!down_read_trylock(&c->gc_lock)) {
bch2_btree_trans_unlock(iter->trans);
bch2_trans_unlock(iter->trans);
down_read(&c->gc_lock);
if (!bch2_btree_trans_relock(iter->trans)) {
if (!bch2_trans_relock(iter->trans)) {
ret = -EINTR;
goto err;
}
@ -2058,12 +2068,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
/* bch2_btree_reserve_get will unlock */
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
if (ret) {
bch2_btree_trans_unlock(iter->trans);
bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);
if (!bch2_btree_trans_relock(iter->trans)) {
if (!bch2_trans_relock(iter->trans)) {
ret = -EINTR;
goto err;
}
@ -2087,12 +2097,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
if (ret != -EINTR)
goto err;
bch2_btree_trans_unlock(iter->trans);
bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);
if (!bch2_btree_trans_relock(iter->trans))
if (!bch2_trans_relock(iter->trans))
goto err;
}
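Taken together, these hunks show the reworked marking convention: the old boolean "inserting" argument to bch2_mark_key()/bch2_mark_key_locked() is dropped, and insert vs. overwrite now travels in the flags word alongside the other BCH_BUCKET_MARK_* bits. A rough sketch of the new calling pattern, with argument values assumed for illustration:

	/* new key being added to the btree: */
	bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
			     0, fs_usage, 0,
			     BCH_BUCKET_MARK_INSERT);

	/* key being overwritten, seen by a concurrent gc walk: */
	bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
		      0, NULL, 0,
		      BCH_BUCKET_MARK_OVERWRITE|
		      BCH_BUCKET_MARK_GC);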

View File

@ -430,16 +430,15 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
if (ret != -EAGAIN)
return ret;
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
ret = bch2_journal_preres_get(&c->journal,
&trans->journal_preres, u64s, 0);
if (ret)
return ret;
if (!bch2_btree_trans_relock(trans)) {
trans_restart(" (iter relock after journal preres get blocked)");
trace_trans_restart_journal_preres_get(c, trans->ip);
if (!bch2_trans_relock(trans)) {
trace_trans_restart_journal_preres_get(trans->ip);
return -EINTR;
}
@ -450,21 +449,13 @@ static int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
unsigned u64s = 0;
int ret;
if (unlikely(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
return 0;
if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
flags |= JOURNAL_RES_GET_RESERVED;
trans_for_each_update(trans, i)
u64s += jset_u64s(i->k->k.u64s);
ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
u64s, flags);
trans->journal_u64s, flags);
return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
}
@ -550,33 +541,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
struct btree_iter *linked;
unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
? BCH_BUCKET_MARK_BUCKET_INVALIDATE
: 0;
int ret;
if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
memset(&trans->fs_usage_deltas.fs_usage, 0,
sizeof(trans->fs_usage_deltas.fs_usage));
trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
}
trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
trans_for_each_update_iter(trans, i)
if (update_has_triggers(trans, i) &&
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i,
&trans->fs_usage_deltas);
ret = bch2_trans_mark_update(trans, i);
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip);
if (ret)
return ret;
goto out_clear_replicas;
}
btree_trans_lock_write(c, trans);
if (race_fault()) {
ret = -EINTR;
trans_restart(" (race)");
trace_trans_restart_fault_inject(c, trans->ip);
trace_trans_restart_fault_inject(trans->ip);
goto out;
}
@ -610,9 +597,16 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
* Don't get journal reservation until after we know insert will
* succeed:
*/
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
if (ret)
goto out;
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
trans->journal_u64s = 0;
trans_for_each_update(trans, i)
trans->journal_u64s += jset_u64s(i->k->k.u64s);
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
if (ret)
goto out;
}
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (journal_seq_verify(c))
@ -623,33 +617,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
i->k->k.version = MAX_VERSION;
}
if (trans->flags & BTREE_INSERT_NOUNLOCK) {
/*
* linked iterators that weren't being updated may or may not
* have been traversed/locked, depending on what the caller was
* doing:
*/
trans_for_each_iter(trans, linked)
if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
linked->flags |= BTREE_ITER_NOUNLOCK;
}
trans_for_each_update_iter(trans, i)
if (update_has_triggers(trans, i) &&
!update_triggers_transactional(trans, i))
bch2_mark_update(trans, i, fs_usage, 0);
bch2_mark_update(trans, i, fs_usage, mark_flags);
if (fs_usage) {
if (fs_usage && trans->fs_usage_deltas)
bch2_replicas_delta_list_apply(c, fs_usage,
&trans->fs_usage_deltas);
trans->fs_usage_deltas);
if (fs_usage)
bch2_trans_fs_usage_apply(trans, fs_usage);
}
if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
unlikely(c->gc_pos.phase))
trans_for_each_update_iter(trans, i)
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
bch2_mark_update(trans, i, NULL,
mark_flags|
BCH_BUCKET_MARK_GC);
trans_for_each_update(trans, i)
@ -667,6 +652,12 @@ out:
}
bch2_journal_res_put(&c->journal, &trans->journal_res);
out_clear_replicas:
if (trans->fs_usage_deltas) {
memset(&trans->fs_usage_deltas->fs_usage, 0,
sizeof(trans->fs_usage_deltas->fs_usage));
trans->fs_usage_deltas->used = 0;
}
return ret;
}
@ -725,9 +716,10 @@ int bch2_trans_commit_error(struct btree_trans *trans,
* don't care if we got ENOSPC because we told split it
* couldn't block:
*/
if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)");
trace_trans_restart_btree_node_split(c, trans->ip);
if (!ret ||
ret == -EINTR ||
(flags & BTREE_INSERT_NOUNLOCK)) {
trace_trans_restart_btree_node_split(trans->ip);
ret = -EINTR;
}
break;
@ -743,25 +735,23 @@ int bch2_trans_commit_error(struct btree_trans *trans,
return ret;
}
if (bch2_btree_trans_relock(trans))
if (bch2_trans_relock(trans))
return 0;
trans_restart(" (iter relock after marking replicas)");
trace_trans_restart_mark_replicas(c, trans->ip);
trace_trans_restart_mark_replicas(trans->ip);
ret = -EINTR;
break;
case BTREE_INSERT_NEED_JOURNAL_RES:
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret)
return ret;
if (bch2_btree_trans_relock(trans))
if (bch2_trans_relock(trans))
return 0;
trans_restart(" (iter relock after journal res get blocked)");
trace_trans_restart_journal_res_get(c, trans->ip);
trace_trans_restart_journal_res_get(trans->ip);
ret = -EINTR;
break;
default:
@ -773,8 +763,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
int ret2 = bch2_btree_iter_traverse_all(trans);
if (ret2) {
trans_restart(" (traverse)");
trace_trans_restart_traverse(c, trans->ip);
trace_trans_restart_traverse(trans->ip);
return ret2;
}
@ -785,8 +774,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (!(flags & BTREE_INSERT_ATOMIC))
return 0;
trans_restart(" (atomic)");
trace_trans_restart_atomic(c, trans->ip);
trace_trans_restart_atomic(trans->ip);
}
return ret;
@ -808,16 +796,11 @@ static int __bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct btree_iter *linked;
int ret;
trans_for_each_update_iter(trans, i) {
unsigned old_locks_want = i->iter->locks_want;
unsigned old_uptodate = i->iter->uptodate;
if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
old_locks_want, old_uptodate);
if (!bch2_btree_iter_upgrade(i->iter, 1)) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto err;
}
@ -831,18 +814,20 @@ static int __bch2_trans_commit(struct btree_trans *trans,
if (unlikely(ret))
goto err;
if (trans->flags & BTREE_INSERT_NOUNLOCK)
trans->nounlock = true;
trans_for_each_update_leaf(trans, i)
bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
trans->nounlock = false;
trans_for_each_update_iter(trans, i)
bch2_btree_iter_downgrade(i->iter);
err:
/* make sure we didn't drop or screw up locks: */
bch2_btree_trans_verify_locks(trans);
trans_for_each_iter(trans, linked)
linked->flags &= ~BTREE_ITER_NOUNLOCK;
return ret;
}
@ -883,7 +868,7 @@ int bch2_trans_commit(struct btree_trans *trans,
if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
return -EROFS;
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
ret = bch2_fs_read_write_early(c);
if (ret)
@ -891,7 +876,7 @@ int bch2_trans_commit(struct btree_trans *trans,
percpu_ref_get(&c->writes);
if (!bch2_btree_trans_relock(trans)) {
if (!bch2_trans_relock(trans)) {
ret = -EINTR;
goto err;
}
@ -965,20 +950,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
return i;
}
int bch2_btree_delete_at(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)
{
struct bkey_i k;
bkey_init(&k.k);
k.k.p = iter->pos;
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k));
return bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|flags);
}
/**
* bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs
@ -995,7 +966,9 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
struct btree_iter *iter;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
@ -1003,35 +976,24 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags);
if (ret == -EINTR)
goto retry;
bch2_trans_exit(&trans);
return ret;
}
/*
* bch_btree_delete_range - delete everything within a given range
*
* Range is a half open interval - [start, end)
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
u64 *journal_seq)
int bch2_btree_delete_at_range(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos end,
u64 *journal_seq)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
retry:
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
/* really shouldn't be using a bare, unpadded bkey_i */
struct bkey_i delete;
bkey_init(&delete.k);
@ -1049,26 +1011,72 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
delete.k.p = iter->pos;
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
unsigned max_sectors =
KEY_SIZE_MAX & (~0 << trans->c->block_bits);
/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete.k);
bch2_extent_trim_atomic(&delete, iter);
}
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &delete));
ret = bch2_trans_commit(&trans, NULL, journal_seq,
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &delete));
ret = bch2_trans_commit(trans, NULL, journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
if (ret == -EINTR)
ret = 0;
if (ret)
break;
bch2_trans_cond_resched(&trans);
bch2_trans_cond_resched(trans);
}
bch2_trans_exit(&trans);
if (ret == -EINTR) {
ret = 0;
goto retry;
}
return ret;
}
int bch2_btree_delete_at(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)
{
struct bkey_i k;
bkey_init(&k.k);
k.k.p = iter->pos;
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k));
return bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|flags);
}
/*
* bch2_btree_delete_range - delete everything within a given range
*
* Range is a half-open interval - [start, end)
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
u64 *journal_seq)
{
struct btree_trans trans;
struct btree_iter *iter;
int ret = 0;
/*
* XXX: whether we need mem/more iters depends on whether this btree id
* has triggers
*/
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq);
ret = bch2_trans_exit(&trans) ?: ret;
BUG_ON(ret == -EINTR);
return ret;
}
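For context, a minimal hypothetical caller of the interfaces reworked above (not part of the patch): the two new bch2_trans_init() arguments appear to pre-reserve iterators and transaction memory, and -EINTR restarts are now retried inside bch2_btree_delete_at_range(), so a caller of bch2_btree_delete_range() only has to check the final result.

/* Sketch only; the helper name is made up and error handling is trimmed. */
static int example_wipe_btree(struct bch_fs *c, enum btree_id id)
{
	return bch2_btree_delete_range(c, id, POS_MIN, POS_MAX, NULL);
}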

View File

@ -495,9 +495,11 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
buckets = bucket_array(ca);
preempt_disable();
for_each_bucket(g, buckets)
bch2_dev_usage_update(c, ca, c->usage_base,
old, g->mark, false);
preempt_enable();
}
}
@ -544,6 +546,67 @@ static inline void update_cached_sectors(struct bch_fs *c,
update_replicas(c, fs_usage, &r.e, sectors);
}
static struct replicas_delta_list *
replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
{
struct replicas_delta_list *d = trans->fs_usage_deltas;
unsigned new_size = d ? (d->size + more) * 2 : 128;
if (!d || d->used + more > d->size) {
d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
BUG_ON(!d);
d->size = new_size;
trans->fs_usage_deltas = d;
}
return d;
}
static inline void update_replicas_list(struct btree_trans *trans,
struct bch_replicas_entry *r,
s64 sectors)
{
struct replicas_delta_list *d;
struct replicas_delta *n;
unsigned b = replicas_entry_bytes(r) + 8;
d = replicas_deltas_realloc(trans, b);
n = (void *) d->d + d->used;
n->delta = sectors;
memcpy(&n->r, r, replicas_entry_bytes(r));
d->used += b;
}
static inline void update_cached_sectors_list(struct btree_trans *trans,
unsigned dev, s64 sectors)
{
struct bch_replicas_padded r;
bch2_replicas_entry_cached(&r.e, dev);
update_replicas_list(trans, &r.e, sectors);
}
void bch2_replicas_delta_list_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct replicas_delta_list *r)
{
struct replicas_delta *d = r->d;
struct replicas_delta *top = (void *) r->d + r->used;
acc_u64s((u64 *) fs_usage,
(u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
while (d != top) {
BUG_ON((void *) d > (void *) top);
update_replicas(c, fs_usage, &d->r, d->delta);
d = (void *) d + replicas_entry_bytes(&d->r) + 8;
}
}
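As a rough sketch of the layout the helpers above maintain (hypothetical helper, not in the patch): each entry appended by update_replicas_list() appears to be an 8-byte delta followed by the variable-length replicas entry, which is why both the append and apply paths step through the buffer with the same stride.

/* Illustrative only: advance to the next packed delta entry. */
static inline struct replicas_delta *
example_replicas_delta_next(struct replicas_delta *d)
{
	return (void *) d + replicas_entry_bytes(&d->r) + 8;
}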
#define do_mark_fn(fn, c, pos, flags, ...) \
({ \
int gc, ret = 0; \
@ -623,23 +686,20 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
}
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
bool inserting,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags,
bool gc)
u64 journal_seq, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
struct bkey_alloc_unpacked u;
struct bch_dev *ca;
struct bucket *g;
struct bucket_mark old, m;
if (!inserting)
return 0;
/*
* alloc btree is read in by bch2_alloc_read, not gc:
*/
if (flags & BCH_BUCKET_MARK_GC)
if ((flags & BCH_BUCKET_MARK_GC) &&
!(flags & BCH_BUCKET_MARK_BUCKET_INVALIDATE))
return 0;
ca = bch_dev_bkey_exists(c, k.k->p.inode);
@ -650,18 +710,21 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
g = __bucket(ca, k.k->p.offset, gc);
u = bch2_alloc_unpack(k);
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
old = bucket_cmpxchg(g, m, ({
m.gen = u.gen;
m.data_type = u.data_type;
m.dirty_sectors = u.dirty_sectors;
m.cached_sectors = u.cached_sectors;
if (!(flags & BCH_BUCKET_MARK_GC)) {
if (journal_seq) {
m.journal_seq_valid = 1;
m.journal_seq = journal_seq;
}
}));
if (!(flags & BCH_BUCKET_MARK_ALLOC_READ))
bch2_dev_usage_update(c, ca, fs_usage, old, m, gc);
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = u.oldest_gen;
@ -672,7 +735,8 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
* not:
*/
if (old.cached_sectors) {
if ((flags & BCH_BUCKET_MARK_BUCKET_INVALIDATE) &&
old.cached_sectors) {
update_cached_sectors(c, fs_usage, ca->dev_idx,
-old.cached_sectors);
trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
@ -759,11 +823,12 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
static void bucket_set_stripe(struct bch_fs *c,
const struct bch_stripe *v,
bool enabled,
struct bch_fs_usage *fs_usage,
u64 journal_seq,
bool gc)
unsigned flags)
{
bool enabled = !(flags & BCH_BUCKET_MARK_OVERWRITE);
bool gc = flags & BCH_BUCKET_MARK_GC;
unsigned i;
for (i = 0; i < v->nr_blocks; i++) {
@ -789,9 +854,9 @@ static bool bch2_mark_pointer(struct bch_fs *c,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags,
bool gc)
u64 journal_seq, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
@ -858,9 +923,9 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
struct bch_extent_stripe_ptr p,
enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
s64 sectors, unsigned flags,
bool gc)
s64 sectors, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
struct stripe *m;
unsigned old, new, nr_data;
int blocks_nonempty_delta;
@ -913,8 +978,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags,
bool gc)
unsigned journal_seq, unsigned flags)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@ -935,7 +999,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
? sectors
: ptr_disk_sectors_delta(p, sectors);
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
fs_usage, journal_seq, flags, gc);
fs_usage, journal_seq, flags);
if (p.ptr.cached) {
if (disk_sectors && !stale)
@ -948,7 +1012,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
for (i = 0; i < p.ec_nr; i++) {
ret = bch2_mark_stripe_ptr(c, p.ec[i],
data_type, fs_usage,
disk_sectors, flags, gc);
disk_sectors, flags);
if (ret)
return ret;
}
@ -964,11 +1028,10 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
}
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags,
bool gc)
u64 journal_seq, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
size_t idx = s.k->p.offset;
struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
@ -976,19 +1039,14 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
spin_lock(&c->ec_stripes_heap_lock);
if (!m || (!inserting && !m->alive)) {
if (!m || ((flags & BCH_BUCKET_MARK_OVERWRITE) && !m->alive)) {
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
return -1;
}
if (!gc && m->alive)
bch2_stripes_heap_del(c, m, idx);
memset(m, 0, sizeof(*m));
if (inserting) {
if (!(flags & BCH_BUCKET_MARK_OVERWRITE)) {
m->sectors = le16_to_cpu(s.v->sectors);
m->algorithm = s.v->algorithm;
m->nr_blocks = s.v->nr_blocks;
@ -996,11 +1054,11 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bch2_bkey_to_replicas(&m->r.e, k);
/*
* XXX: account for stripes somehow here
*/
/*
* XXX: account for stripes somehow here
*/
#if 0
update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
#endif
/* gc recalculates these fields: */
@ -1013,53 +1071,54 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
}
if (!gc)
bch2_stripes_heap_insert(c, m, idx);
else
m->alive = true;
bch2_stripes_heap_update(c, m, idx);
m->alive = true;
} else {
if (!gc)
bch2_stripes_heap_del(c, m, idx);
memset(m, 0, sizeof(*m));
}
spin_unlock(&c->ec_stripes_heap_lock);
bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
bucket_set_stripe(c, s.v, fs_usage, 0, flags);
return 0;
}
int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k,
bool inserting, s64 sectors,
struct bkey_s_c k, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
int ret = 0;
preempt_disable();
if (!fs_usage || gc)
fs_usage = fs_usage_ptr(c, journal_seq, gc);
if (!fs_usage || (flags & BCH_BUCKET_MARK_GC))
fs_usage = fs_usage_ptr(c, journal_seq,
flags & BCH_BUCKET_MARK_GC);
switch (k.k->type) {
case KEY_TYPE_alloc:
ret = bch2_mark_alloc(c, k, inserting,
fs_usage, journal_seq, flags, gc);
ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
break;
case KEY_TYPE_btree_ptr:
ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
fs_usage, journal_seq, flags, gc);
sectors = !(flags & BCH_BUCKET_MARK_OVERWRITE)
? c->opts.btree_node_size
: -c->opts.btree_node_size;
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_BTREE,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_extent:
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags, gc);
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_stripe:
ret = bch2_mark_stripe(c, k, inserting,
fs_usage, journal_seq, flags, gc);
ret = bch2_mark_stripe(c, k, fs_usage, journal_seq, flags);
break;
case KEY_TYPE_inode:
if (inserting)
if (!(flags & BCH_BUCKET_MARK_OVERWRITE))
fs_usage->nr_inodes++;
else
fs_usage->nr_inodes--;
@ -1083,14 +1142,14 @@ int bch2_mark_key_locked(struct bch_fs *c,
}
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
bool inserting, s64 sectors,
s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
int ret;
percpu_down_read_preempt_disable(&c->mark_lock);
ret = bch2_mark_key_locked(c, k, inserting, sectors,
ret = bch2_mark_key_locked(c, k, sectors,
fs_usage, journal_seq, flags);
percpu_up_read_preempt_enable(&c->mark_lock);
@ -1130,9 +1189,9 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
sectors = old.k->p.offset - new->k.p.offset;
BUG_ON(sectors <= 0);
bch2_mark_key_locked(c, old, true, sectors,
bch2_mark_key_locked(c, old, sectors,
fs_usage, trans->journal_res.seq,
flags);
BCH_BUCKET_MARK_INSERT|flags);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
@ -1142,8 +1201,9 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
BUG_ON(sectors >= 0);
}
return bch2_mark_key_locked(c, old, false, sectors, fs_usage,
trans->journal_res.seq, flags) ?: 1;
return bch2_mark_key_locked(c, old, sectors, fs_usage,
trans->journal_res.seq,
BCH_BUCKET_MARK_OVERWRITE|flags) ?: 1;
}
int bch2_mark_update(struct btree_trans *trans,
@ -1162,10 +1222,11 @@ int bch2_mark_update(struct btree_trans *trans,
return 0;
if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),
fs_usage, trans->journal_res.seq, flags);
fs_usage, trans->journal_res.seq,
BCH_BUCKET_MARK_INSERT|flags);
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
return 0;
@ -1246,46 +1307,6 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
/* trans_mark: */
static inline void update_replicas_list(struct replicas_delta_list *d,
struct bch_replicas_entry *r,
s64 sectors)
{
d->top->delta = sectors;
memcpy(&d->top->r, r, replicas_entry_bytes(r));
d->top = (void *) d->top + replicas_entry_bytes(r) + 8;
BUG_ON((void *) d->top > (void *) d->d + sizeof(d->pad));
}
static inline void update_cached_sectors_list(struct replicas_delta_list *d,
unsigned dev, s64 sectors)
{
struct bch_replicas_padded r;
bch2_replicas_entry_cached(&r.e, dev);
update_replicas_list(d, &r.e, sectors);
}
void bch2_replicas_delta_list_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct replicas_delta_list *r)
{
struct replicas_delta *d = r->d;
acc_u64s((u64 *) fs_usage,
(u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
while (d != r->top) {
BUG_ON((void *) d > (void *) r->top);
update_replicas(c, fs_usage, &d->r, d->delta);
d = (void *) d + replicas_entry_bytes(&d->r) + 8;
}
}
static int trans_get_key(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos,
struct btree_insert_entry **insert,
@ -1347,8 +1368,7 @@ static int trans_update_key(struct btree_trans *trans,
static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct replicas_delta_list *d)
s64 sectors, enum bch_data_type data_type)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
@ -1409,8 +1429,7 @@ out:
static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct bch_extent_stripe_ptr p,
s64 sectors, enum bch_data_type data_type,
struct replicas_delta_list *d)
s64 sectors, enum bch_data_type data_type)
{
struct bch_replicas_padded r;
struct btree_insert_entry *insert;
@ -1455,7 +1474,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
bch2_bkey_to_replicas(&r.e, s.s_c);
update_replicas_list(d, &r.e, sectors);
update_replicas_list(trans, &r.e, sectors);
out:
bch2_trans_iter_put(trans, iter);
return ret;
@ -1463,8 +1482,7 @@ out:
static int bch2_trans_mark_extent(struct btree_trans *trans,
struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct replicas_delta_list *d)
s64 sectors, enum bch_data_type data_type)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@ -1487,7 +1505,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
: ptr_disk_sectors_delta(p, sectors);
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
data_type, d);
data_type);
if (ret < 0)
return ret;
@ -1495,7 +1513,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
if (p.ptr.cached) {
if (disk_sectors && !stale)
update_cached_sectors_list(d, p.ptr.dev,
update_cached_sectors_list(trans, p.ptr.dev,
disk_sectors);
} else if (!p.ec_nr) {
dirty_sectors += disk_sectors;
@ -1503,7 +1521,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
} else {
for (i = 0; i < p.ec_nr; i++) {
ret = bch2_trans_mark_stripe_ptr(trans, p.ec[i],
disk_sectors, data_type, d);
disk_sectors, data_type);
if (ret)
return ret;
}
@ -1513,29 +1531,32 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
}
if (dirty_sectors)
update_replicas_list(d, &r.e, dirty_sectors);
update_replicas_list(trans, &r.e, dirty_sectors);
return 0;
}
int bch2_trans_mark_key(struct btree_trans *trans,
struct bkey_s_c k,
bool inserting, s64 sectors,
struct replicas_delta_list *d)
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
s64 sectors, unsigned flags)
{
struct replicas_delta_list *d;
struct bch_fs *c = trans->c;
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
return bch2_trans_mark_extent(trans, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE, d);
sectors = !(flags & BCH_BUCKET_MARK_OVERWRITE)
? c->opts.btree_node_size
: -c->opts.btree_node_size;
return bch2_trans_mark_extent(trans, k, sectors,
BCH_DATA_BTREE);
case KEY_TYPE_extent:
return bch2_trans_mark_extent(trans, k,
sectors, BCH_DATA_USER, d);
return bch2_trans_mark_extent(trans, k, sectors,
BCH_DATA_USER);
case KEY_TYPE_inode:
if (inserting)
d = replicas_deltas_realloc(trans, 0);
if (!(flags & BCH_BUCKET_MARK_OVERWRITE))
d->fs_usage.nr_inodes++;
else
d->fs_usage.nr_inodes--;
@ -1543,6 +1564,8 @@ int bch2_trans_mark_key(struct btree_trans *trans,
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
d = replicas_deltas_realloc(trans, 0);
sectors *= replicas;
replicas = clamp_t(unsigned, replicas, 1,
ARRAY_SIZE(d->fs_usage.persistent_reserved));
@ -1557,8 +1580,7 @@ int bch2_trans_mark_key(struct btree_trans *trans,
}
int bch2_trans_mark_update(struct btree_trans *trans,
struct btree_insert_entry *insert,
struct replicas_delta_list *d)
struct btree_insert_entry *insert)
{
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
@ -1570,9 +1592,10 @@ int bch2_trans_mark_update(struct btree_trans *trans,
return 0;
ret = bch2_trans_mark_key(trans,
bkey_i_to_s_c(insert->k), true,
bkey_i_to_s_c(insert->k),
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k), d);
bkey_start_offset(&insert->k->k),
BCH_BUCKET_MARK_INSERT);
if (ret)
return ret;
@ -1606,8 +1629,8 @@ int bch2_trans_mark_update(struct btree_trans *trans,
sectors = k.k->p.offset - insert->k->k.p.offset;
BUG_ON(sectors <= 0);
ret = bch2_trans_mark_key(trans, k, true,
sectors, d);
ret = bch2_trans_mark_key(trans, k, sectors,
BCH_BUCKET_MARK_INSERT);
if (ret)
return ret;
@ -1619,7 +1642,8 @@ int bch2_trans_mark_update(struct btree_trans *trans,
BUG_ON(sectors >= 0);
}
ret = bch2_trans_mark_key(trans, k, false, sectors, d);
ret = bch2_trans_mark_key(trans, k, sectors,
BCH_BUCKET_MARK_OVERWRITE);
if (ret)
return ret;

View File

@ -248,15 +248,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
#define BCH_BUCKET_MARK_GC (1 << 0)
#define BCH_BUCKET_MARK_NOATOMIC (1 << 1)
#define BCH_BUCKET_MARK_INSERT (1 << 0)
#define BCH_BUCKET_MARK_OVERWRITE (1 << 1)
#define BCH_BUCKET_MARK_BUCKET_INVALIDATE (1 << 2)
#define BCH_BUCKET_MARK_GC (1 << 3)
#define BCH_BUCKET_MARK_ALLOC_READ (1 << 4)
#define BCH_BUCKET_MARK_NOATOMIC (1 << 5)
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
bool, s64, struct bch_fs_usage *,
u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct bch_fs_usage *,
u64, unsigned);
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, unsigned);
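To make the flag-based marking interface concrete, a hypothetical caller (not from the patch): the old bool inserting parameter is now expressed as BCH_BUCKET_MARK_INSERT or BCH_BUCKET_MARK_OVERWRITE in the flags word, and passing a NULL fs_usage with journal_seq 0 mirrors how the stripe read path calls bch2_mark_key() elsewhere in this commit.

/* Sketch only; the function name is made up. */
static int example_mark_insert(struct bch_fs *c, struct bkey_s_c k, s64 sectors)
{
	return bch2_mark_key(c, k, sectors, NULL, 0, BCH_BUCKET_MARK_INSERT);
}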
@ -269,11 +271,9 @@ int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
void bch2_replicas_delta_list_apply(struct bch_fs *,
struct bch_fs_usage *,
struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
bool, s64, struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned);
int bch2_trans_mark_update(struct btree_trans *,
struct btree_insert_entry *,
struct replicas_delta_list *);
struct btree_insert_entry *);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
/* disk reservations: */
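Similarly, a hypothetical caller of the transactional variant declared above: replica deltas now accumulate on the transaction itself rather than in a caller-supplied replicas_delta_list, so only the key, a sector delta, and the mark flags are passed; this mirrors the journal-replay call later in this commit.

/* Sketch only; the function name is made up. */
static int example_trans_mark_overwrite(struct btree_trans *trans,
					struct bkey_s_c k)
{
	return bch2_trans_mark_key(trans, k, -((s64) k.k->size),
				   BCH_BUCKET_MARK_OVERWRITE);
}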

View File

@ -100,11 +100,10 @@ struct replicas_delta {
} __packed;
struct replicas_delta_list {
unsigned size;
unsigned used;
struct bch_fs_usage fs_usage;
struct replicas_delta *top;
struct replicas_delta d[0];
u8 pad[256];
};
/*

View File

@ -280,22 +280,8 @@ void bch2_encrypt_bio(struct bch_fs *c, unsigned type,
do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
}
static inline bool bch2_checksum_mergeable(unsigned type)
{
switch (type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C:
case BCH_CSUM_CRC64:
return true;
default:
return false;
}
}
static struct bch_csum bch2_checksum_merge(unsigned type,
struct bch_csum a,
struct bch_csum b, size_t b_len)
struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
struct bch_csum b, size_t b_len)
{
BUG_ON(!bch2_checksum_mergeable(type));

View File

@ -8,6 +8,22 @@
#include <linux/crc64.h>
#include <crypto/chacha.h>
static inline bool bch2_checksum_mergeable(unsigned type)
{
switch (type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C:
case BCH_CSUM_CRC64:
return true;
default:
return false;
}
}
struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
struct bch_csum, size_t);
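A small hypothetical wrapper (not in the patch) showing how the now-public pair above is used together, as the extent merge path does later in this commit; the length argument appears to be in bytes, since the merge path shifts a sector count left by 9 when calling it.

/* Sketch only: fold the right-hand checksum into the left-hand one. */
static bool example_merge_csums(unsigned type, struct bch_csum *l,
				struct bch_csum r, unsigned r_sectors)
{
	if (!bch2_checksum_mergeable(type))
		return false;

	*l = bch2_checksum_merge(type, *l, r, (size_t) r_sectors << 9);
	return true;
}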
static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
{
return crc64_be(crc, p, len);

View File

@ -220,7 +220,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
if (!i->size)
return i->ret;
bch2_trans_init(&trans, i->c);
bch2_trans_init(&trans, i->c, 0, 0);
iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
k = bch2_btree_iter_peek(iter);
@ -274,7 +274,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (!i->size || !bkey_cmp(POS_MAX, i->from))
return i->ret;
bch2_trans_init(&trans, i->c);
bch2_trans_init(&trans, i->c, 0, 0);
for_each_btree_node(&trans, iter, i->id, i->from, 0, b) {
bch2_btree_node_to_text(&PBUF(i->buf), i->c, b);
@ -327,7 +327,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (!i->size)
return i->ret;
bch2_trans_init(&trans, i->c);
bch2_trans_init(&trans, i->c, 0, 0);
iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);

View File

@ -312,7 +312,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
struct bkey_s_c k;
u64 inum = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_hash_lookup(&trans, bch2_dirent_hash_desc,
hash_info, dir_inum, name, 0);
@ -369,7 +369,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
if (!dir_emit_dots(file, ctx))
return 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
POS(inode->v.i_ino, ctx->pos), 0, k, ret) {

View File

@ -113,7 +113,7 @@ const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
bkey_val_u64s(k.k) < stripe_val_u64s(s))
return "incorrect value size";
return NULL;
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
@ -134,6 +134,8 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev,
(u64) s->ptrs[i].offset,
stripe_blockcount_get(s, i));
bch2_bkey_ptrs_to_text(out, c, k);
}
static int ptr_matches_stripe(struct bch_fs *c,
@ -177,6 +179,25 @@ static int extent_matches_stripe(struct bch_fs *c,
return -1;
}
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
struct bkey_s_c_extent e;
const union bch_extent_entry *entry;
if (!bkey_extent_is_data(k.k))
return false;
e = bkey_s_c_to_extent(k);
extent_for_each_entry(e, entry)
if (extent_entry_type(entry) ==
BCH_EXTENT_ENTRY_stripe_ptr &&
entry->stripe_ptr.idx == idx)
return true;
return false;
}
static void ec_stripe_key_init(struct bch_fs *c,
struct bkey_i_stripe *s,
struct open_buckets *blocks,
@ -419,7 +440,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
if (!buf)
return -ENOMEM;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
POS(0, stripe_idx),
@ -541,7 +562,7 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT))
return ret;
bch2_btree_trans_unlock(iter->trans);
bch2_trans_unlock(iter->trans);
ret = -EINTR;
if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
@ -589,17 +610,21 @@ void bch2_stripes_heap_update(struct bch_fs *c,
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t i;
heap_verify_backpointer(c, idx);
if (m->alive) {
heap_verify_backpointer(c, idx);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_verify_backpointer(c, idx);
heap_verify_backpointer(c, idx);
} else {
bch2_stripes_heap_insert(c, m, idx);
}
if (stripe_idx_to_delete(c) >= 0)
schedule_work(&c->ec_stripe_delete_work);
@ -676,7 +701,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -743,8 +768,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
BKEY_PADDED(k) tmp;
int ret = 0, dev, idx;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(pos),
@ -753,12 +777,19 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) &&
bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
bch2_btree_iter_next(iter);
continue;
}
idx = extent_matches_stripe(c, &s->key.v, k);
if (idx < 0) {
bch2_btree_iter_next(iter);
continue;
}
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
dev = s->key.v.ptrs[idx].dev;
bkey_reassemble(&tmp.k, k);
@ -1207,7 +1238,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
BUG_ON(!new_key);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@ -1243,10 +1274,12 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
if (ret)
return ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret)
bch2_mark_key(c, k, true, 0, NULL, 0, 0);
bch2_mark_key(c, k, 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
@ -1257,7 +1290,9 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
for_each_journal_key(*journal_keys, i)
if (i->btree_id == BTREE_ID_EC)
bch2_mark_key(c, bkey_i_to_s_c(i->k),
true, 0, NULL, 0, 0);
0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
return 0;
}
@ -1270,7 +1305,7 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
size_t i, idx = 0;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0);

View File

@ -500,43 +500,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
}
}
static const char *extent_ptr_invalid(const struct bch_fs *c,
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr2;
struct bch_dev *ca;
if (ptr->dev >= c->sb.nr_devices ||
!c->devs[ptr->dev])
return "pointer to invalid device";
ca = bch_dev_bkey_exists(c, ptr->dev);
if (!ca)
return "pointer to invalid device";
bkey_for_each_ptr(ptrs, ptr2)
if (ptr != ptr2 && ptr->dev == ptr2->dev)
return "multiple pointers to same device";
if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
return "offset past end of device";
if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
return "offset before first bucket";
if (bucket_remainder(ca, ptr->offset) +
size_ondisk > ca->mi.bucket_size)
return "spans multiple buckets";
return NULL;
}
static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@ -590,37 +555,109 @@ static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
}
}
/* Btree ptrs */
static const char *extent_ptr_invalid(const struct bch_fs *c,
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr2;
struct bch_dev *ca;
const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (!bch2_dev_exists2(c, ptr->dev))
return "pointer to invalid device";
ca = bch_dev_bkey_exists(c, ptr->dev);
if (!ca)
return "pointer to invalid device";
bkey_for_each_ptr(ptrs, ptr2)
if (ptr != ptr2 && ptr->dev == ptr2->dev)
return "multiple pointers to same device";
if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
return "offset past end of device";
if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
return "offset before first bucket";
if (bucket_remainder(ca, ptr->offset) +
size_ondisk > ca->mi.bucket_size)
return "spans multiple buckets";
return NULL;
}
const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc;
unsigned size_ondisk = k.k->size;
const char *reason;
unsigned nonce = UINT_MAX;
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big";
if (k.k->type == KEY_TYPE_btree_ptr)
size_ondisk = c->opts.btree_node_size;
bkey_extent_entry_for_each(ptrs, entry) {
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
return "invalid extent entry type";
if (!extent_entry_is_ptr(entry))
if (k.k->type == KEY_TYPE_btree_ptr &&
!extent_entry_is_ptr(entry))
return "has non ptr field";
}
bkey_for_each_ptr(ptrs, ptr) {
reason = extent_ptr_invalid(c, k, ptr,
c->opts.btree_node_size,
true);
if (reason)
return reason;
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
reason = extent_ptr_invalid(c, k, &entry->ptr,
size_ondisk, false);
if (reason)
return reason;
break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
if (crc.offset + crc.live_size >
crc.uncompressed_size)
return "checksum offset + key size > uncompressed size";
size_ondisk = crc.compressed_size;
if (!bch2_checksum_type_valid(c, crc.csum_type))
return "invalid checksum type";
if (crc.compression_type >= BCH_COMPRESSION_NR)
return "invalid compression type";
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
else if (nonce != crc.offset + crc.nonce)
return "incorrect nonce";
}
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
}
}
return NULL;
}
/* Btree ptrs */
const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big";
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
struct bkey_s_c k)
{
@ -665,13 +702,7 @@ err:
void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const char *invalid;
bkey_ptrs_to_text(out, c, k);
invalid = bch2_btree_ptr_invalid(c, k);
if (invalid)
pr_buf(out, " invalid: %s", invalid);
bch2_bkey_ptrs_to_text(out, c, k);
}
/* Extents */
@ -1260,60 +1291,10 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct bch_extent_crc_unpacked crc;
const struct bch_extent_ptr *ptr;
unsigned size_ondisk = e.k->size;
const char *reason;
unsigned nonce = UINT_MAX;
if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX)
if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
return "value too big";
extent_for_each_entry(e, entry) {
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
return "invalid extent entry type";
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
ptr = entry_to_ptr(entry);
reason = extent_ptr_invalid(c, e.s_c, &entry->ptr,
size_ondisk, false);
if (reason)
return reason;
break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
if (crc.offset + e.k->size >
crc.uncompressed_size)
return "checksum offset + key size > uncompressed size";
size_ondisk = crc.compressed_size;
if (!bch2_checksum_type_valid(c, crc.csum_type))
return "invalid checksum type";
if (crc.compression_type >= BCH_COMPRESSION_NR)
return "invalid compression type";
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
else if (nonce != crc.offset + crc.nonce)
return "incorrect nonce";
}
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
}
}
return NULL;
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
@ -1374,62 +1355,66 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const char *invalid;
bch2_bkey_ptrs_to_text(out, c, k);
}
bkey_ptrs_to_text(out, c, k);
static unsigned bch2_crc_field_size_max[] = {
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX,
};
invalid = bch2_extent_invalid(c, k);
if (invalid)
pr_buf(out, " invalid: %s", invalid);
static void bch2_extent_crc_pack(union bch_extent_crc *dst,
struct bch_extent_crc_unpacked src)
{
#define set_common_fields(_dst, _src) \
_dst.csum_type = _src.csum_type, \
_dst.compression_type = _src.compression_type, \
_dst._compressed_size = _src.compressed_size - 1, \
_dst._uncompressed_size = _src.uncompressed_size - 1, \
_dst.offset = _src.offset
switch (extent_entry_type(to_entry(dst))) {
case BCH_EXTENT_ENTRY_crc32:
set_common_fields(dst->crc32, src);
dst->crc32.csum = *((__le32 *) &src.csum.lo);
break;
case BCH_EXTENT_ENTRY_crc64:
set_common_fields(dst->crc64, src);
dst->crc64.nonce = src.nonce;
dst->crc64.csum_lo = src.csum.lo;
dst->crc64.csum_hi = *((__le16 *) &src.csum.hi);
break;
case BCH_EXTENT_ENTRY_crc128:
set_common_fields(dst->crc128, src);
dst->crc128.nonce = src.nonce;
dst->crc128.csum = src.csum;
break;
default:
BUG();
}
#undef set_common_fields
}
static void bch2_extent_crc_init(union bch_extent_crc *crc,
struct bch_extent_crc_unpacked new)
{
#define common_fields(_crc) \
.csum_type = _crc.csum_type, \
.compression_type = _crc.compression_type, \
._compressed_size = _crc.compressed_size - 1, \
._uncompressed_size = _crc.uncompressed_size - 1, \
.offset = _crc.offset
if (bch_crc_bytes[new.csum_type] <= 4 &&
new.uncompressed_size <= CRC32_SIZE_MAX &&
new.nonce <= CRC32_NONCE_MAX) {
crc->crc32 = (struct bch_extent_crc32) {
.type = 1 << BCH_EXTENT_ENTRY_crc32,
common_fields(new),
.csum = *((__le32 *) &new.csum.lo),
};
return;
}
new.uncompressed_size - 1 <= CRC32_SIZE_MAX &&
new.nonce <= CRC32_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc32;
else if (bch_crc_bytes[new.csum_type] <= 10 &&
new.uncompressed_size - 1 <= CRC64_SIZE_MAX &&
new.nonce <= CRC64_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc64;
else if (bch_crc_bytes[new.csum_type] <= 16 &&
new.uncompressed_size - 1 <= CRC128_SIZE_MAX &&
new.nonce <= CRC128_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc128;
else
BUG();
if (bch_crc_bytes[new.csum_type] <= 10 &&
new.uncompressed_size <= CRC64_SIZE_MAX &&
new.nonce <= CRC64_NONCE_MAX) {
crc->crc64 = (struct bch_extent_crc64) {
.type = 1 << BCH_EXTENT_ENTRY_crc64,
common_fields(new),
.nonce = new.nonce,
.csum_lo = new.csum.lo,
.csum_hi = *((__le16 *) &new.csum.hi),
};
return;
}
if (bch_crc_bytes[new.csum_type] <= 16 &&
new.uncompressed_size <= CRC128_SIZE_MAX &&
new.nonce <= CRC128_NONCE_MAX) {
crc->crc128 = (struct bch_extent_crc128) {
.type = 1 << BCH_EXTENT_ENTRY_crc128,
common_fields(new),
.nonce = new.nonce,
.csum = new.csum,
};
return;
}
#undef common_fields
BUG();
bch2_extent_crc_pack(crc, new);
}
void bch2_extent_crc_append(struct bkey_i_extent *e,
@ -1454,10 +1439,15 @@ static inline void __extent_entry_insert(struct bkey_i_extent *e,
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
struct extent_ptr_decoded *p)
{
struct bch_extent_crc_unpacked crc;
struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL);
union bch_extent_entry *pos;
unsigned i;
if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
pos = e->v.start;
goto found;
}
extent_for_each_crc(extent_i_to_s(e), crc, pos)
if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
pos = extent_entry_next(pos);
@ -1535,46 +1525,101 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
{
struct bkey_s_extent el = bkey_i_to_s_extent(l);
struct bkey_s_extent er = bkey_i_to_s_extent(r);
union bch_extent_entry *en_l, *en_r;
union bch_extent_entry *en_l = el.v->start;
union bch_extent_entry *en_r = er.v->start;
struct bch_extent_crc_unpacked crc_l, crc_r;
if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k))
return BCH_MERGE_NOMERGE;
crc_l = bch2_extent_crc_unpack(el.k, NULL);
extent_for_each_entry(el, en_l) {
struct bch_extent_ptr *lp, *rp;
struct bch_dev *ca;
en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
if (extent_entry_type(en_l) != extent_entry_type(en_r))
return BCH_MERGE_NOMERGE;
switch (extent_entry_type(en_l)) {
case BCH_EXTENT_ENTRY_ptr: {
const struct bch_extent_ptr *lp = &en_l->ptr;
const struct bch_extent_ptr *rp = &en_r->ptr;
struct bch_dev *ca;
if (lp->offset + crc_l.compressed_size != rp->offset ||
lp->dev != rp->dev ||
lp->gen != rp->gen)
return BCH_MERGE_NOMERGE;
/* We don't allow extents to straddle buckets: */
ca = bch_dev_bkey_exists(c, lp->dev);
if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
return BCH_MERGE_NOMERGE;
break;
}
case BCH_EXTENT_ENTRY_stripe_ptr:
if (en_l->stripe_ptr.block != en_r->stripe_ptr.block ||
en_l->stripe_ptr.idx != en_r->stripe_ptr.idx)
return BCH_MERGE_NOMERGE;
break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
if (crc_l.csum_type != crc_r.csum_type ||
crc_l.compression_type != crc_r.compression_type ||
crc_l.nonce != crc_r.nonce)
return BCH_MERGE_NOMERGE;
if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
crc_r.offset)
return BCH_MERGE_NOMERGE;
if (!bch2_checksum_mergeable(crc_l.csum_type))
return BCH_MERGE_NOMERGE;
if (crc_l.compression_type)
return BCH_MERGE_NOMERGE;
if (crc_l.csum_type &&
crc_l.uncompressed_size +
crc_r.uncompressed_size > c->sb.encoded_extent_max)
return BCH_MERGE_NOMERGE;
if (crc_l.uncompressed_size + crc_r.uncompressed_size - 1 >
bch2_crc_field_size_max[extent_entry_type(en_l)])
return BCH_MERGE_NOMERGE;
break;
default:
return BCH_MERGE_NOMERGE;
}
}
extent_for_each_entry(el, en_l) {
struct bch_extent_crc_unpacked crc_l, crc_r;
en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
if ((extent_entry_type(en_l) !=
extent_entry_type(en_r)) ||
!extent_entry_is_ptr(en_l))
return BCH_MERGE_NOMERGE;
if (!extent_entry_is_crc(en_l))
continue;
lp = &en_l->ptr;
rp = &en_r->ptr;
crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l));
crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r));
if (lp->offset + el.k->size != rp->offset ||
lp->dev != rp->dev ||
lp->gen != rp->gen)
return BCH_MERGE_NOMERGE;
crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
crc_l.csum,
crc_r.csum,
crc_r.uncompressed_size << 9);
/* We don't allow extents to straddle buckets: */
ca = bch_dev_bkey_exists(c, lp->dev);
crc_l.uncompressed_size += crc_r.uncompressed_size;
crc_l.compressed_size += crc_r.compressed_size;
if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
return BCH_MERGE_NOMERGE;
}
l->k.needs_whiteout |= r->k.needs_whiteout;
/* Keys with no pointers aren't restricted to one bucket and could
* overflow KEY_SIZE
*/
if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
bch2_key_resize(&l->k, KEY_SIZE_MAX);
bch2_cut_front(l->k.p, r);
return BCH_MERGE_PARTIAL;
bch2_extent_crc_pack(entry_to_crc(en_l), crc_l);
}
bch2_key_resize(&l->k, l->k.size + r->k.size);
@ -1670,7 +1715,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
end.offset += size;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
BTREE_ITER_SLOTS, k, err) {
@ -1745,11 +1790,6 @@ enum merge_result bch2_reservation_merge(struct bch_fs *c,
li->v.nr_replicas != ri->v.nr_replicas)
return BCH_MERGE_NOMERGE;
l->k.needs_whiteout |= r->k.needs_whiteout;
/* Keys with no pointers aren't restricted to one bucket and could
* overflow KEY_SIZE
*/
if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
bch2_key_resize(&l->k, KEY_SIZE_MAX);
bch2_cut_front(l->k.p, r);

View File

@ -358,6 +358,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
/* bch_btree_ptr: */
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);

View File

@ -322,10 +322,10 @@ static int bch2_extent_update(struct btree_trans *trans,
if (i_sectors_delta ||
new_i_size > inode->ei_inode.bi_size) {
if (c->opts.new_inode_updates) {
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
mutex_lock(&inode->ei_update_lock);
if (!bch2_btree_trans_relock(trans)) {
if (!bch2_trans_relock(trans)) {
mutex_unlock(&inode->ei_update_lock);
return -EINTR;
}
@ -435,8 +435,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
BUG_ON(k->k.p.inode != inode->v.i_ino);
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans,
BTREE_ID_EXTENTS,
@ -998,7 +997,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
}
bkey_reassemble(&tmp.k, k);
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
k = bkey_i_to_s_c(&tmp.k);
if (readpages_iter) {
@ -1054,7 +1053,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
BUG_ON(ret);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS);
@ -1103,7 +1102,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
bio_add_page_contig(&rbio->bio, page);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS);
@ -2101,8 +2100,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
BTREE_ITER_INTENT);
@ -2148,7 +2146,7 @@ static inline int range_has_data(struct bch_fs *c,
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
@ -2404,8 +2402,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
/*
* We need i_mutex to keep the page cache consistent with the extents
@ -2520,8 +2517,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
unsigned replicas = io_opts(c, inode).data_replicas;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
@ -2732,7 +2728,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode->v.i_ino, offset >> 9), 0, k, ret) {
@ -2805,7 +2801,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,

View File


@ -164,7 +164,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
struct bch_inode_unpacked inode_u;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -355,7 +355,7 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
if (!tmpfile)
mutex_lock(&dir->ei_update_lock);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 8, 1024);
retry:
bch2_trans_begin(&trans);
@ -507,7 +507,7 @@ static int __bch2_link(struct bch_fs *c,
int ret;
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 4, 1024);
retry:
bch2_trans_begin(&trans);
@ -594,7 +594,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
int ret;
bch2_lock_inodes(dir, inode);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 4, 1024);
retry:
bch2_trans_begin(&trans);
@ -801,13 +801,13 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
return ret;
}
bch2_trans_init(&trans, c, 8, 2048);
bch2_lock_inodes(i.src_dir,
i.dst_dir,
i.src_inode,
i.dst_inode);
bch2_trans_init(&trans, c);
if (S_ISDIR(i.src_inode->v.i_mode) &&
inode_attrs_changing(i.dst_dir, i.src_inode)) {
ret = -EXDEV;
@ -968,7 +968,7 @@ static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iatt
if (ret)
goto err;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
kfree(acl);
@ -1123,7 +1123,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (start + len < start)
return -EINVAL;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(ei->v.i_ino, start >> 9), 0, k, ret)
@ -1511,7 +1511,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons
*/
c1 = bch2_path_to_fs(devs[0]);
if (!c1)
if (IS_ERR(c1))
return c;
for (i = 1; i < nr_devs; i++) {

View File

@ -57,7 +57,7 @@ static int remove_dirent(struct btree_trans *trans,
name.name = buf;
/* Unlock so we don't deadlock, after copying name: */
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
if (ret) {
@ -450,8 +450,7 @@ static int check_extents(struct bch_fs *c)
u64 i_sectors;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch_verbose(c, "checking extents");
@ -546,8 +545,7 @@ static int check_dirents(struct bch_fs *c)
bch_verbose(c, "checking dirents");
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
hash_check_init(&h);
@ -703,8 +701,7 @@ static int check_xattrs(struct bch_fs *c)
hash_check_init(&h);
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
POS(BCACHEFS_ROOT_INO, 0), 0);
@ -917,8 +914,7 @@ static int check_directory_structure(struct bch_fs *c,
u64 d_inum;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch_verbose(c, "checking directory structure");
@ -1014,7 +1010,7 @@ retry:
if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
"unreachable directory found (inum %llu)",
k.k->p.inode)) {
bch2_btree_trans_unlock(&trans);
bch2_trans_unlock(&trans);
ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
if (ret) {
@ -1084,8 +1080,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
u64 d_inum;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
@ -1228,7 +1223,7 @@ static int check_inode(struct btree_trans *trans,
ret = bch2_inode_unpack(inode, &u);
bch2_btree_trans_unlock(trans);
bch2_trans_unlock(trans);
if (bch2_fs_inconsistent_on(ret, c,
"error unpacking inode %llu in fsck",
@ -1333,8 +1328,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
int ret = 0, ret2 = 0;
u64 nlinks_pos;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
POS(range_start, 0), 0);
@ -1458,8 +1452,7 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c)
struct bkey_s_c_inode inode;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_inode)

View File

@ -390,7 +390,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
if (ret)
return ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

View File

@ -285,7 +285,7 @@ int bch2_write_index_default(struct bch_write_op *op)
BUG_ON(bch2_keylist_empty(keys));
bch2_verify_keylist_sorted(keys);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
@ -432,21 +432,32 @@ static void init_append_extent(struct bch_write_op *op,
struct bversion version,
struct bch_extent_crc_unpacked crc)
{
struct bch_fs *c = op->c;
struct bkey_i_extent *e = bkey_extent_init(op->insert_keys.top);
struct bch_extent_ptr *ptr;
struct extent_ptr_decoded p = { .crc = crc };
struct open_bucket *ob;
unsigned i;
op->pos.offset += crc.uncompressed_size;
e->k.p = op->pos;
e->k.size = crc.uncompressed_size;
e->k.version = version;
e->k.p = op->pos;
e->k.size = crc.uncompressed_size;
e->k.version = version;
bch2_extent_crc_append(e, crc);
bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i,
crc.compressed_size);
BUG_ON(crc.compressed_size > wp->sectors_free);
wp->sectors_free -= crc.compressed_size;
if (op->flags & BCH_WRITE_CACHED)
extent_for_each_ptr(extent_i_to_s(e), ptr)
ptr->cached = true;
open_bucket_for_each(c, &wp->ptrs, ob, i) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
p.ptr = ob->ptr;
p.ptr.cached = !ca->mi.durability ||
(op->flags & BCH_WRITE_CACHED) != 0;
p.ptr.offset += ca->mi.bucket_size - ob->sectors_free;
bch2_extent_ptr_decoded_append(e, &p);
BUG_ON(crc.compressed_size > ob->sectors_free);
ob->sectors_free -= crc.compressed_size;
}
bch2_keylist_push(&op->insert_keys);
}
@ -1253,7 +1264,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
flags &= ~BCH_READ_LAST_FRAGMENT;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
rbio->pos, BTREE_ITER_SLOTS);
@ -1301,7 +1312,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
@ -1314,7 +1325,7 @@ retry:
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_btree_trans_unlock(&trans);
bch2_trans_unlock(&trans);
bytes = min_t(unsigned, bvec_iter.bi_size,
(k.k->p.offset - bvec_iter.bi_sector) << 9);
@ -1404,13 +1415,13 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
struct bkey_i_extent *e;
BKEY_PADDED(k) new;
struct bch_extent_crc_unpacked new_crc;
unsigned offset;
u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
int ret;
if (rbio->pick.crc.compression_type)
return;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -1427,24 +1438,19 @@ retry:
e = bkey_i_to_extent(&new.k);
if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e),
rbio->pick.ptr,
rbio->pos.offset -
rbio->pick.crc.offset) ||
rbio->pick.ptr, data_offset) ||
bversion_cmp(e->k.version, rbio->version))
goto out;
/* Extent was merged? */
if (bkey_start_offset(&e->k) < rbio->pos.offset ||
e->k.p.offset > rbio->pos.offset + rbio->pick.crc.uncompressed_size)
if (bkey_start_offset(&e->k) < data_offset ||
e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size)
goto out;
/* The extent might have been partially overwritten since we read it: */
offset = rbio->pick.crc.offset + (bkey_start_offset(&e->k) - rbio->pos.offset);
if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
rbio->pick.crc, NULL, &new_crc,
offset, e->k.size,
rbio->pick.crc.csum_type)) {
rbio->pick.crc, NULL, &new_crc,
bkey_start_offset(&e->k) - data_offset, e->k.size,
rbio->pick.crc.csum_type)) {
bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
goto out;
}
@ -1848,7 +1854,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
BCH_READ_USER_MAPPED;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
BUG_ON(rbio->_state);
BUG_ON(flags & BCH_READ_NODECODE);
@ -1869,7 +1875,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
*/
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_btree_trans_unlock(&trans);
bch2_trans_unlock(&trans);
bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
(k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);

View File

@ -963,6 +963,8 @@ void bch2_fs_journal_stop(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bch2_journal_flush_all_pins(j);
wait_event(j->wait, journal_entry_close(j));
/* do we need to write another journal entry? */

View File

@ -257,7 +257,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
unsigned i, nr, new_nr;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < BTREE_ID_NR; i++) {
struct btree_iter *iter;

View File

@ -41,8 +41,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
BKEY_PADDED(key) tmp;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
@ -112,7 +111,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
closure_init_stack(&cl);
for (id = 0; id < BTREE_ID_NR; id++) {

View File

@ -61,8 +61,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct keylist *keys = &op->insert_keys;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
@ -500,7 +499,7 @@ int bch2_move_data(struct bch_fs *c,
INIT_LIST_HEAD(&ctxt.reads);
init_waitqueue_head(&ctxt.wait);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_USER;
stats->btree_id = BTREE_ID_EXTENTS;
@ -634,7 +633,7 @@ static int bch2_move_btree(struct bch_fs *c,
enum data_cmd cmd;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_BTREE;

View File

@ -360,7 +360,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0),
BTREE_ITER_PREFETCH, k, ret) {
@ -432,7 +432,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
return ret;
}
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
@ -725,7 +725,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

View File

@ -213,8 +213,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
bool split_compressed = false;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
retry:
bch2_trans_begin(&trans);
@ -258,13 +257,9 @@ retry:
} while (bkey_cmp(iter->pos, k->k.p) < 0);
if (split_compressed) {
memset(&trans.fs_usage_deltas.fs_usage, 0,
sizeof(trans.fs_usage_deltas.fs_usage));
trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
-((s64) k->k.size),
&trans.fs_usage_deltas) ?:
BCH_BUCKET_MARK_OVERWRITE) ?:
bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
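
This is the hunk that reflects the key-marking interface change the commit is named for: callers no longer reset and pass the transaction's fs_usage delta buffer by hand, and the overwrite case is expressed with a BCH_BUCKET_MARK_OVERWRITE flag rather than a bare bool plus a deltas pointer. A sketch of the new call shape, reusing the surrounding variables from bch2_extent_replay_key() above; the trailing insert flags are abbreviated here:

/*
 * Sketch of the new marking call for an overwritten extent during
 * journal replay: the sector delta is negative for the overwritten
 * range, and the flags argument replaces the old bool + explicit
 * fs_usage_deltas pointer (the deltas are assumed to be tracked
 * inside the transaction now).
 */
static int replay_mark_overwrite_sketch(struct btree_trans *trans,
					struct bkey_i *k,
					struct disk_reservation *disk_res)
{
	return bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
				   -((s64) k->k.size),
				   BCH_BUCKET_MARK_OVERWRITE) ?:
	       bch2_trans_commit(trans, disk_res, NULL,
				 BTREE_INSERT_ATOMIC|
				 BTREE_INSERT_NOFAIL);
}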

View File

@ -262,7 +262,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret)
if (k.k->type == KEY_TYPE_extent) {

View File

@ -34,7 +34,7 @@ static void test_delete(struct bch_fs *c, u64 nr)
bkey_cookie_init(&k.k_i);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
BTREE_ITER_INTENT);
@ -66,7 +66,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
bkey_cookie_init(&k.k_i);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
BTREE_ITER_INTENT);
@ -94,7 +94,7 @@ static void test_iterate(struct bch_fs *c, u64 nr)
u64 i;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
delete_test_keys(c);
@ -139,7 +139,7 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
u64 i;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
delete_test_keys(c);
@ -189,7 +189,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
u64 i;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
delete_test_keys(c);
@ -243,7 +243,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
u64 i;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
delete_test_keys(c);
@ -304,7 +304,7 @@ static void test_peek_end(struct bch_fs *c, u64 nr)
struct btree_iter *iter;
struct bkey_s_c k;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0);
@ -323,7 +323,7 @@ static void test_peek_end_extents(struct bch_fs *c, u64 nr)
struct btree_iter *iter;
struct bkey_s_c k;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0);
@ -429,7 +429,7 @@ static void rand_lookup(struct bch_fs *c, u64 nr)
struct bkey_s_c k;
u64 i;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < nr; i++) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
@ -450,7 +450,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
int ret;
u64 i;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < nr; i++) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
@ -502,10 +502,10 @@ static void seq_insert(struct bch_fs *c, u64 nr)
bkey_cookie_init(&insert.k_i);
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
insert.k.p = iter->pos;
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i));
@ -523,10 +523,11 @@ static void seq_lookup(struct bch_fs *c, u64 nr)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k)
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret)
;
bch2_trans_exit(&trans);
}
@ -538,10 +539,10 @@ static void seq_overwrite(struct bch_fs *c, u64 nr)
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_INTENT, k) {
BTREE_ITER_INTENT, k, ret) {
struct bkey_i_cookie u;
bkey_reassemble(&u.k_i, k);
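
Alongside the bch2_trans_init() updates, the test changes show for_each_btree_key() growing a trailing ret argument, so iteration errors come back through the macro and even lookup-only loops (seq_lookup() and seq_overwrite() above) now declare an int ret. A minimal sketch of the updated loop, assuming the in-tree macro; the counting helper is illustrative only:

/*
 * Sketch of the updated for_each_btree_key() usage: the final argument
 * receives any iteration error, so it must be declared even when the
 * loop body ignores the keys.
 */
static int count_dirents_sketch(struct bch_fs *c, u64 *nr)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret = 0;

	*nr = 0;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret)
		(*nr)++;

	bch2_trans_exit(&trans);
	return ret;
}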

View File

@ -125,7 +125,7 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
struct bkey_s_c_xattr xattr;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
&inode->ei_str_hash, inode->v.i_ino,
@ -276,7 +276,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
u64 inum = dentry->d_inode->i_ino;
int ret;
bch2_trans_init(&trans, c);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
POS(inum, 0), 0, k, ret) {