Update bcachefs sources to 5241335413 bcachefs: Fix for spinning in journal reclaim on startup

Kent Overstreet 2020-12-19 18:05:09 -05:00
parent db931a4571
commit 80846e9c28
30 changed files with 381 additions and 214 deletions

View File

@@ -1 +1 @@
-e1d0fb8c5fbc70df1007ebf5d9ab03018dc05275
+5241335413ef160e309fd41ab909532fec656a3a

View File

@@ -505,8 +505,9 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
 			       BTREE_ITER_CACHED|
 			       BTREE_ITER_CACHED_NOFILL|
 			       BTREE_ITER_INTENT);
-	if (IS_ERR(iter))
-		return PTR_ERR(iter);
+	ret = bch2_btree_iter_traverse(iter);
+	if (ret)
+		goto out;
 
 	a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
 	ret = PTR_ERR_OR_ZERO(a);

View File

@@ -1359,6 +1359,8 @@ enum bch_sb_compat {
 
 #define BCH_REPLICAS_MAX		4U
 
+#define BCH_BKEY_PTRS_MAX		16U
+
 enum bch_error_actions {
 	BCH_ON_ERROR_CONTINUE		= 0,
 	BCH_ON_ERROR_RO			= 1,

View File

@@ -635,21 +635,26 @@ enum btree_validate_ret {
 ({								\
 	__label__ out;						\
 	char _buf[300];						\
+	char *buf2 = _buf;					\
 	struct printbuf out = PBUF(_buf);			\
 								\
+	buf2 = kmalloc(4096, GFP_ATOMIC);			\
+	if (buf2)						\
+		out = _PBUF(buf2, 4986);			\
+								\
 	btree_err_msg(&out, c, b, i, b->written, write);	\
 	pr_buf(&out, ": " msg, ##__VA_ARGS__);			\
 								\
 	if (type == BTREE_ERR_FIXABLE &&			\
 	    write == READ &&					\
 	    !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {	\
-		mustfix_fsck_err(c, "%s", _buf);		\
+		mustfix_fsck_err(c, "%s", buf2);		\
 		goto out;					\
 	}							\
 								\
 	switch (write) {					\
 	case READ:						\
-		bch_err(c, "%s", _buf);				\
+		bch_err(c, "%s", buf2);				\
 								\
 		switch (type) {					\
 		case BTREE_ERR_FIXABLE:				\
@@ -670,7 +675,7 @@ enum btree_validate_ret {
 		}						\
 		break;						\
 	case WRITE:						\
-		bch_err(c, "corrupt metadata before write: %s", _buf);	\
+		bch_err(c, "corrupt metadata before write: %s", buf2);	\
 								\
 		if (bch2_fs_inconsistent(c)) {			\
 			ret = BCH_FSCK_ERRORS_NOT_FIXED;	\
@@ -679,6 +684,8 @@ enum btree_validate_ret {
 		break;						\
 	}							\
 out:								\
+	if (buf2 != _buf)					\
+		kfree(buf2);					\
 	true;							\
 })
@@ -844,7 +851,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 			bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
 			btree_err(BTREE_ERR_FIXABLE, c, b, i,
-				  "invalid bkey:\n%s\n%s", invalid, buf);
+				  "invalid bkey: %s\n%s", invalid, buf);
 
 			i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
 			memmove_u64s_down(k, bkey_next(k),

View File

@@ -875,9 +875,19 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
 		char buf[100];
 		struct bkey uk = bkey_unpack_key(b, k);
 
+		bch2_dump_btree_node(iter->trans->c, l->b);
 		bch2_bkey_to_text(&PBUF(buf), &uk);
-		panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n",
-		      buf, b->key.k.p.inode, b->key.k.p.offset);
+		panic("parent iter doesn't point to new node:\n"
+		      "iter pos %s %llu:%llu\n"
+		      "iter key %s\n"
+		      "new node %llu:%llu-%llu:%llu\n",
+		      bch2_btree_ids[iter->btree_id],
+		      iter->pos.inode,
+		      iter->pos.offset,
+		      buf,
+		      b->data->min_key.inode,
+		      b->data->min_key.offset,
+		      b->key.k.p.inode, b->key.k.p.offset);
 	}
 
 	if (!parent_locked)
@@ -892,6 +902,13 @@ static inline void __btree_iter_init(struct btree_iter *iter,
 
 	bch2_btree_node_iter_init(&l->iter, l->b, &pos);
 
+	/*
+	 * Iterators to interior nodes should always be pointed at the first non
+	 * whiteout:
+	 */
+	if (level)
+		bch2_btree_node_iter_peek(&l->iter, l->b);
+
 	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
 }
 
@@ -2007,9 +2024,10 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
 {
 	struct btree_iter *iter;
+	struct btree_insert_entry *i;
 
 	trans_for_each_iter(trans, iter)
-		pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+		printk(KERN_ERR "iter: btree %s pos %llu:%llu%s%s%s %ps\n",
 		       bch2_btree_ids[iter->btree_id],
 		       iter->pos.inode,
 		       iter->pos.offset,
@@ -2017,6 +2035,14 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans)
 		       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
 		       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
 		       (void *) iter->ip_allocated);
+
+	trans_for_each_update(trans, i) {
+		char buf[300];
+
+		bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k));
+		printk(KERN_ERR "update: btree %s %s\n",
+		       bch2_btree_ids[i->iter->btree_id], buf);
+	}
 	panic("trans iter oveflow\n");
 }

View File

@@ -51,11 +51,17 @@ static inline int btree_iter_err(const struct btree_iter *iter)
 static inline struct btree_iter *
 __trans_next_iter(struct btree_trans *trans, unsigned idx)
 {
-	u64 l = trans->iters_linked >> idx;
+	u64 l;
+
+	if (idx == BTREE_ITER_MAX)
+		return NULL;
+
+	l = trans->iters_linked >> idx;
 	if (!l)
 		return NULL;
 
 	idx += __ffs64(l);
+	EBUG_ON(idx >= BTREE_ITER_MAX);
 	EBUG_ON(trans->iters[idx].idx != idx);
 	return &trans->iters[idx];
 }

View File

@@ -580,6 +580,8 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	list_splice(&bc->dirty, &bc->clean);
 
 	list_for_each_entry_safe(ck, n, &bc->clean, list) {
+		cond_resched();
+
 		bch2_journal_pin_drop(&c->journal, &ck->journal);
 		bch2_journal_preres_put(&c->journal, &ck->res);
 
@@ -593,6 +595,8 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	BUG_ON(bc->nr_keys);
 
 	list_for_each_entry_safe(ck, n, &bc->freed, list) {
+		cond_resched();
+
 		list_del(&ck->list);
 		kmem_cache_free(bch2_key_cache, ck);
 	}

View File

@@ -4,8 +4,8 @@
 static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 {
 	size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty);
-	size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_dirty);
-	size_t max_dirty = 4096 + nr_keys / 2;
+	size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys);
+	size_t max_dirty = 1024 + nr_keys / 2;
 
 	return max_t(ssize_t, 0, nr_dirty - max_dirty);
 }
@@ -13,10 +13,11 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
 static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
 {
 	size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty);
-	size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_dirty);
+	size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys);
 	size_t max_dirty = 4096 + (nr_keys * 3) / 4;
 
-	return nr_dirty > max_dirty;
+	return nr_dirty > max_dirty &&
+		test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
 }
 
 struct bkey_cached *
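The throttling arithmetic used by the two helpers above can be exercised in isolation. The following is only a sketch with made-up numbers, using plain libc instead of the bcachefs types; the function names here are illustrative, not part of the commit:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Mirror of the need-flush threshold in the hunk above. */
static size_t need_flush(size_t nr_dirty, size_t nr_keys)
{
	size_t max_dirty = 1024 + nr_keys / 2;

	return nr_dirty > max_dirty ? nr_dirty - max_dirty : 0;
}

/* Mirror of the must-wait threshold; never waits before reclaim has started. */
static bool must_wait(size_t nr_dirty, size_t nr_keys, bool reclaim_started)
{
	size_t max_dirty = 4096 + (nr_keys * 3) / 4;

	return nr_dirty > max_dirty && reclaim_started;
}

int main(void)
{
	/* hypothetical cache: 100000 keys, 60000 of them dirty */
	printf("need to flush %zu keys\n", need_flush(60000, 100000));
	printf("must wait: %d\n", must_wait(60000, 100000, true));
	return 0;
}

With these numbers the flush target is 8976 keys and writers are not yet forced to wait, which matches the intent of gating the wait on JOURNAL_RECLAIM_STARTED.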

View File

@@ -519,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
 	trans->journal_pin = &as->journal;
 
 	for_each_keylist_key(&as->new_keys, k) {
-		ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+		ret = bch2_trans_mark_key(trans,
+					  bkey_s_c_null,
+					  bkey_i_to_s_c(k),
 					  0, 0, BTREE_TRIGGER_INSERT);
 		if (ret)
 			return ret;
 	}
 
 	for_each_keylist_key(&as->old_keys, k) {
-		ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+		ret = bch2_trans_mark_key(trans,
+					  bkey_i_to_s_c(k),
+					  bkey_s_c_null,
 					  0, 0, BTREE_TRIGGER_OVERWRITE);
 		if (ret)
 			return ret;

View File

@@ -508,6 +508,10 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
 	/*
 	 * Can't be holding any read locks when we go to take write locks:
+	 * another thread could be holding an intent lock on the same node we
+	 * have a read lock on, and it'll block trying to take a write lock
+	 * (because we hold a read lock) and it could be blocking us by holding
+	 * its own read lock (while we're trying to to take write locks).
 	 *
 	 * note - this must be done after bch2_trans_journal_preres_get_cold()
 	 * or anything else that might call bch2_trans_relock(), since that
@@ -515,9 +519,15 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 	 */
 	trans_for_each_iter(trans, iter) {
 		if (iter->nodes_locked != iter->nodes_intent_locked) {
-			EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
-			EBUG_ON(trans->iters_live & (1ULL << iter->idx));
-			bch2_btree_iter_unlock_noinline(iter);
+			if ((iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
+			    (trans->iters_live & (1ULL << iter->idx))) {
+				if (!bch2_btree_iter_upgrade(iter, 1)) {
+					trace_trans_restart_upgrade(trans->ip);
+					return -EINTR;
+				}
+			} else {
+				bch2_btree_iter_unlock_noinline(iter);
+			}
 		}
 	}

View File

@@ -1334,10 +1334,8 @@ static int bch2_mark_key_locked(struct bch_fs *c,
 		ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
 		break;
 	case KEY_TYPE_inode:
-		if (!(flags & BTREE_TRIGGER_OVERWRITE))
-			fs_usage->nr_inodes++;
-		else
-			fs_usage->nr_inodes--;
+		fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
+		fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
 		break;
 	case KEY_TYPE_reservation: {
 		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -1401,10 +1399,10 @@ int bch2_mark_update(struct btree_trans *trans,
 	old = (struct bkey_s_c) { &unpacked, NULL };
 
 	if (!btree_node_type_is_extents(iter->btree_id)) {
+		/* iterators should be uptodate, shouldn't get errors here: */
 		if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
-			_old = bch2_btree_node_iter_peek(&node_iter, b);
-			if (_old)
-				old = bkey_disassemble(b, _old, &unpacked);
+			old = bch2_btree_iter_peek_slot(iter);
+			BUG_ON(bkey_err(old));
 		} else {
 			struct bkey_cached *ck = (void *) iter->l[0].b;
@@ -1749,59 +1747,92 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 	return 0;
 }
 
-static int bch2_trans_mark_stripe(struct btree_trans *trans,
-				  struct bkey_s_c k,
-				  unsigned flags)
+static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
+					    const struct bch_extent_ptr *ptr,
+					    s64 sectors, bool parity)
 {
-	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-	unsigned nr_data = s->nr_blocks - s->nr_redundant;
-	struct bch_replicas_padded r;
-	struct bkey_alloc_unpacked u;
 	struct bkey_i_alloc *a;
 	struct btree_iter *iter;
-	bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
-	s64 sectors = le16_to_cpu(s->sectors);
+	struct bkey_alloc_unpacked u;
+	int ret;
+
+	ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
+	if (ret)
+		return ret;
+
+	if (parity) {
+		u.dirty_sectors += sectors;
+		u.data_type = u.dirty_sectors
+			? BCH_DATA_parity
+			: 0;
+	}
+
+	a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+	ret = PTR_ERR_OR_ZERO(a);
+	if (ret)
+		goto err;
+
+	bkey_alloc_init(&a->k_i);
+	a->k.p = iter->pos;
+	bch2_alloc_pack(a, u);
+	bch2_trans_update(trans, iter, &a->k_i, 0);
+err:
+	bch2_trans_iter_put(trans, iter);
+	return ret;
+}
+
+static int bch2_trans_mark_stripe(struct btree_trans *trans,
+				  struct bkey_s_c old, struct bkey_s_c new,
+				  unsigned flags)
+{
+	const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+		? bkey_s_c_to_stripe(old).v : NULL;
+	const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+		? bkey_s_c_to_stripe(new).v : NULL;
+	struct bch_replicas_padded r;
 	unsigned i;
 	int ret = 0;
 
-	if (deleting)
-		sectors = -sectors;
-
-	bch2_bkey_to_replicas(&r.e, k);
-	update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
-
 	/*
-	 * The allocator code doesn't necessarily update bucket gens in the
-	 * btree when incrementing them, right before handing out new buckets -
-	 * we just need to persist those updates here along with the new stripe:
+	 * If the pointers aren't changing, we don't need to do anything:
 	 */
+	if (new_s && old_s &&
+	    !memcmp(old_s->ptrs, new_s->ptrs,
+		    new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+		return 0;
 
-	for (i = 0; i < s->nr_blocks && !ret; i++) {
-		bool parity = i >= nr_data;
+	if (new_s) {
+		unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant;
+		s64 sectors = le16_to_cpu(new_s->sectors);
 
-		ret = bch2_trans_start_alloc_update(trans, &iter,
-				&s->ptrs[i], &u);
-		if (ret)
-			break;
+		bch2_bkey_to_replicas(&r.e, new);
+		update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
 
-		if (parity) {
-			u.dirty_sectors += sectors;
-			u.data_type = u.dirty_sectors
-				? BCH_DATA_parity
-				: 0;
+		for (i = 0; i < new_s->nr_blocks; i++) {
+			bool parity = i >= nr_data;
+
+			ret = bch2_trans_mark_stripe_alloc_ref(trans,
+					&new_s->ptrs[i], sectors, parity);
+			if (ret)
+				return ret;
 		}
+	}
 
-		a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
-		ret = PTR_ERR_OR_ZERO(a);
-		if (ret)
-			goto put_iter;
+	if (old_s) {
+		unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant;
+		s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
 
-		bkey_alloc_init(&a->k_i);
-		a->k.p = iter->pos;
-		bch2_alloc_pack(a, u);
-		bch2_trans_update(trans, iter, &a->k_i, 0);
-put_iter:
-		bch2_trans_iter_put(trans, iter);
+		bch2_bkey_to_replicas(&r.e, old);
+		update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+
+		for (i = 0; i < old_s->nr_blocks; i++) {
+			bool parity = i >= nr_data;
+
+			ret = bch2_trans_mark_stripe_alloc_ref(trans,
+					&old_s->ptrs[i], sectors, parity);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return ret;
@@ -1900,11 +1931,16 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
 	return ret;
 }
 
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+int bch2_trans_mark_key(struct btree_trans *trans,
+			struct bkey_s_c old,
+			struct bkey_s_c new,
 			unsigned offset, s64 sectors, unsigned flags)
 {
-	struct replicas_delta_list *d;
 	struct bch_fs *c = trans->c;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
+	struct replicas_delta_list *d;
+
+	BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
 
 	switch (k.k->type) {
 	case KEY_TYPE_btree_ptr:
@@ -1920,15 +1956,18 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 		return bch2_trans_mark_extent(trans, k, offset, sectors,
 					      flags, BCH_DATA_user);
 	case KEY_TYPE_stripe:
-		return bch2_trans_mark_stripe(trans, k, flags);
-	case KEY_TYPE_inode:
-		d = replicas_deltas_realloc(trans, 0);
+		return bch2_trans_mark_stripe(trans, old, new, flags);
+	case KEY_TYPE_inode: {
+		int nr = (new.k->type == KEY_TYPE_inode) -
+			 (old.k->type == KEY_TYPE_inode);
+
+		if (nr) {
+			d = replicas_deltas_realloc(trans, 0);
+			d->nr_inodes += nr;
+		}
 
-		if (!(flags & BTREE_TRIGGER_OVERWRITE))
-			d->nr_inodes++;
-		else
-			d->nr_inodes--;
 		return 0;
+	}
 	case KEY_TYPE_reservation: {
 		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -1952,12 +1991,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
 int bch2_trans_mark_update(struct btree_trans *trans,
 			   struct btree_iter *iter,
-			   struct bkey_i *insert,
+			   struct bkey_i *new,
 			   unsigned flags)
 {
-	struct btree		*b = iter_l(iter)->b;
-	struct btree_node_iter	node_iter = iter_l(iter)->iter;
-	struct bkey_packed	*_k;
+	struct bkey_s_c		old;
 	int ret;
 
 	if (unlikely(flags & BTREE_TRIGGER_NORUN))
@@ -1966,68 +2003,93 @@ int bch2_trans_mark_update(struct btree_trans *trans,
 	if (!btree_node_type_needs_gc(iter->btree_id))
 		return 0;
 
-	ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
-			0, insert->k.size, BTREE_TRIGGER_INSERT);
-	if (ret)
-		return ret;
-
-	if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
-		struct bkey_cached *ck = (void *) iter->l[0].b;
-
-		return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
-					   0, 0, BTREE_TRIGGER_OVERWRITE);
-	}
-
-	while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
-		struct bkey		unpacked;
-		struct bkey_s_c		k;
-		unsigned		offset = 0;
-		s64			sectors = 0;
-		unsigned		flags = BTREE_TRIGGER_OVERWRITE;
-
-		k = bkey_disassemble(b, _k, &unpacked);
-
-		if (btree_node_is_extents(b)
-		    ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
-		    : bkey_cmp(insert->k.p, k.k->p))
-			break;
-
-		if (btree_node_is_extents(b)) {
-			switch (bch2_extent_overlap(&insert->k, k.k)) {
-			case BCH_EXTENT_OVERLAP_ALL:
-				offset = 0;
-				sectors = -((s64) k.k->size);
-				break;
-			case BCH_EXTENT_OVERLAP_BACK:
-				offset = bkey_start_offset(&insert->k) -
-					bkey_start_offset(k.k);
-				sectors = bkey_start_offset(&insert->k) -
-					k.k->p.offset;
-				break;
-			case BCH_EXTENT_OVERLAP_FRONT:
-				offset = 0;
-				sectors = bkey_start_offset(k.k) -
-					insert->k.p.offset;
-				break;
-			case BCH_EXTENT_OVERLAP_MIDDLE:
-				offset = bkey_start_offset(&insert->k) -
-					bkey_start_offset(k.k);
-				sectors = -((s64) insert->k.size);
-				flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
-				break;
-			}
-
-			BUG_ON(sectors >= 0);
-		}
-
-		ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
-		if (ret)
-			return ret;
-
-		bch2_btree_node_iter_advance(&node_iter, b);
-	}
-
-	return 0;
+	if (!btree_node_type_is_extents(iter->btree_id)) {
+		/* iterators should be uptodate, shouldn't get errors here: */
+		if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+			old = bch2_btree_iter_peek_slot(iter);
+			BUG_ON(bkey_err(old));
+		} else {
+			struct bkey_cached *ck = (void *) iter->l[0].b;
+
+			BUG_ON(!ck->valid);
+			old = bkey_i_to_s_c(ck->k);
+		}
+
+		if (old.k->type == new->k.type) {
+			ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+					BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+		} else {
+			ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+					BTREE_TRIGGER_INSERT|flags) ?:
+			      bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+					BTREE_TRIGGER_OVERWRITE|flags);
+		}
+	} else {
+		struct btree		*b = iter_l(iter)->b;
+		struct btree_node_iter	node_iter = iter_l(iter)->iter;
+		struct bkey_packed	*_old;
+		struct bkey		unpacked;
+
+		EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
+
+		bkey_init(&unpacked);
+		old = (struct bkey_s_c) { &unpacked, NULL };
+
+		ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+					  0, new->k.size,
+					  BTREE_TRIGGER_INSERT);
+		if (ret)
+			return ret;
+
+		while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
+			unsigned flags = BTREE_TRIGGER_OVERWRITE;
+			unsigned offset = 0;
+			s64 sectors;
+
+			old = bkey_disassemble(b, _old, &unpacked);
+			sectors = -((s64) old.k->size);
+			flags |= BTREE_TRIGGER_OVERWRITE;
+
+			if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
+				return 0;
+
+			switch (bch2_extent_overlap(&new->k, old.k)) {
+			case BCH_EXTENT_OVERLAP_ALL:
+				offset = 0;
+				sectors = -((s64) old.k->size);
+				break;
+			case BCH_EXTENT_OVERLAP_BACK:
+				offset = bkey_start_offset(&new->k) -
+					bkey_start_offset(old.k);
+				sectors = bkey_start_offset(&new->k) -
+					old.k->p.offset;
+				break;
+			case BCH_EXTENT_OVERLAP_FRONT:
+				offset = 0;
+				sectors = bkey_start_offset(old.k) -
+					new->k.p.offset;
+				break;
+			case BCH_EXTENT_OVERLAP_MIDDLE:
+				offset = bkey_start_offset(&new->k) -
+					bkey_start_offset(old.k);
+				sectors = -((s64) new->k.size);
+				flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
+				break;
+			}
+
+			BUG_ON(sectors >= 0);
+
+			ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+					offset, sectors, flags);
+			if (ret)
+				return ret;
+
+			bch2_btree_node_iter_advance(&node_iter, b);
+		}
+	}
+
+	return ret;
 }
 
 /* Disk reservations: */

View File

@@ -264,7 +264,7 @@ int bch2_mark_update(struct btree_trans *, struct btree_iter *,
 int bch2_replicas_delta_list_apply(struct bch_fs *,
 				   struct bch_fs_usage *,
 				   struct replicas_delta_list *);
-int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
 			unsigned, s64, unsigned);
 int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
 			   struct bkey_i *insert, unsigned);

View File

@@ -300,7 +300,7 @@ static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
 static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
 {
 	struct bch_stripe *v = &buf->key.v;
-	unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0;
+	unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0;
 	unsigned nr_data = v->nr_blocks - v->nr_redundant;
 	unsigned bytes = buf->size << 9;
 
@@ -874,7 +874,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
 	for_each_keylist_key(&s->keys, k) {
 		ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
 		if (ret) {
-			bch_err(c, "error creating stripe: error updating pointers");
+			bch_err(c, "error creating stripe: error %i updating pointers", ret);
 			break;
 		}
 	}
 
@@ -1101,7 +1101,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
 	s->c		= c;
 	s->h		= h;
 	s->nr_data	= min_t(unsigned, h->nr_active_devs,
-				EC_STRIPE_MAX) - h->redundancy;
+				BCH_BKEY_PTRS_MAX) - h->redundancy;
 	s->nr_parity	= h->redundancy;
 
 	bch2_keylist_init(&s->keys, s->inline_keys);
@@ -1211,13 +1211,13 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
 	struct open_bucket *ob;
 	unsigned i, nr_have, nr_data =
 		min_t(unsigned, h->nr_active_devs,
-		      EC_STRIPE_MAX) - h->redundancy;
+		      BCH_BKEY_PTRS_MAX) - h->redundancy;
 	bool have_cache = true;
 	int ret = 0;
 
 	devs = h->devs;
 
-	for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
+	for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
 		__clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
 		--nr_data;
 	}
@@ -1341,16 +1341,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 	if (!h)
 		return NULL;
 
-	if (!h->s && ec_new_stripe_alloc(c, h)) {
-		bch2_ec_stripe_head_put(c, h);
-		return NULL;
-	}
-
-	if (!h->s->allocated) {
-		if (!h->s->existing_stripe &&
-		    (idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) {
-			//pr_info("got existing stripe %llu", idx);
-
+	if (!h->s) {
+		if (ec_new_stripe_alloc(c, h)) {
+			bch2_ec_stripe_head_put(c, h);
+			return NULL;
+		}
+
+		idx = get_existing_stripe(c, target, algo, redundancy);
+		if (idx >= 0) {
 			h->s->existing_stripe = true;
 			h->s->existing_stripe_idx = idx;
 			if (get_stripe_key(c, idx, &h->s->stripe)) {
@@ -1364,7 +1362,9 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 				ec_block_io(c, &h->s->stripe, READ, i, &cl);
 			}
 		}
+	}
 
+	if (!h->s->allocated) {
 		if (!h->s->existing_stripe &&
 		    !h->s->res.sectors) {
 			ret = bch2_disk_reservation_get(c, &h->s->res,

View File

@@ -71,9 +71,9 @@ struct ec_stripe_buf {
 	/* might not be buffering the entire stripe: */
 	unsigned		offset;
 	unsigned		size;
-	unsigned long		valid[BITS_TO_LONGS(EC_STRIPE_MAX)];
+	unsigned long		valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
 
-	void			*data[EC_STRIPE_MAX];
+	void			*data[BCH_BKEY_PTRS_MAX];
 
 	union {
 		struct bkey_i_stripe	key;
@@ -101,10 +101,10 @@ struct ec_stripe_new {
 	bool			existing_stripe;
 	u64			existing_stripe_idx;
 
-	unsigned long		blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
+	unsigned long		blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
 
 	struct open_buckets	blocks;
-	u8			data_block_idx[EC_STRIPE_MAX];
+	u8			data_block_idx[BCH_BKEY_PTRS_MAX];
 	struct open_buckets	parity;
 
 	struct disk_reservation	res;

View File

@@ -4,11 +4,9 @@
 
 #include <linux/llist.h>
 
-#define EC_STRIPE_MAX	16
-
 struct bch_replicas_padded {
 	struct bch_replicas_entry	e;
-	u8				pad[EC_STRIPE_MAX];
+	u8				pad[BCH_BKEY_PTRS_MAX];
 };
 
 struct stripe {
@@ -24,7 +22,7 @@ struct stripe {
 	unsigned		dirty:1;
 	unsigned		on_heap:1;
 	u8			blocks_nonempty;
-	u16			block_sectors[EC_STRIPE_MAX];
+	u16			block_sectors[BCH_BKEY_PTRS_MAX];
 
 	struct bch_replicas_padded r;
 };

View File

@@ -1046,11 +1046,13 @@ static const char *extent_ptr_invalid(const struct bch_fs *c,
 const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	struct bch_devs_list devs;
 	const union bch_extent_entry *entry;
 	struct bch_extent_crc_unpacked crc;
 	unsigned size_ondisk = k.k->size;
 	const char *reason;
 	unsigned nonce = UINT_MAX;
+	unsigned i;
 
 	if (k.k->type == KEY_TYPE_btree_ptr)
 		size_ondisk = c->opts.btree_node_size;
@@ -1101,6 +1103,12 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 		}
 	}
 
+	devs = bch2_bkey_devs(k);
+	bubble_sort(devs.devs, devs.nr, u8_cmp);
+	for (i = 0; i + 1 < devs.nr; i++)
+		if (devs.devs[i] == devs.devs[i + 1])
+			return "multiple ptrs to same device";
+
 	return NULL;
 }
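The duplicate-device check added above boils down to: sort the pointer device ids, then look for equal neighbours. A stand-alone sketch of the same idea (hypothetical values, and libc qsort standing in for the kernel's bubble_sort):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int u8_cmp(const void *l, const void *r)
{
	uint8_t a = *(const uint8_t *) l, b = *(const uint8_t *) r;

	return (a > b) - (a < b);
}

/* Returns true if any device id appears more than once. */
static bool has_duplicate_devs(uint8_t *devs, unsigned nr)
{
	unsigned i;

	qsort(devs, nr, sizeof(*devs), u8_cmp);

	for (i = 0; i + 1 < nr; i++)
		if (devs[i] == devs[i + 1])
			return true;
	return false;
}

int main(void)
{
	uint8_t devs[] = { 3, 1, 3, 2 };	/* hypothetical pointer devices */

	puts(has_duplicate_devs(devs, 4) ? "multiple ptrs to same device" : "ok");
	return 0;
}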

View File

@@ -3019,8 +3019,8 @@ static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
 	int pg_offset;
 	loff_t ret = -1;
 
-	page = find_lock_entry(mapping, index);
-	if (!page || xa_is_value(page))
+	page = find_lock_page(mapping, index);
+	if (!page)
 		return offset;
 
 	pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));

View File

@@ -186,34 +186,33 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
 static int sum_sector_overwrites(struct btree_trans *trans,
 				 struct btree_iter *extent_iter,
 				 struct bkey_i *new,
-				 bool may_allocate,
 				 bool *maybe_extending,
-				 s64 *delta)
+				 s64 *i_sectors_delta,
+				 s64 *disk_sectors_delta)
 {
 	struct btree_iter *iter;
 	struct bkey_s_c old;
 	int ret = 0;
 
 	*maybe_extending = true;
-	*delta = 0;
+	*i_sectors_delta = 0;
+	*disk_sectors_delta = 0;
 
 	iter = bch2_trans_copy_iter(trans, extent_iter);
 
 	for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
-		if (!may_allocate &&
-		    bch2_bkey_nr_ptrs_fully_allocated(old) <
-		    bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) {
-			ret = -ENOSPC;
-			break;
-		}
+		s64 sectors = min(new->k.p.offset, old.k->p.offset) -
+			max(bkey_start_offset(&new->k),
+			    bkey_start_offset(old.k));
 
-		*delta += (min(new->k.p.offset,
-			       old.k->p.offset) -
-			   max(bkey_start_offset(&new->k),
-			       bkey_start_offset(old.k))) *
+		*i_sectors_delta += sectors *
 			(bkey_extent_is_allocation(&new->k) -
 			 bkey_extent_is_allocation(old.k));
 
+		*disk_sectors_delta += sectors *
+			(int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) -
+			       bch2_bkey_nr_ptrs_fully_allocated(old));
+
 		if (bkey_cmp(old.k->p, new->k.p) >= 0) {
 			/*
 			 * Check if there's already data above where we're
@@ -247,12 +246,12 @@ int bch2_extent_update(struct btree_trans *trans,
 		       struct disk_reservation *disk_res,
 		       u64 *journal_seq,
 		       u64 new_i_size,
-		       s64 *i_sectors_delta)
+		       s64 *i_sectors_delta_total)
 {
 	/* this must live until after bch2_trans_commit(): */
 	struct bkey_inode_buf inode_p;
 	bool extending = false;
-	s64 delta = 0;
+	s64 i_sectors_delta = 0, disk_sectors_delta = 0;
 	int ret;
 
 	ret = bch2_extent_trim_atomic(k, iter);
@@ -260,16 +259,26 @@ int bch2_extent_update(struct btree_trans *trans,
 		return ret;
 
 	ret = sum_sector_overwrites(trans, iter, k,
-			disk_res && disk_res->sectors != 0,
-			&extending, &delta);
+			&extending,
+			&i_sectors_delta,
+			&disk_sectors_delta);
 	if (ret)
 		return ret;
 
+	if (disk_res &&
+	    disk_sectors_delta > (s64) disk_res->sectors) {
+		ret = bch2_disk_reservation_add(trans->c, disk_res,
+					disk_sectors_delta - disk_res->sectors,
+					0);
+		if (ret)
+			return ret;
+	}
+
 	new_i_size = extending
 		? min(k->k.p.offset << 9, new_i_size)
 		: 0;
 
-	if (delta || new_i_size) {
+	if (i_sectors_delta || new_i_size) {
 		struct btree_iter *inode_iter;
 		struct bch_inode_unpacked inode_u;
 
@@ -296,9 +305,9 @@ int bch2_extent_update(struct btree_trans *trans,
 		else
 			new_i_size = 0;
 
-		inode_u.bi_sectors += delta;
+		inode_u.bi_sectors += i_sectors_delta;
 
-		if (delta || new_i_size) {
+		if (i_sectors_delta || new_i_size) {
 			bch2_inode_pack(trans->c, &inode_p, &inode_u);
 			bch2_trans_update(trans, inode_iter,
 					  &inode_p.inode.k_i, 0);
@@ -313,10 +322,12 @@ int bch2_extent_update(struct btree_trans *trans,
 				BTREE_INSERT_NOCHECK_RW|
 				BTREE_INSERT_NOFAIL|
 				BTREE_INSERT_USE_RESERVE);
-	if (!ret && i_sectors_delta)
-		*i_sectors_delta += delta;
+	if (ret)
+		return ret;
 
-	return ret;
+	if (i_sectors_delta_total)
+		*i_sectors_delta_total += i_sectors_delta;
+	return 0;
 }
 
 int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,

View File

@@ -443,20 +443,6 @@ unlock:
 	if (!ret)
 		goto retry;
 
-	if (WARN_ONCE(ret == cur_entry_journal_full &&
-		      !can_discard &&
-		      (flags & JOURNAL_RES_GET_RESERVED),
-		      "JOURNAL_RES_GET_RESERVED set but journal full")) {
-		char *buf;
-
-		buf = kmalloc(4096, GFP_NOFS);
-		if (buf) {
-			bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
-			pr_err("\n%s", buf);
-			kfree(buf);
-		}
-	}
-
 	/*
 	 * Journal is full - can't rely on reclaim from work item due to
 	 * freezing:
@@ -1137,7 +1123,7 @@ out:
 
 /* debug: */
 
-void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	union journal_res_state s;
@@ -1145,7 +1131,6 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	unsigned i;
 
 	rcu_read_lock();
-	spin_lock(&j->lock);
 	s = READ_ONCE(j->reservations);
 
 	pr_buf(out,
@@ -1245,10 +1230,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 		       ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
 	}
 
-	spin_unlock(&j->lock);
 	rcu_read_unlock();
 }
 
+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+	spin_lock(&j->lock);
+	__bch2_journal_debug_to_text(out, j);
+	spin_unlock(&j->lock);
+}
+
 void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
 {
 	struct journal_entry_pin_list *pin_list;

View File

@@ -384,7 +384,7 @@ out:
 static inline bool journal_check_may_get_unreserved(struct journal *j)
 {
 	union journal_preres_state s = READ_ONCE(j->prereserved);
-	bool ret = s.reserved <= s.remaining &&
+	bool ret = s.reserved < s.remaining &&
 		fifo_free(&j->pin) > 8;
 
 	lockdep_assert_held(&j->lock);
@@ -508,6 +508,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 void bch2_journal_unblock(struct journal *);
 void bch2_journal_block(struct journal *);
 
+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_pins_to_text(struct printbuf *, struct journal *);

View File

@@ -1099,7 +1099,6 @@ static void journal_write_done(struct closure *cl)
 	if (!w->noflush) {
 		j->flushed_seq_ondisk = seq;
 		j->last_seq_ondisk = last_seq;
-		bch2_journal_space_available(j);
 	}
 
 	/*
@@ -1123,6 +1122,8 @@ static void journal_write_done(struct closure *cl)
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v, new.v)) != old.v);
 
+	bch2_journal_space_available(j);
+
 	closure_wake_up(&w->wait);
 	journal_wake(j);

View File

@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "error.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
@@ -159,7 +160,7 @@ void bch2_journal_space_available(struct journal *j)
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	unsigned clean, clean_ondisk, total;
-	unsigned overhead, u64s_remaining = 0;
+	s64 u64s_remaining = 0;
 	unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
 				      j->buf[1].buf_size >> 9);
 	unsigned i, nr_online = 0, nr_devs_want;
@@ -208,22 +209,38 @@ void bch2_journal_space_available(struct journal *j)
 	clean		= j->space[journal_space_clean].total;
 	total		= j->space[journal_space_total].total;
 
-	if (!j->space[journal_space_discarded].next_entry)
+	if (!clean_ondisk &&
+	    j->reservations.idx ==
+	    j->reservations.unwritten_idx) {
+		char *buf = kmalloc(4096, GFP_ATOMIC);
+
+		bch_err(c, "journal stuck");
+		if (buf) {
+			__bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
+			pr_err("\n%s", buf);
+			kfree(buf);
+		}
+
+		bch2_fatal_error(c);
+		ret = cur_entry_journal_stuck;
+	} else if (!j->space[journal_space_discarded].next_entry)
 		ret = cur_entry_journal_full;
 	else if (!fifo_free(&j->pin))
 		ret = cur_entry_journal_pin_full;
 
-	if ((clean - clean_ondisk <= total / 8) &&
+	if ((j->space[journal_space_clean_ondisk].next_entry <
+	     j->space[journal_space_clean_ondisk].total) &&
+	    (clean - clean_ondisk <= total / 8) &&
 	    (clean_ondisk * 2 > clean ))
 		set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 	else
 		clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-	overhead = DIV_ROUND_UP(clean, max_entry_size) *
-		journal_entry_overhead(j);
-	u64s_remaining = clean << 6;
-	u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
-	u64s_remaining /= 4;
+	u64s_remaining  = (u64) clean << 6;
+	u64s_remaining -= (u64) total << 3;
+	u64s_remaining  = max(0LL, u64s_remaining);
+	u64s_remaining /= 2;
+	u64s_remaining  = min_t(u64, u64s_remaining, U32_MAX);
 out:
 	j->cur_entry_sectors	= !ret ? j->space[journal_space_discarded].next_entry : 0;
 	j->cur_entry_error	= ret;
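Plugging hypothetical numbers into the new u64s_remaining formula above: with clean = 8192 and total = 16384 sectors, 8192 << 6 = 524288 u64s, minus 16384 << 3 = 131072, leaves 393216; halved and clamped to U32_MAX that gives 196608 u64s of prereserved budget. A small sketch of the same arithmetic outside the kernel (values are illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical journal geometry, in 512-byte sectors: */
	uint64_t clean = 8192, total = 16384;
	int64_t u64s_remaining = ((int64_t) clean << 6) - ((int64_t) total << 3);

	if (u64s_remaining < 0)
		u64s_remaining = 0;
	u64s_remaining /= 2;
	if (u64s_remaining > UINT32_MAX)
		u64s_remaining = UINT32_MAX;

	printf("prereserved budget: %lld u64s\n", (long long) u64s_remaining);
	return 0;
}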
@@ -367,12 +384,22 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
 	struct journal_entry_pin_list *pin_list;
 
 	spin_lock(&j->lock);
+
+	if (seq < journal_last_seq(j)) {
+		/*
+		 * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
+		 * the src pin - with the pin dropped, the entry to pin might no
+		 * longer to exist, but that means there's no longer anything to
+		 * copy and we can bail out here:
+		 */
+		spin_unlock(&j->lock);
+		return;
+	}
+
 	pin_list = journal_seq_pin(j, seq);
 
 	__journal_pin_drop(j, pin);
 
-	BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
-
 	atomic_inc(&pin_list->count);
 	pin->seq	= seq;
 	pin->flush	= flush_fn;
@@ -572,6 +599,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
 		    c->btree_cache.used * 3)
 			min_nr = 1;
 
+		if (fifo_free(&j->pin) <= 32)
+			min_nr = 1;
+
 		min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
 
 		trace_journal_reclaim_start(c,
@@ -590,7 +620,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
 		else
 			j->nr_background_reclaim += nr_flushed;
 
 		trace_journal_reclaim_finish(c, nr_flushed);
-	} while (min_nr);
+	} while (min_nr && nr_flushed);
 
 	memalloc_noreclaim_restore(flags);
View File

@@ -53,8 +53,11 @@ static inline void bch2_journal_pin_copy(struct journal *j,
 				   struct journal_entry_pin *src,
 				   journal_pin_flush_fn flush_fn)
 {
-	if (journal_pin_active(src))
-		bch2_journal_pin_add(j, src->seq, dst, flush_fn);
+	/* Guard against racing with journal_pin_drop(src): */
+	u64 seq = READ_ONCE(src->seq);
+
+	if (seq)
+		bch2_journal_pin_add(j, seq, dst, flush_fn);
 }
 
 static inline void bch2_journal_pin_update(struct journal *j, u64 seq,

View File

@@ -172,6 +172,7 @@ struct journal {
 		cur_entry_blocked,
 		cur_entry_journal_full,
 		cur_entry_journal_pin_full,
+		cur_entry_journal_stuck,
 		cur_entry_insufficient_devices,
 	} cur_entry_error;

View File

@@ -458,7 +458,9 @@ retry:
 		bch2_btree_iter_set_pos(iter, split->k.p);
 
 		if (remark) {
-			ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
+			ret = bch2_trans_mark_key(&trans,
+						  bkey_s_c_null,
+						  bkey_i_to_s_c(split),
 						  0, split->k.size,
 						  BTREE_TRIGGER_INSERT);
 			if (ret)
@@ -467,7 +469,9 @@ retry:
 	} while (bkey_cmp(iter->pos, k->k.p) < 0);
 
 	if (remark) {
-		ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
+		ret = bch2_trans_mark_key(&trans,
+					  bkey_i_to_s_c(k),
+					  bkey_s_c_null,
 					  0, -((s64) k->k.size),
 					  BTREE_TRIGGER_OVERWRITE);
 		if (ret)

View File

@@ -11,11 +11,6 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
 
 /* Replicas tracking - in memory: */
 
-static inline int u8_cmp(u8 l, u8 r)
-{
-	return cmp_int(l, r);
-}
-
 static void verify_replicas_entry(struct bch_replicas_entry *e)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG

View File

@@ -614,9 +614,6 @@ got_super:
 	    bdev_logical_block_size(sb->bdev))
 		goto err;
 
-	if (sb->mode & FMODE_WRITE)
-		bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
-			|= BDI_CAP_STABLE_WRITES;
-
 	ret = 0;
 	sb->have_layout = true;
 out:

View File

@@ -20,7 +20,7 @@ struct bch_devs_mask {
 
 struct bch_devs_list {
 	u8			nr;
-	u8			devs[BCH_REPLICAS_MAX + 1];
+	u8			devs[BCH_BKEY_PTRS_MAX];
 };
 
 struct bch_member_cpu {

View File

@@ -563,15 +563,14 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 	iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos,
 				   BTREE_ITER_INTENT);
-	ret = PTR_ERR_OR_ZERO(iter);
-	if (ret)
-		goto err;
-
 	k = bch2_btree_iter_peek(iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
+	if (!k.k)
+		goto err;
+
 	bkey_init(&delete.k);
 	delete.k.p = k.k->p;

View File

@@ -747,4 +747,9 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
 
 #define cmp_int(l, r)		((l > r) - (l < r))
 
+static inline int u8_cmp(u8 l, u8 r)
+{
+	return cmp_int(l, r);
+}
+
 #endif /* _BCACHEFS_UTIL_H */