Update bcachefs sources to fcf8a0889c bcachefs: bch2_alloc_write() should be writing for all devices

This commit is contained in:
Kent Overstreet 2021-01-07 19:49:15 -05:00
parent 41dc1733f1
commit f39f0bde78
40 changed files with 589 additions and 441 deletions

View File

@ -1 +1 @@
5241335413ef160e309fd41ab909532fec656a3a
fcf8a0889c125511ae841960c73df62237ab05a7

View File

@ -301,7 +301,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
while (length) {
struct bkey_i_extent *e;
BKEY_PADDED(k) k;
__BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
u64 b = sector_to_bucket(ca, physical);
struct disk_reservation res;
unsigned sectors;

View File

@ -319,9 +319,7 @@ retry:
bch2_trans_update(trans, iter, &a->k_i,
BTREE_TRIGGER_NORUN);
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
flags);
BTREE_INSERT_NOFAIL|flags);
err:
if (ret == -EINTR)
goto retry;
@ -368,7 +366,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags)
unsigned i;
int ret = 0;
for_each_rw_member(ca, c, i) {
for_each_member_device(ca, c, i) {
bch2_dev_alloc_write(c, ca, flags);
if (ret) {
percpu_ref_put(&ca->io_ref);
@ -575,8 +573,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
if (available > fifo_free(&ca->free_inc) ||
(available &&
(!fifo_full(&ca->free[RESERVE_BTREE]) ||
!fifo_full(&ca->free[RESERVE_MOVINGGC]))))
!fifo_full(&ca->free[RESERVE_MOVINGGC])))
break;
up_read(&c->gc_lock);
@ -977,8 +974,7 @@ retry:
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED|
flags);
if (ret == -EINTR)
goto retry;

View File

@ -204,9 +204,10 @@ success:
static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
{
switch (reserve) {
case RESERVE_ALLOC:
return 0;
case RESERVE_BTREE:
case RESERVE_BTREE_MOVINGGC:
return 0;
case RESERVE_MOVINGGC:
return OPEN_BUCKETS_COUNT / 4;
default:
return OPEN_BUCKETS_COUNT / 2;
@ -263,16 +264,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
goto out;
switch (reserve) {
case RESERVE_ALLOC:
if (fifo_pop(&ca->free[RESERVE_BTREE], bucket))
goto out;
break;
case RESERVE_BTREE:
if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >=
ca->free[RESERVE_BTREE].size &&
fifo_pop(&ca->free[RESERVE_BTREE], bucket))
goto out;
break;
case RESERVE_BTREE_MOVINGGC:
case RESERVE_MOVINGGC:
if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
goto out;
@ -458,16 +450,18 @@ bch2_bucket_alloc_set(struct bch_fs *c,
* it's to a device we don't want:
*/
static void bucket_alloc_from_stripe(struct bch_fs *c,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
u16 target,
unsigned erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags)
static enum bucket_alloc_ret
bucket_alloc_from_stripe(struct bch_fs *c,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
u16 target,
unsigned erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
struct closure *cl)
{
struct dev_alloc_list devs_sorted;
struct ec_stripe_head *h;
@ -476,17 +470,21 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
unsigned i, ec_idx;
if (!erasure_code)
return;
return 0;
if (nr_replicas < 2)
return;
return 0;
if (ec_open_bucket(c, ptrs))
return;
return 0;
h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1,
wp == &c->copygc_write_point,
cl);
if (IS_ERR(h))
return -PTR_ERR(h);
if (!h)
return;
return 0;
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
@ -508,6 +506,7 @@ got_bucket:
atomic_inc(&h->s->pin);
out_put_head:
bch2_ec_stripe_head_put(c, h);
return 0;
}
/* Sector allocator */
@ -585,10 +584,13 @@ open_bucket_add_buckets(struct bch_fs *c,
}
if (!ec_open_bucket(c, ptrs)) {
bucket_alloc_from_stripe(c, ptrs, wp, &devs,
ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs,
target, erasure_code,
nr_replicas, nr_effective,
have_cache, flags);
have_cache, flags, _cl);
if (ret == FREELIST_EMPTY ||
ret == OPEN_BUCKETS_EMPTY)
return ret;
if (*nr_effective >= nr_replicas)
return 0;
}

View File

@ -34,14 +34,12 @@ struct bucket_clock {
struct mutex lock;
};
/* There is one reserve for each type of btree, one for prios and gens
* and one for moving GC */
enum alloc_reserve {
RESERVE_ALLOC = -1,
RESERVE_BTREE = 0,
RESERVE_MOVINGGC = 1,
RESERVE_NONE = 2,
RESERVE_NR = 3,
RESERVE_BTREE_MOVINGGC = -2,
RESERVE_BTREE = -1,
RESERVE_MOVINGGC = 0,
RESERVE_NONE = 1,
RESERVE_NR = 2,
};
typedef FIFO(long) alloc_fifo;
@ -89,7 +87,6 @@ struct write_point {
u64 last_used;
unsigned long write_point;
enum bch_data_type type;
bool is_ec;
/* calculated based on how many pointers we're actually going to use: */
unsigned sectors_free;

View File

@ -510,7 +510,7 @@ enum {
/* misc: */
BCH_FS_FIXED_GENS,
BCH_FS_ALLOC_WRITTEN,
BCH_FS_NEED_ALLOC_WRITE,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};

View File

@ -634,8 +634,6 @@ struct bch_reservation {
#define BKEY_EXTENT_VAL_U64s_MAX \
(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
/* Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)

60
libbcachefs/bkey_buf.h Normal file
View File

@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_BUF_H
#define _BCACHEFS_BKEY_BUF_H
#include "bcachefs.h"
/*
 * Scratch buffer holding a single key.  @k points at the storage currently in
 * use: bch2_bkey_buf_init() aims it at the inline @onstack array, and
 * bch2_bkey_buf_realloc() may later replace it with an element taken from
 * c->large_bkey_pool.
 */
struct bkey_buf {
/* Storage currently in use: either @onstack or a large_bkey_pool element. */
struct bkey_i *k;
/* Inline storage (12 u64s) used until a larger key is requested. */
u64 onstack[12];
};
/*
 * Make sure @s can hold a key of @u64s u64s.
 *
 * Nothing happens unless the buffer is still using its inline storage and
 * @u64s exceeds that storage; in that case an element is taken from
 * c->large_bkey_pool and the inline contents are copied over.
 *
 * NOTE(review): a buffer that has already spilled to the pool is never grown
 * again - presumably pool elements are sized for the largest key; confirm.
 */
static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
					 struct bch_fs *c, unsigned u64s)
{
	struct bkey_i *spill;

	/* Already switched away from the inline storage. */
	if (s->k != (void *) s->onstack)
		return;

	/* The request still fits inline. */
	if (u64s <= ARRAY_SIZE(s->onstack))
		return;

	spill = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
	s->k = spill;
	memcpy(spill, s->onstack, sizeof(s->onstack));
}
/*
 * Store a copy of @k in @s: grow the buffer to k.k->u64s if required, then
 * hand the key to bkey_reassemble() with s->k as the destination.
 */
static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
					    struct bch_fs *c,
					    struct bkey_s_c k)
{
	unsigned u64s = k.k->u64s;

	bch2_bkey_buf_realloc(s, c, u64s);
	bkey_reassemble(s->k, k);
}
/*
 * Copy the key @src into @s, first growing the buffer to src->k.u64s if the
 * inline storage is too small.
 */
static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
				      struct bch_fs *c,
				      struct bkey_i *src)
{
	unsigned u64s = src->k.u64s;

	bch2_bkey_buf_realloc(s, c, u64s);
	bkey_copy(s->k, src);
}
/*
 * Unpack the packed key @src from btree node @b into @s.
 *
 * The buffer is sized for BKEY_U64s plus the value size reported by
 * bkeyp_val_u64s() for @b's format before bch2_bkey_unpack() writes to s->k.
 */
static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
					struct bch_fs *c,
					struct btree *b,
					struct bkey_packed *src)
{
	unsigned u64s = BKEY_U64s + bkeyp_val_u64s(&b->format, src);

	bch2_bkey_buf_realloc(s, c, u64s);
	bch2_bkey_unpack(b, s->k, src);
}
/*
 * Initialise @s so that it uses its own inline storage.  Must run before any
 * other bch2_bkey_buf_*() helper, since they compare s->k against the inline
 * array to decide whether the buffer has spilled to the mempool.
 */
static inline void bch2_bkey_buf_init(struct bkey_buf *s)
{
	s->k = (void *) &s->onstack[0];
}
/*
 * Release @s: if the key storage came from c->large_bkey_pool it is returned
 * there; the inline storage needs no cleanup.  s->k is left NULL afterwards.
 */
static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c)
{
	struct bkey_i *cur = s->k;

	s->k = NULL;

	if (cur != (void *) s->onstack)
		mempool_free(cur, &c->large_bkey_pool);
}
#endif /* _BCACHEFS_BKEY_BUF_H */

View File

@ -1,43 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_ON_STACK_H
#define _BCACHEFS_BKEY_ON_STACK_H
#include "bcachefs.h"
/*
 * Scratch buffer holding a single key.  @k points at the storage currently in
 * use: bkey_on_stack_init() aims it at the inline @onstack array, and
 * bkey_on_stack_realloc() may later replace it with an element taken from
 * c->large_bkey_pool.
 */
struct bkey_on_stack {
/* Storage currently in use: either @onstack or a large_bkey_pool element. */
struct bkey_i *k;
/* Inline storage (12 u64s) used until a larger key is requested. */
u64 onstack[12];
};
/*
 * Make sure @s can hold a key of @u64s u64s.
 *
 * Only acts while the buffer is still on its inline storage and @u64s does
 * not fit there: an element is taken from c->large_bkey_pool and the inline
 * contents are copied into it.
 */
static inline void bkey_on_stack_realloc(struct bkey_on_stack *s,
					 struct bch_fs *c, unsigned u64s)
{
	struct bkey_i *spill;

	/* Already switched away from the inline storage. */
	if (s->k != (void *) s->onstack)
		return;

	/* The request still fits inline. */
	if (u64s <= ARRAY_SIZE(s->onstack))
		return;

	spill = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
	s->k = spill;
	memcpy(spill, s->onstack, sizeof(s->onstack));
}
/*
 * Store a copy of @k in @s: grow the buffer to k.k->u64s if required, then
 * hand the key to bkey_reassemble() with s->k as the destination.
 */
static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s,
					    struct bch_fs *c,
					    struct bkey_s_c k)
{
	unsigned u64s = k.k->u64s;

	bkey_on_stack_realloc(s, c, u64s);
	bkey_reassemble(s->k, k);
}
/*
 * Initialise @s so that it uses its own inline storage; the other
 * bkey_on_stack_*() helpers compare s->k against that array to tell whether
 * the buffer has spilled to the mempool.
 */
static inline void bkey_on_stack_init(struct bkey_on_stack *s)
{
	s->k = (void *) &s->onstack[0];
}
/*
 * Release @s: storage obtained from c->large_bkey_pool is returned to the
 * pool; the inline storage needs no cleanup.  s->k is left NULL afterwards.
 */
static inline void bkey_on_stack_exit(struct bkey_on_stack *s,
				      struct bch_fs *c)
{
	struct bkey_i *cur = s->k;

	s->k = NULL;

	if (cur != (void *) s->onstack)
		mempool_free(cur, &c->large_bkey_pool);
}
#endif /* _BCACHEFS_BKEY_ON_STACK_H */

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "bkey_sort.h"
#include "bset.h"
#include "extents.h"
@ -187,11 +187,11 @@ bch2_sort_repack_merge(struct bch_fs *c,
bool filter_whiteouts)
{
struct bkey_packed *out = vstruct_last(dst), *k_packed;
struct bkey_on_stack k;
struct bkey_buf k;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
bkey_on_stack_init(&k);
bch2_bkey_buf_init(&k);
while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
if (filter_whiteouts && bkey_whiteout(k_packed))
@ -204,7 +204,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
* node; we have to make a copy of the entire key before calling
* normalize
*/
bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
bch2_bkey_buf_realloc(&k, c, k_packed->u64s + BKEY_U64s);
bch2_bkey_unpack(src, k.k, k_packed);
if (filter_whiteouts &&
@ -215,7 +215,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
bkey_on_stack_exit(&k, c);
bch2_bkey_buf_exit(&k, c);
return nr;
}
@ -315,11 +315,11 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r;
struct btree_nr_keys nr;
struct bkey_on_stack split;
struct bkey_buf split;
unsigned i;
memset(&nr, 0, sizeof(nr));
bkey_on_stack_init(&split);
bch2_bkey_buf_init(&split);
sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
for (i = 0; i < iter->used;) {
@ -379,7 +379,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
/*
* r wins, but it overlaps in the middle of l - split l:
*/
bkey_on_stack_reassemble(&split, c, l.s_c);
bch2_bkey_buf_reassemble(&split, c, l.s_c);
bch2_cut_back(bkey_start_pos(r.k), split.k);
bch2_cut_front_s(r.k->p, l);
@ -398,7 +398,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
bkey_on_stack_exit(&split, c);
bch2_bkey_buf_exit(&split, c);
return nr;
}

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
@ -898,10 +899,12 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree *parent;
struct btree_node_iter node_iter;
struct bkey_packed *k;
BKEY_PADDED(k) tmp;
struct bkey_buf tmp;
struct btree *ret = NULL;
unsigned level = b->c.level;
bch2_bkey_buf_init(&tmp);
parent = btree_iter_node(iter, level + 1);
if (!parent)
return NULL;
@ -935,9 +938,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (!k)
goto out;
bch2_bkey_unpack(parent, &tmp.k, k);
bch2_bkey_buf_unpack(&tmp, c, parent, k);
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
@ -957,7 +960,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (sib == btree_prev_sib)
btree_node_unlock(iter, level);
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
/*
@ -998,6 +1001,8 @@ out:
bch2_btree_trans_verify_locks(trans);
bch2_bkey_buf_exit(&tmp, c);
return ret;
}

View File

@ -8,7 +8,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
@ -132,6 +132,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
ptr->gen)) {
g2->_mark.gen = g->_mark.gen = ptr->gen;
g2->gen_valid = g->gen_valid = true;
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
}
if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
@ -145,6 +146,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
g2->_mark.dirty_sectors = 0;
g2->_mark.cached_sectors = 0;
set_bit(BCH_FS_FIXED_GENS, &c->flags);
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
}
}
}
@ -233,7 +235,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
if (max_stale > 64)
bch2_btree_node_rewrite(c, iter,
b->data->keys.seq,
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!bch2_btree_gc_rewrite_disabled &&
@ -268,10 +269,12 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bpos next_node_start = b->data->min_key;
struct bkey_buf tmp;
u8 max_stale = 0;
int ret = 0;
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
bch2_bkey_buf_init(&tmp);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_debugcheck(c, b, k);
@ -285,10 +288,9 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
if (b->c.level) {
struct btree *child;
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_bkey_buf_reassemble(&tmp, c, k);
k = bkey_i_to_s_c(tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
@ -300,7 +302,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
break;
if (b->c.level > target_depth) {
child = bch2_btree_node_get_noiter(c, &tmp.k,
child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
@ -318,6 +320,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
}
}
bch2_bkey_buf_exit(&tmp, c);
return ret;
}
@ -570,7 +573,7 @@ static int bch2_gc_done(struct bch_fs *c,
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
ret = 1; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
@ -581,7 +584,7 @@ static int bch2_gc_done(struct bch_fs *c,
dst->_f, src->_f); \
dst->_f = src->_f; \
dst->dirty = true; \
ret = 1; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
@ -592,7 +595,7 @@ static int bch2_gc_done(struct bch_fs *c,
bch2_data_types[dst->b[b].mark.data_type],\
dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
ret = 1; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@ -930,10 +933,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_on_stack sk;
struct bkey_buf sk;
int ret = 0;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@ -942,7 +945,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
if (gc_btree_gens_key(c, k)) {
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
bch2_extent_normalize(c, bkey_i_to_s(sk.k));
bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
@ -962,7 +965,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
}
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
@ -1074,7 +1077,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
}
if (bch2_keylist_realloc(&keylist, NULL, 0,
(BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) {
BKEY_BTREE_PTR_U64s_MAX * nr_old_nodes)) {
trace_btree_gc_coalesce_fail(c,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC);
return;

View File

@ -1320,12 +1320,13 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
struct btree_write_bio *wbio)
{
struct btree *b = wbio->wbio.bio.bi_private;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct bkey_buf k;
struct bch_extent_ptr *ptr;
struct btree_trans trans;
struct btree_iter *iter;
int ret;
bch2_bkey_buf_init(&k);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p,
@ -1344,21 +1345,22 @@ retry:
BUG_ON(!btree_node_hashed(b));
bkey_copy(&tmp.k, &b->key);
bch2_bkey_buf_copy(&k, c, &b->key);
bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
bch2_bkey_drop_ptrs(bkey_i_to_s(k.k), ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k)))
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k)))
goto err;
ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
ret = bch2_btree_node_update_key(c, iter, b, k.k);
if (ret == -EINTR)
goto retry;
if (ret)
goto err;
out:
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&k, c);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b);
return;
@ -1476,7 +1478,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
BKEY_PADDED(key) k;
struct bkey_buf k;
struct bch_extent_ptr *ptr;
struct sort_iter sort_iter;
struct nonce nonce;
@ -1487,6 +1489,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
bool validate_before_checksum = false;
void *data;
bch2_bkey_buf_init(&k);
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
@ -1695,15 +1699,16 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
* just make all btree node writes FUA to keep things sane.
*/
bkey_copy(&k.key, &b->key);
bch2_bkey_buf_copy(&k, c, &b->key);
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr)
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr)
ptr->offset += b->written;
b->written += sectors_to_write;
/* XXX: submitting IO with btree locks held: */
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key);
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
bch2_bkey_buf_exit(&k, c);
return;
err:
set_btree_node_noevict(b);

View File

@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
@ -1048,27 +1049,31 @@ static void btree_iter_prefetch(struct btree_iter *iter)
struct btree_iter_level *l = &iter->l[iter->level];
struct btree_node_iter node_iter = l->iter;
struct bkey_packed *k;
BKEY_PADDED(k) tmp;
struct bkey_buf tmp;
unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level);
bch2_bkey_buf_init(&tmp);
while (nr) {
if (!bch2_btree_node_relock(iter, iter->level))
return;
break;
bch2_btree_node_iter_advance(&node_iter, l->b);
k = bch2_btree_node_iter_peek(&node_iter, l->b);
if (!k)
break;
bch2_bkey_unpack(l->b, &tmp.k, k);
bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1);
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1);
}
if (!was_locked)
btree_node_unlock(iter, iter->level);
bch2_bkey_buf_exit(&tmp, c);
}
static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
@ -1100,30 +1105,34 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
struct btree *b;
unsigned level = iter->level - 1;
enum six_lock_type lock_type = __btree_lock_want(iter, level);
BKEY_PADDED(k) tmp;
struct bkey_buf tmp;
int ret;
EBUG_ON(!btree_node_locked(iter, iter->level));
bch2_bkey_unpack(l->b, &tmp.k,
bch2_bkey_buf_init(&tmp);
bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b));
b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
if (unlikely(IS_ERR(b)))
return PTR_ERR(b);
b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
if (unlikely(ret))
goto err;
mark_btree_node_locked(iter, level, lock_type);
btree_iter_node_set(iter, b);
if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 &&
unlikely(b != btree_node_mem_ptr(&tmp.k)))
if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
unlikely(b != btree_node_mem_ptr(tmp.k)))
btree_node_mem_ptr_set(iter, level + 1, b);
if (iter->flags & BTREE_ITER_PREFETCH)
btree_iter_prefetch(iter);
iter->level = level;
return 0;
err:
bch2_bkey_buf_exit(&tmp, c);
return ret;
}
static void btree_iter_up(struct btree_iter *iter)
@ -2124,9 +2133,12 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
iter->flags &= ~BTREE_ITER_USER_FLAGS;
iter->flags |= flags & BTREE_ITER_USER_FLAGS;
if (iter->flags & BTREE_ITER_INTENT)
bch2_btree_iter_upgrade(iter, 1);
else
if (iter->flags & BTREE_ITER_INTENT) {
if (!iter->locks_want) {
__bch2_btree_iter_unlock(iter);
iter->locks_want = 1;
}
} else
bch2_btree_iter_downgrade(iter);
BUG_ON(iter->btree_id != btree_id);

View File

@ -349,8 +349,6 @@ retry:
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED|
BTREE_INSERT_JOURNAL_RECLAIM);
err:

View File

@ -57,7 +57,7 @@ struct btree_write {
struct btree_alloc {
struct open_buckets ob;
BKEY_PADDED(k);
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
struct btree_bkey_cached_common {

View File

@ -20,7 +20,6 @@ enum btree_insert_flags {
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_JOURNAL_RECLAIM,
@ -43,7 +42,6 @@ enum btree_insert_flags {
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
/* Insert is for journal replay - don't get journal reservations: */
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)

View File

@ -195,21 +195,18 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
{
struct write_point *wp;
struct btree *b;
BKEY_PADDED(k) tmp;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct open_buckets ob = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
unsigned nr_reserve;
enum alloc_reserve alloc_reserve;
if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) {
if (flags & BTREE_INSERT_USE_RESERVE) {
nr_reserve = 0;
alloc_reserve = RESERVE_ALLOC;
} else if (flags & BTREE_INSERT_USE_RESERVE) {
nr_reserve = BTREE_NODE_RESERVE / 2;
alloc_reserve = RESERVE_BTREE;
alloc_reserve = RESERVE_BTREE_MOVINGGC;
} else {
nr_reserve = BTREE_NODE_RESERVE;
alloc_reserve = RESERVE_NONE;
alloc_reserve = RESERVE_BTREE;
}
mutex_lock(&c->btree_reserve_cache_lock);
@ -577,8 +574,6 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_trans_init(&trans, c, 0, 512);
ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED,
@ -1232,6 +1227,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
src = n;
}
/* Also clear out the unwritten whiteouts area: */
b->whiteout_u64s = 0;
i->u64s = cpu_to_le16((u64 *) dst - i->_data);
set_btree_bset_end(b, b->set);
@ -1457,15 +1455,6 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
struct btree_update *as;
struct closure cl;
int ret = 0;
struct btree_insert_entry *i;
/*
* We already have a disk reservation and open buckets pinned; this
* allocation must not block:
*/
trans_for_each_update(trans, i)
if (btree_node_type_needs_gc(i->iter->btree_id))
flags |= BTREE_INSERT_USE_RESERVE;
closure_init_stack(&cl);
@ -1926,10 +1915,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
retry:
as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE,
&cl);
BTREE_INSERT_NOFAIL, &cl);
if (IS_ERR(as)) {
ret = PTR_ERR(as);

View File

@ -869,8 +869,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans_trigger_run = false;
trans_for_each_update(trans, i) {
if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK &&
(ret = bch2_btree_iter_traverse(i->iter)))) {
ret = bch2_btree_iter_traverse(i->iter);
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip);
goto out;
}
@ -879,8 +879,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
if (unlikely(i->iter->locks_want < 1 &&
!__bch2_btree_iter_upgrade(i->iter, 1))) {
if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
!__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
@ -1084,8 +1084,7 @@ int bch2_btree_delete_at(struct btree_trans *trans,
bch2_trans_update(trans, iter, &k, 0);
return bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|flags);
BTREE_INSERT_NOFAIL|flags);
}
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,

View File

@ -2192,7 +2192,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
ca->mi.bucket_size / c->opts.btree_node_size);
/* XXX: these should be tunable */
size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6);
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
btree_reserve * 2);
bool resize = ca->buckets[0] != NULL;
@ -2209,7 +2209,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
!init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||

View File

@ -336,8 +336,19 @@ static int attempt_compress(struct bch_fs *c,
ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
/*
* ZSTD requires that when we decompress we pass in the exact
* compressed size - rounding it up to the nearest sector
* doesn't work, so we use the first 4 bytes of the buffer for
* that.
*
* Additionally, the ZSTD code seems to have a bug where it will
* write just past the end of the buffer - so subtract a fudge
* factor (7 bytes) from the dst buffer size to account for
* that.
*/
size_t len = ZSTD_compressCCtx(ctx,
dst + 4, dst_len - 4,
dst + 4, dst_len - 4 - 7,
src, src_len,
c->zstd_params);
if (ZSTD_isError(len))

View File

@ -4,7 +4,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false;
}
/* Stripe bufs: */
/*
 * Free every per-block data buffer of @stripe (stripe->size << 9 bytes each)
 * and reset the pointers to NULL so the buffer can be freed again safely.
 */
static void ec_stripe_buf_free(struct ec_stripe_buf *stripe)
{
	unsigned nr_blocks = stripe->key.v.nr_blocks;
	unsigned blk;

	for (blk = 0; blk < nr_blocks; blk++) {
		void *buf = stripe->data[blk];

		stripe->data[blk] = NULL;
		kvpfree(buf, stripe->size << 9);
	}
}
/*
 * Allocate one data buffer of stripe->size << 9 bytes for each block of
 * @stripe, after setting every bit in stripe->valid.
 *
 * Returns 0 on success.  If any allocation fails, everything is released via
 * ec_stripe_buf_free() and -ENOMEM is returned.
 *
 * NOTE(review): the failure path frees all nr_blocks slots, including ones
 * this call has not yet filled - presumably callers hand in a zeroed
 * structure; confirm.
 */
static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe)
{
	unsigned blk = 0;

	memset(stripe->valid, 0xFF, sizeof(stripe->valid));

	while (blk < stripe->key.v.nr_blocks) {
		stripe->data[blk] = kvpmalloc(stripe->size << 9, GFP_KERNEL);
		if (!stripe->data[blk]) {
			ec_stripe_buf_free(stripe);
			return -ENOMEM;
		}
		blk++;
	}

	return 0;
}
/* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf)
@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf)
raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
}
static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr)
{
return nr - bitmap_weight(buf->valid, nr);
}
static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
{
return __ec_nr_failed(buf, buf->key.v.nr_blocks);
return buf->key.v.nr_blocks -
bitmap_weight(buf->valid, buf->key.v.nr_blocks);
}
static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
@ -757,10 +783,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_extent e;
struct bkey_on_stack sk;
struct bkey_buf sk;
int ret = 0, dev, idx;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
/* XXX this doesn't support the reflink btree */
@ -787,7 +813,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
dev = s->key.v.ptrs[idx].dev;
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
e = bkey_i_to_s_extent(sk.k);
bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
@ -800,8 +826,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
bch2_trans_update(&trans, iter, sk.k, 0);
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE);
BTREE_INSERT_NOFAIL);
if (ret == -EINTR)
ret = 0;
if (ret)
@ -809,7 +834,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
}
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
@ -823,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s)
struct open_bucket *ob;
struct bkey_i *k;
struct stripe *m;
struct bch_stripe *v = &s->stripe.key.v;
struct bch_stripe *v = &s->new_stripe.key.v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
struct closure cl;
int ret;
BUG_ON(s->h->s == s);
closure_init_stack(&cl);
closure_sync(&s->iodone);
if (s->err) {
if (s->err != -EROFS)
@ -838,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
if (s->have_existing_stripe) {
ec_validate_checksums(c, &s->existing_stripe);
if (ec_do_recov(c, &s->existing_stripe)) {
bch_err(c, "error creating stripe: error reading existing stripe");
goto err;
}
for (i = 0; i < nr_data; i++)
if (stripe_blockcount_get(&s->existing_stripe.key.v, i))
swap(s->new_stripe.data[i],
s->existing_stripe.data[i]);
ec_stripe_buf_free(&s->existing_stripe);
}
BUG_ON(!s->allocated);
if (!percpu_ref_tryget(&c->writes))
@ -846,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s)
BUG_ON(bitmap_weight(s->blocks_allocated,
s->blocks.nr) != s->blocks.nr);
ec_generate_ec(&s->stripe);
ec_generate_ec(&s->new_stripe);
ec_generate_checksums(&s->stripe);
ec_generate_checksums(&s->new_stripe);
/* write p/q: */
for (i = nr_data; i < v->nr_blocks; i++)
ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl);
ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
closure_sync(&s->iodone);
closure_sync(&cl);
if (ec_nr_failed(&s->new_stripe)) {
bch_err(c, "error creating stripe: error writing redundancy buckets");
goto err_put_writes;
}
for (i = nr_data; i < v->nr_blocks; i++)
if (!test_bit(i, s->stripe.valid)) {
bch_err(c, "error creating stripe: error writing redundancy buckets");
goto err_put_writes;
}
ret = s->existing_stripe
? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
ret = s->have_existing_stripe
? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i,
&s->res, NULL, BTREE_INSERT_NOFAIL)
: ec_stripe_bkey_insert(c, s, &s->stripe.key);
: ec_stripe_bkey_insert(c, s, &s->new_stripe.key);
if (ret) {
bch_err(c, "error creating stripe: error creating stripe key");
goto err_put_writes;
}
for_each_keylist_key(&s->keys, k) {
ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
if (ret) {
bch_err(c, "error creating stripe: error %i updating pointers", ret);
break;
@ -880,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
}
spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset);
#if 0
pr_info("created a %s stripe %llu",
s->existing_stripe ? "existing" : "new",
s->have_existing_stripe ? "existing" : "new",
s->stripe.key.k.p.offset);
#endif
BUG_ON(m->on_heap);
bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes:
percpu_ref_put(&c->writes);
@ -903,8 +941,9 @@ err:
bch2_keylist_free(&s->keys, s->inline_keys);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
kvpfree(s->stripe.data[i], s->stripe.size << 9);
ec_stripe_buf_free(&s->existing_stripe);
ec_stripe_buf_free(&s->new_stripe);
closure_debug_destroy(&s->iodone);
kfree(s);
}
@ -981,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
offset = ca->mi.bucket_size - ob->sectors_free;
return ob->ec->stripe.data[ob->ec_idx] + (offset << 9);
return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
}
void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
@ -1088,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c,
static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
unsigned i;
lockdep_assert_held(&h->lock);
@ -1097,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
return -ENOMEM;
mutex_init(&s->lock);
closure_init(&s->iodone, NULL);
atomic_set(&s->pin, 1);
s->c = c;
s->h = h;
@ -1106,32 +1145,20 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
bch2_keylist_init(&s->keys, s->inline_keys);
s->stripe.offset = 0;
s->stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
s->new_stripe.offset = 0;
s->new_stripe.size = h->blocksize;
ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
s->nr_parity, h->blocksize);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
if (!s->stripe.data[i])
goto err;
}
h->s = s;
return 0;
err:
for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
kvpfree(s->stripe.data[i], s->stripe.size << 9);
kfree(s);
return -ENOMEM;
}
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
unsigned algo, unsigned redundancy)
unsigned algo, unsigned redundancy,
bool copygc)
{
struct ec_stripe_head *h;
struct bch_dev *ca;
@ -1147,6 +1174,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
h->target = target;
h->algo = algo;
h->redundancy = redundancy;
h->copygc = copygc;
rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_user, target);
@ -1178,9 +1206,10 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
}
struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
unsigned target,
unsigned algo,
unsigned redundancy,
bool copygc)
{
struct ec_stripe_head *h;
@ -1191,21 +1220,21 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target &&
h->algo == algo &&
h->redundancy == redundancy) {
h->redundancy == redundancy &&
h->copygc == copygc) {
mutex_lock(&h->lock);
goto found;
}
h = ec_new_stripe_head_alloc(c, target, algo, redundancy);
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, copygc);
found:
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
/*
* XXX: use a higher watermark for allocating open buckets here:
*/
static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
static enum bucket_alloc_ret
new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
struct closure *cl)
{
struct bch_devs_mask devs;
struct open_bucket *ob;
@ -1213,12 +1242,12 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
min_t(unsigned, h->nr_active_devs,
BCH_BKEY_PTRS_MAX) - h->redundancy;
bool have_cache = true;
int ret = 0;
enum bucket_alloc_ret ret = ALLOC_SUCCESS;
devs = h->devs;
for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
__clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
__clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
--nr_data;
}
@ -1242,9 +1271,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
h->redundancy,
&nr_have,
&have_cache,
RESERVE_NONE,
h->copygc
? RESERVE_MOVINGGC
: RESERVE_NONE,
0,
NULL);
cl);
if (ret)
goto err;
}
@ -1258,9 +1289,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
nr_data,
&nr_have,
&have_cache,
RESERVE_NONE,
h->copygc
? RESERVE_MOVINGGC
: RESERVE_NONE,
0,
NULL);
cl);
if (ret)
goto err;
}
@ -1326,64 +1359,84 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
unsigned redundancy,
bool copygc,
struct closure *cl)
{
struct closure cl;
struct ec_stripe_head *h;
struct open_bucket *ob;
unsigned i, data_idx = 0;
s64 idx;
int ret;
closure_init_stack(&cl);
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
if (!h)
h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
if (!h) {
bch_err(c, "no stripe head");
return NULL;
}
if (!h->s) {
if (ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h);
bch_err(c, "failed to allocate new stripe");
return NULL;
}
idx = get_existing_stripe(c, target, algo, redundancy);
if (idx >= 0) {
h->s->existing_stripe = true;
h->s->existing_stripe_idx = idx;
if (get_stripe_key(c, idx, &h->s->stripe)) {
/* btree error */
h->s->have_existing_stripe = true;
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
if (ret) {
bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
bch2_ec_stripe_head_put(c, h);
return NULL;
}
if (ec_stripe_buf_alloc(&h->s->existing_stripe)) {
/*
* this is a problem: we have deleted from the
* stripes heap already
*/
BUG();
}
for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++)
if (stripe_blockcount_get(&h->s->stripe.key.v, i)) {
for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i))
__set_bit(i, h->s->blocks_allocated);
ec_block_io(c, &h->s->stripe, READ, i, &cl);
}
ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
}
bkey_copy(&h->s->new_stripe.key.k_i,
&h->s->existing_stripe.key.k_i);
}
if (ec_stripe_buf_alloc(&h->s->new_stripe)) {
BUG();
}
}
if (!h->s->allocated) {
if (!h->s->existing_stripe &&
if (!h->s->have_existing_stripe &&
!h->s->res.sectors) {
ret = bch2_disk_reservation_get(c, &h->s->res,
h->blocksize,
h->s->nr_parity, 0);
h->blocksize,
h->s->nr_parity, 0);
if (ret) {
/* What should we do here? */
bch_err(c, "unable to create new stripe: %i", ret);
/*
* This means we need to wait for copygc to
* empty out buckets from existing stripes:
*/
bch2_ec_stripe_head_put(c, h);
h = NULL;
goto out;
}
}
if (new_stripe_alloc_buckets(c, h)) {
ret = new_stripe_alloc_buckets(c, h, cl);
if (ret) {
bch2_ec_stripe_head_put(c, h);
h = NULL;
h = ERR_PTR(-ret);
goto out;
}
@ -1392,19 +1445,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data);
h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx;
data_idx++;
}
open_bucket_for_each(c, &h->s->parity, ob, i)
h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
//pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
h->s->allocated = true;
}
out:
closure_sync(&cl);
return h;
}

View File

@ -88,6 +88,7 @@ struct ec_stripe_new {
struct ec_stripe_head *h;
struct mutex lock;
struct list_head list;
struct closure iodone;
/* counts in flight writes, stripe is created when pin == 0 */
atomic_t pin;
@ -98,8 +99,7 @@ struct ec_stripe_new {
u8 nr_parity;
bool allocated;
bool pending;
bool existing_stripe;
u64 existing_stripe_idx;
bool have_existing_stripe;
unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
@ -111,7 +111,8 @@ struct ec_stripe_new {
struct keylist keys;
u64 inline_keys[BKEY_U64s * 8];
struct ec_stripe_buf stripe;
struct ec_stripe_buf new_stripe;
struct ec_stripe_buf existing_stripe;
};
struct ec_stripe_head {
@ -121,6 +122,7 @@ struct ec_stripe_head {
unsigned target;
unsigned algo;
unsigned redundancy;
bool copygc;
struct bch_devs_mask devs;
unsigned nr_active_devs;
@ -145,8 +147,8 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
unsigned, unsigned);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *,
unsigned, unsigned, unsigned, bool, struct closure *);
void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);

View File

@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"

View File

@ -665,7 +665,7 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
}
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
unsigned nr_replicas)
unsigned nr_replicas, bool compressed)
{
struct btree_trans trans;
struct btree_iter *iter;
@ -683,7 +683,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;
if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) {
if (nr_replicas > bch2_bkey_replicas(c, k) ||
(!compressed && bch2_bkey_sectors_compressed(k))) {
ret = false;
break;
}
@ -693,6 +694,33 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
return ret;
}
/*
 * Count the replicas described by the pointers in @k.
 *
 * Cached pointers are ignored.  Every other pointer counts once; a pointer
 * that carries an erasure coding entry additionally counts the nr_redundant
 * value of the stripe it refers to (looked up in c->stripes[0]).  A missing
 * stripe triggers a warning and contributes nothing extra.
 */
unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned nr = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct stripe *m;

		if (p.ptr.cached)
			continue;

		/* the pointer itself */
		nr++;

		if (!p.has_ec)
			continue;

		/* plus the redundancy of the stripe backing it, if we can find it */
		m = genradix_ptr(&c->stripes[0], p.ec.idx);
		WARN_ON(!m);
		if (m)
			nr += m->nr_redundant;
	}

	return nr;
}
static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
struct extent_ptr_decoded p)
{

View File

@ -538,7 +538,9 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
bool bch2_bkey_is_incompressible(struct bkey_s_c);
unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned);
bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool);
unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,

View File

@ -3,7 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@ -791,7 +791,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
struct readpages_iter *readpages_iter)
{
struct bch_fs *c = trans->c;
struct bkey_on_stack sk;
struct bkey_buf sk;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
int ret = 0;
@ -799,7 +799,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
rbio->c = c;
rbio->start_time = local_clock();
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
retry:
while (1) {
struct bkey_s_c k;
@ -817,7 +817,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
ret = bch2_read_indirect_extent(trans,
&offset_into_extent, &sk);
@ -862,7 +862,7 @@ retry:
bio_endio(&rbio->bio);
}
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
}
void bch2_readahead(struct readahead_control *ractl)
@ -1863,7 +1863,9 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dio->op.opts.data_replicas, 0);
if (unlikely(ret) &&
!bch2_check_range_allocated(c, dio->op.pos,
bio_sectors(bio), dio->op.opts.data_replicas))
bio_sectors(bio),
dio->op.opts.data_replicas,
dio->op.opts.compression != 0))
goto err;
task_io_account_write(bio->bi_iter.bi_size);
@ -2414,7 +2416,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bkey_on_stack copy;
struct bkey_buf copy;
struct btree_trans trans;
struct btree_iter *src, *dst;
loff_t shift, new_size;
@ -2424,7 +2426,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
bkey_on_stack_init(&copy);
bch2_bkey_buf_init(&copy);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
/*
@ -2512,7 +2514,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
bkey_on_stack_reassemble(&copy, c, k);
bch2_bkey_buf_reassemble(&copy, c, k);
if (insert &&
bkey_cmp(bkey_start_pos(k.k), move_pos) < 0)
@ -2589,7 +2591,7 @@ bkey_err:
}
err:
bch2_trans_exit(&trans);
bkey_on_stack_exit(&copy, c);
bch2_bkey_buf_exit(&copy, c);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;

View File

@ -3,7 +3,7 @@
#include "bcachefs.h"
#include "acl.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "chardev.h"
@ -886,7 +886,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_on_stack cur, prev;
struct bkey_buf cur, prev;
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
unsigned offset_into_extent, sectors;
bool have_extent = false;
@ -899,8 +899,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (start + len < start)
return -EINVAL;
bkey_on_stack_init(&cur);
bkey_on_stack_init(&prev);
bch2_bkey_buf_init(&cur);
bch2_bkey_buf_init(&prev);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@ -919,7 +919,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
bkey_on_stack_reassemble(&cur, c, k);
bch2_bkey_buf_reassemble(&cur, c, k);
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, &cur);
@ -927,7 +927,7 @@ retry:
break;
k = bkey_i_to_s_c(cur.k);
bkey_on_stack_realloc(&prev, c, k.k->u64s);
bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
sectors = min(sectors, k.k->size - offset_into_extent);
@ -961,8 +961,8 @@ retry:
FIEMAP_EXTENT_LAST);
ret = bch2_trans_exit(&trans) ?: ret;
bkey_on_stack_exit(&cur, c);
bkey_on_stack_exit(&prev, c);
bch2_bkey_buf_exit(&cur, c);
bch2_bkey_buf_exit(&prev, c);
return ret < 0 ? ret : 0;
}
@ -1007,10 +1007,7 @@ static const struct file_operations bch_file_operations = {
.open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = generic_file_splice_read,
/*
* Broken, on v5.3:
.splice_write = iter_file_splice_write,
*/
.fallocate = bch2_fallocate_dispatch,
.unlocked_ioctl = bch2_fs_file_ioctl,
#ifdef CONFIG_COMPAT

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "dirent.h"
#include "error.h"
@ -464,11 +464,11 @@ static int check_extents(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_on_stack prev;
struct bkey_buf prev;
u64 i_sectors;
int ret = 0;
bkey_on_stack_init(&prev);
bch2_bkey_buf_init(&prev);
prev.k->k = KEY(0, 0, 0);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
@ -500,7 +500,7 @@ retry:
goto err;
}
}
bkey_on_stack_reassemble(&prev, c, k);
bch2_bkey_buf_reassemble(&prev, c, k);
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
@ -569,7 +569,7 @@ err:
fsck_err:
if (ret == -EINTR)
goto retry;
bkey_on_stack_exit(&prev, c);
bch2_bkey_buf_exit(&prev, c);
return bch2_trans_exit(&trans) ?: ret;
}

View File

@ -9,7 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "bset.h"
#include "btree_update.h"
#include "buckets.h"
@ -183,18 +183,23 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
/* Extent update path: */
static int sum_sector_overwrites(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct bkey_i *new,
bool *maybe_extending,
s64 *i_sectors_delta,
s64 *disk_sectors_delta)
int bch2_sum_sector_overwrites(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct bkey_i *new,
bool *maybe_extending,
bool *should_check_enospc,
s64 *i_sectors_delta,
s64 *disk_sectors_delta)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
struct bkey_s_c old;
unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
int ret = 0;
*maybe_extending = true;
*should_check_enospc = false;
*i_sectors_delta = 0;
*disk_sectors_delta = 0;
@ -213,6 +218,11 @@ static int sum_sector_overwrites(struct btree_trans *trans,
(int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) -
bch2_bkey_nr_ptrs_fully_allocated(old));
if (!*should_check_enospc &&
(new_replicas > bch2_bkey_replicas(c, old) ||
(!new_compressed && bch2_bkey_sectors_compressed(old))))
*should_check_enospc = true;
if (bkey_cmp(old.k->p, new->k.p) >= 0) {
/*
* Check if there's already data above where we're
@ -250,7 +260,7 @@ int bch2_extent_update(struct btree_trans *trans,
{
/* this must live until after bch2_trans_commit(): */
struct bkey_inode_buf inode_p;
bool extending = false;
bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
int ret;
@ -258,8 +268,9 @@ int bch2_extent_update(struct btree_trans *trans,
if (ret)
return ret;
ret = sum_sector_overwrites(trans, iter, k,
ret = bch2_sum_sector_overwrites(trans, iter, k,
&extending,
&should_check_enospc,
&i_sectors_delta,
&disk_sectors_delta);
if (ret)
@ -269,7 +280,8 @@ int bch2_extent_update(struct btree_trans *trans,
disk_sectors_delta > (s64) disk_res->sectors) {
ret = bch2_disk_reservation_add(trans->c, disk_res,
disk_sectors_delta - disk_res->sectors,
0);
!should_check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
return ret;
}
@ -320,8 +332,7 @@ int bch2_extent_update(struct btree_trans *trans,
ret = bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE);
BTREE_INSERT_NOFAIL);
if (ret)
return ret;
@ -404,14 +415,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct bkey_on_stack sk;
struct bkey_buf sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
struct btree_trans trans;
struct btree_iter *iter;
int ret;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@ -423,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op)
k = bch2_keylist_front(keys);
bkey_on_stack_realloc(&sk, c, k->k.u64s);
bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
bkey_copy(sk.k, k);
bch2_cut_front(iter->pos, sk.k);
@ -440,7 +451,7 @@ int bch2_write_index_default(struct bch_write_op *op)
} while (!bch2_keylist_empty(keys));
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
@ -1617,14 +1628,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_on_stack sk;
struct bkey_buf sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@ -1636,7 +1647,7 @@ retry:
if (bkey_err(k))
goto err;
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
@ -1657,7 +1668,7 @@ retry:
out:
bch2_rbio_done(rbio);
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
@ -1670,14 +1681,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_on_stack sk;
struct bkey_buf sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -1687,7 +1698,7 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
unsigned bytes, sectors, offset_into_extent;
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
@ -1736,7 +1747,7 @@ err:
rbio->bio.bi_status = BLK_STS_IOERR;
out:
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
bch2_rbio_done(rbio);
}
@ -1807,17 +1818,6 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
if ((ret = bkey_err(k)))
goto out;
/*
* going to be temporarily appending another checksum entry:
*/
new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
BKEY_EXTENT_U64s_MAX * 8);
if ((ret = PTR_ERR_OR_ZERO(new)))
goto out;
bkey_reassemble(new, k);
k = bkey_i_to_s_c(new);
if (bversion_cmp(k.k->version, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
goto out;
@ -1836,6 +1836,16 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
goto out;
}
/*
* going to be temporarily appending another checksum entry:
*/
new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
sizeof(struct bch_extent_crc128));
if ((ret = PTR_ERR_OR_ZERO(new)))
goto out;
bkey_reassemble(new, k);
if (!bch2_bkey_narrow_crcs(new, new_crc))
goto out;
@ -2002,7 +2012,7 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
struct bkey_on_stack *orig_k)
struct bkey_buf *orig_k)
{
struct btree_iter *iter;
struct bkey_s_c k;
@ -2029,7 +2039,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
}
*offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
bkey_on_stack_reassemble(orig_k, trans->c, k);
bch2_bkey_buf_reassemble(orig_k, trans->c, k);
err:
bch2_trans_iter_put(trans, iter);
return ret;
@ -2208,7 +2218,11 @@ get_bio:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
if (pick.ptr.cached)
/*
* If it's being moved internally, we don't want to flag it as a cache
* hit:
*/
if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
bch2_bucket_io_time_reset(trans, pick.ptr.dev,
PTR_BUCKET_NR(ca, &pick.ptr), READ);
@ -2290,7 +2304,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_on_stack sk;
struct bkey_buf sk;
struct bkey_s_c k;
unsigned flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE|
@ -2304,7 +2318,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@ -2327,7 +2341,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, &sk);
@ -2364,7 +2378,7 @@ retry:
}
out:
bch2_trans_exit(&trans);
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return;
err:
if (ret == -EINTR)

View File

@ -3,7 +3,7 @@
#define _BCACHEFS_IO_H
#include "checksum.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "io_types.h"
#define to_wbio(_bio) \
@ -60,6 +60,8 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
: op->c->wq;
}
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct disk_reservation *,
u64 *, u64, s64 *);
@ -112,11 +114,11 @@ struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
struct bkey_on_stack *);
struct bkey_buf *);
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
struct bkey_on_stack *k)
struct bkey_buf *k)
{
return k->k->k.type == KEY_TYPE_reflink_p
? __bch2_read_indirect_extent(trans, offset_into_extent, k)

View File

@ -777,7 +777,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
}
} else {
rcu_read_lock();
ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC,
ob = bch2_bucket_alloc(c, ca, RESERVE_NONE,
false, cl);
rcu_read_unlock();
if (IS_ERR(ob)) {
@ -1095,7 +1095,7 @@ int bch2_fs_journal_init(struct journal *j)
/* Btree roots: */
j->entry_u64s_reserved +=
BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX);
atomic64_set(&j->reservations.counter,
((union journal_res_state)

View File

@ -577,8 +577,15 @@ reread:
if (bch2_dev_io_err_on(ret, ca,
"journal read error: sector %llu",
offset) ||
bch2_meta_read_fault("journal"))
return -EIO;
bch2_meta_read_fault("journal")) {
/*
* We don't error out of the recovery process
* here, since the relevant journal entry may be
* found on a different device, and missing or
* no journal entries will be handled later
*/
return 0;
}
j = buf->data;
}
@ -990,6 +997,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
done:
rcu_read_unlock();
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
}
@ -1050,9 +1059,13 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
return;
memcpy(new_buf, buf->data, buf->buf_size);
kvpfree(buf->data, buf->buf_size);
buf->data = new_buf;
buf->buf_size = new_size;
spin_lock(&j->lock);
swap(buf->data, new_buf);
swap(buf->buf_size, new_size);
spin_unlock(&j->lock);
kvpfree(new_buf, new_size);
}
static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)

View File

@ -20,7 +20,7 @@
struct journal_buf {
struct jset *data;
BKEY_PADDED(key);
__BKEY_PADDED(key, BCH_REPLICAS_MAX);
struct closure_waitlist wait;

View File

@ -4,7 +4,7 @@
*/
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
@ -41,10 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_on_stack sk;
struct bkey_buf sk;
int ret = 0;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@ -57,7 +57,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
continue;
}
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
dev_idx, flags, false);
@ -90,7 +90,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
}
ret = bch2_trans_exit(&trans) ?: ret;
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
BUG_ON(ret == -EINTR);
@ -109,6 +109,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
struct btree_iter *iter;
struct closure cl;
struct btree *b;
struct bkey_buf k;
unsigned id;
int ret;
@ -116,28 +117,28 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
bch2_bkey_buf_init(&k);
bch2_trans_init(&trans, c, 0, 0);
closure_init_stack(&cl);
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
retry:
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
dev_idx))
continue;
bkey_copy(&tmp.k, &b->key);
bch2_bkey_buf_copy(&k, c, &b->key);
ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k),
ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
dev_idx, flags, true);
if (ret) {
bch_err(c, "Cannot drop device without losing data");
goto err;
}
ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
ret = bch2_btree_node_update_key(c, iter, b, k.k);
if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(iter);
goto retry;
@ -157,6 +158,7 @@ retry:
ret = 0;
err:
ret = bch2_trans_exit(&trans) ?: ret;
bch2_bkey_buf_exit(&k, c);
BUG_ON(ret == -EINTR);

View File

@ -2,7 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
@ -61,8 +61,13 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct migrate_write *m =
container_of(op, struct migrate_write, op);
struct keylist *keys = &op->insert_keys;
struct bkey_buf _new, _insert;
int ret = 0;
bch2_bkey_buf_init(&_new);
bch2_bkey_buf_init(&_insert);
bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, m->btree_id,
@ -73,21 +78,18 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct bkey_s_c k;
struct bkey_i *insert;
struct bkey_i_extent *new;
BKEY_PADDED(k) _new, _insert;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool did_work = false;
int nr;
bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_reset(&trans, 0);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
if (ret == -EINTR)
continue;
break;
}
if (ret)
goto err;
new = bkey_i_to_extent(bch2_keylist_front(keys));
@ -95,11 +97,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
!bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
goto nomatch;
bkey_reassemble(&_insert.k, k);
insert = &_insert.k;
bkey_reassemble(_insert.k, k);
insert = _insert.k;
bkey_copy(&_new.k, bch2_keylist_front(keys));
new = bkey_i_to_extent(&_new.k);
bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
new = bkey_i_to_extent(_new.k);
bch2_cut_front(iter->pos, &new->k_i);
bch2_cut_front(iter->pos, insert);
@ -144,23 +146,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
op->opts.background_target,
op->opts.data_replicas);
/*
* If we're not fully overwriting @k, and it's compressed, we
* need a reservation for all the pointers in @insert
*/
nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) -
m->nr_ptrs_reserved;
ret = bch2_sum_sector_overwrites(&trans, iter, insert,
&extending,
&should_check_enospc,
&i_sectors_delta,
&disk_sectors_delta);
if (ret)
goto err;
if (insert->k.size < k.k->size &&
bch2_bkey_sectors_compressed(k) &&
nr > 0) {
/* compare against the reserved sector count itself, not the address of the field */
if (disk_sectors_delta > (s64) op->res.sectors) {
ret = bch2_disk_reservation_add(c, &op->res,
keylist_sectors(keys) * nr, 0);
disk_sectors_delta - op->res.sectors,
!should_check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
goto out;
m->nr_ptrs_reserved += nr;
goto next;
}
bch2_trans_update(&trans, iter, insert, 0);
@ -168,8 +168,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
m->data_opts.btree_insert_flags);
err:
if (!ret)
atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR)
@ -197,6 +197,8 @@ nomatch:
}
out:
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&_insert, c);
bch2_bkey_buf_exit(&_new, c);
BUG_ON(ret == -EINTR);
return ret;
}
@ -516,7 +518,7 @@ static int __bch2_move_data(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_on_stack sk;
struct bkey_buf sk;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
@ -525,7 +527,7 @@ static int __bch2_move_data(struct bch_fs *c,
u64 delay, cur_inum = U64_MAX;
int ret = 0, ret2;
bkey_on_stack_init(&sk);
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_user;
@ -605,13 +607,19 @@ peek:
}
/* unlock before doing IO: */
bkey_on_stack_reassemble(&sk, c, k);
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
if (ret2) {
if (ret2 == -EINTR) {
bch2_trans_reset(&trans, 0);
bch2_trans_cond_resched(&trans);
continue;
}
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
@ -633,7 +641,7 @@ next_nondata:
}
out:
ret = bch2_trans_exit(&trans) ?: ret;
bkey_on_stack_exit(&sk, c);
bch2_bkey_buf_exit(&sk, c);
return ret;
}

View File

@ -200,6 +200,11 @@ static int bch2_copygc(struct bch_fs *c)
return -1;
}
/*
* Our btree node allocations also come out of RESERVE_MOVINGGC:
*/
sectors_to_move = (sectors_to_move * 3) / 4;
for (i = h->data; i < h->data + h->used; i++)
sectors_to_move += i->sectors * i->replicas;

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
#include "alloc_background.h"
#include "btree_gc.h"
#include "btree_update.h"
@ -224,28 +225,29 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
if (b->c.level) {
struct btree *child;
BKEY_PADDED(k) tmp;
struct bkey_buf tmp;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_bkey_buf_init(&tmp);
bch2_bkey_buf_reassemble(&tmp, c, k);
k = bkey_i_to_s_c(tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
if (b->c.level > 0) {
child = bch2_btree_node_get_noiter(c, &tmp.k,
b->c.btree_id, b->c.level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;
child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1);
bch2_bkey_buf_exit(&tmp, c);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn);
six_unlock_read(&child->c.lock);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;
if (ret)
break;
}
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn);
six_unlock_read(&child->c.lock);
if (ret)
break;
} else {
bch2_btree_and_journal_iter_advance(&iter);
}
@ -936,7 +938,7 @@ int bch2_fs_recovery(struct bch_fs *c)
struct bch_sb_field_clean *clean = NULL;
struct jset *last_journal_entry = NULL;
u64 blacklist_seq, journal_seq;
bool write_sb = false, need_write_alloc = false;
bool write_sb = false;
int ret;
if (c->sb.clean)
@ -1082,10 +1084,8 @@ use_clean:
bch_info(c, "starting metadata mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, true);
if (ret < 0)
goto err;
if (ret)
need_write_alloc = true;
goto err;
bch_verbose(c, "mark and sweep done");
}
@ -1095,10 +1095,8 @@ use_clean:
bch_info(c, "starting mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, false);
if (ret < 0)
goto err;
if (ret)
need_write_alloc = true;
goto err;
bch_verbose(c, "mark and sweep done");
}
@ -1122,7 +1120,8 @@ use_clean:
goto err;
bch_verbose(c, "journal replay done");
if (need_write_alloc && !c->opts.nochanges) {
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
!c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
@ -1137,8 +1136,6 @@ use_clean:
goto err;
}
bch_verbose(c, "alloc write done");
set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags);
}
if (!c->sb.clean) {

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "extents.h"
#include "inode.h"
@ -198,8 +198,7 @@ s64 bch2_remap_range(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter *dst_iter, *src_iter;
struct bkey_s_c src_k;
BKEY_PADDED(k) new_dst;
struct bkey_on_stack new_src;
struct bkey_buf new_dst, new_src;
struct bpos dst_end = dst_start, src_end = src_start;
struct bpos dst_want, src_want;
u64 src_done, dst_done;
@ -216,7 +215,8 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_end.offset += remap_sectors;
src_end.offset += remap_sectors;
bkey_on_stack_init(&new_src);
bch2_bkey_buf_init(&new_dst);
bch2_bkey_buf_init(&new_src);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
@ -257,7 +257,7 @@ s64 bch2_remap_range(struct bch_fs *c,
break;
if (src_k.k->type != KEY_TYPE_reflink_p) {
bkey_on_stack_reassemble(&new_src, c, src_k);
bch2_bkey_buf_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
bch2_cut_front(src_iter->pos, new_src.k);
@ -275,7 +275,7 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bkey_s_c_reflink_p src_p =
bkey_s_c_to_reflink_p(src_k);
struct bkey_i_reflink_p *dst_p =
bkey_reflink_p_init(&new_dst.k);
bkey_reflink_p_init(new_dst.k);
u64 offset = le64_to_cpu(src_p.v->idx) +
(src_iter->pos.offset -
@ -286,12 +286,12 @@ s64 bch2_remap_range(struct bch_fs *c,
BUG();
}
new_dst.k.k.p = dst_iter->pos;
bch2_key_resize(&new_dst.k.k,
new_dst.k->k.p = dst_iter->pos;
bch2_key_resize(&new_dst.k->k,
min(src_k.k->p.offset - src_iter->pos.offset,
dst_end.offset - dst_iter->pos.offset));
ret = bch2_extent_update(&trans, dst_iter, &new_dst.k,
ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
NULL, journal_seq,
new_i_size, i_sectors_delta);
if (ret)
@ -333,7 +333,8 @@ err:
} while (ret2 == -EINTR);
ret = bch2_trans_exit(&trans) ?: ret;
bkey_on_stack_exit(&new_src, c);
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
percpu_ref_put(&c->writes);

View File

@ -798,7 +798,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
pr_buf(out,
"free_inc: %zu/%zu\n"
"free[RESERVE_BTREE]: %zu/%zu\n"
"free[RESERVE_MOVINGGC]: %zu/%zu\n"
"free[RESERVE_NONE]: %zu/%zu\n"
"buckets:\n"
@ -827,7 +826,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
"open_buckets_user: %u\n"
"btree reserve cache: %u\n",
fifo_used(&ca->free_inc), ca->free_inc.size,
fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
ca->mi.nbuckets - ca->mi.first_bucket,