Update bcachefs sources to ca97ee3577 bcachefs: bch2_btree_iter_peek_and_restart_outlined()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2023-02-26 21:36:39 -05:00
parent bf359ac1ad
commit 30cca2e94d
25 changed files with 540 additions and 321 deletions

View File

@@ -1 +1 @@
-8e1519ccb62b76736d5b9ca97e58b41ed9a11274
+ca97ee357774427208e4c251bfaa5957ae7f8c2c

View File

@@ -70,6 +70,7 @@ static inline void submit_bio(struct bio *bio)
 }
 int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t);
+int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsigned);
 #define bdev_get_queue(bdev)	(&((bdev)->queue))

View File

@@ -516,7 +516,6 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
 DECLARE_EVENT_CLASS(bucket_alloc,
 	TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-		 bool user,
 		 u64 bucket,
 		 u64 free,
 		 u64 avail,
@@ -525,14 +524,13 @@ DECLARE_EVENT_CLASS(bucket_alloc,
 		 struct bucket_alloc_state *s,
 		 bool nonblocking,
 		 const char *err),
-	TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+	TP_ARGS(ca, alloc_reserve, bucket, free, avail,
 		copygc_wait_amount, copygc_waiting_for,
 		s, nonblocking, err),
 	TP_STRUCT__entry(
-		__field(dev_t,		dev	)
+		__field(u8,		dev	)
 		__array(char,	reserve,	16	)
-		__field(bool,		user	)
 		__field(u64,		bucket	)
 		__field(u64,		free	)
 		__field(u64,		avail	)
@@ -548,9 +546,8 @@ DECLARE_EVENT_CLASS(bucket_alloc,
 	),
 	TP_fast_assign(
-		__entry->dev	= ca->dev;
+		__entry->dev	= ca->dev_idx;
 		strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
-		__entry->user	= user;
 		__entry->bucket	= bucket;
 		__entry->free	= free;
 		__entry->avail	= avail;
@@ -565,10 +562,9 @@ DECLARE_EVENT_CLASS(bucket_alloc,
 		strscpy(__entry->err, err, sizeof(__entry->err));
 	),
-	TP_printk("%d,%d reserve %s user %u bucket %llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
+	TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
 		  __entry->reserve,
-		  __entry->user,
+		  __entry->dev,
 		  __entry->bucket,
 		  __entry->free,
 		  __entry->avail,
@@ -585,7 +581,6 @@ DECLARE_EVENT_CLASS(bucket_alloc,
 DEFINE_EVENT(bucket_alloc, bucket_alloc,
 	TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-		 bool user,
 		 u64 bucket,
 		 u64 free,
 		 u64 avail,
@@ -594,14 +589,13 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc,
 		 struct bucket_alloc_state *s,
 		 bool nonblocking,
 		 const char *err),
-	TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+	TP_ARGS(ca, alloc_reserve, bucket, free, avail,
 		copygc_wait_amount, copygc_waiting_for,
 		s, nonblocking, err)
 );
 DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
 	TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-		 bool user,
 		 u64 bucket,
 		 u64 free,
 		 u64 avail,
@@ -610,7 +604,7 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
 		 struct bucket_alloc_state *s,
 		 bool nonblocking,
 		 const char *err),
-	TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
+	TP_ARGS(ca, alloc_reserve, bucket, free, avail,
 		copygc_wait_amount, copygc_waiting_for,
 		s, nonblocking, err)
 );
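Note: with this change the bucket_alloc tracepoints no longer take the `user` flag, and the device is logged as the bch_dev index rather than a dev_t major:minor pair. For reference, a line rendered by the new TP_printk format would look roughly like this (field values invented for illustration):

	reserve none bucket 0:1042 free 7312 avail 7296 copygc_wait 0/0 seen 12 open 3 need_journal_commit 0 nouse 0 nocow 0 nonblocking 1 err (none)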

View File

@@ -2175,21 +2175,24 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
 	}
 	mutex_unlock(&c->btree_reserve_cache_lock);
-	while (1) {
-		struct open_bucket *ob;
-		spin_lock(&c->freelist_lock);
-		if (!ca->open_buckets_partial_nr) {
-			spin_unlock(&c->freelist_lock);
-			break;
-		}
-		ob = c->open_buckets +
-			ca->open_buckets_partial[--ca->open_buckets_partial_nr];
-		ob->on_partial_list = false;
-		spin_unlock(&c->freelist_lock);
-		bch2_open_bucket_put(c, ob);
+	spin_lock(&c->freelist_lock);
+	i = 0;
+	while (i < c->open_buckets_partial_nr) {
+		struct open_bucket *ob =
+			c->open_buckets + c->open_buckets_partial[i];
+
+		if (ob->dev == ca->dev_idx) {
+			swap(c->open_buckets_partial[i],
+			     c->open_buckets_partial[--c->open_buckets_partial_nr]);
+			ob->on_partial_list = false;
+			spin_unlock(&c->freelist_lock);
+			bch2_open_bucket_put(c, ob);
+			spin_lock(&c->freelist_lock);
+		} else {
+			i++;
+		}
 	}
+	spin_unlock(&c->freelist_lock);
 	bch2_ec_stop_dev(c, ca);
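Note: the rewritten loop is the standard swap-with-last removal idiom for unordered arrays: swap a matching element with the last one and shrink, and only advance the index when nothing was removed, since the swapped-in element still has to be examined. A minimal standalone sketch of the same idiom (plain C, hypothetical names):

	#include <stdio.h>

	#define SWAP(a, b) do { int _t = (a); (a) = (b); (b) = _t; } while (0)

	/* Remove every element equal to `val`; order is not preserved. */
	static unsigned remove_all(int *a, unsigned nr, int val)
	{
		unsigned i = 0;

		while (i < nr) {
			if (a[i] == val)
				SWAP(a[i], a[--nr]);	/* re-examine slot i */
			else
				i++;
		}
		return nr;
	}

	int main(void)
	{
		int a[] = { 1, 2, 1, 3, 1 };
		unsigned nr = remove_all(a, 5, 1);

		for (unsigned i = 0; i < nr; i++)
			printf("%d ", a[i]);	/* prints "3 2" */
		printf("\n");
		return 0;
	}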

View File

@@ -154,26 +154,17 @@ static void open_bucket_free_unused(struct bch_fs *c,
 				    struct write_point *wp,
 				    struct open_bucket *ob)
 {
-	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-	bool may_realloc = wp->data_type == BCH_DATA_user;
-	BUG_ON(ca->open_buckets_partial_nr >
-	       ARRAY_SIZE(ca->open_buckets_partial));
-	if (ca->open_buckets_partial_nr <
-	    ARRAY_SIZE(ca->open_buckets_partial) &&
-	    may_realloc) {
-		spin_lock(&c->freelist_lock);
-		ob->on_partial_list = true;
-		ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
-			ob - c->open_buckets;
-		spin_unlock(&c->freelist_lock);
-		closure_wake_up(&c->open_buckets_wait);
-		closure_wake_up(&c->freelist_wait);
-	} else {
-		bch2_open_bucket_put(c, ob);
-	}
+	BUG_ON(c->open_buckets_partial_nr >=
+	       ARRAY_SIZE(c->open_buckets_partial));
+	spin_lock(&c->freelist_lock);
+	ob->on_partial_list = true;
+	c->open_buckets_partial[c->open_buckets_partial_nr++] =
+		ob - c->open_buckets;
+	spin_unlock(&c->freelist_lock);
+	closure_wake_up(&c->open_buckets_wait);
+	closure_wake_up(&c->freelist_wait);
 }
 /* _only_ for allocating the journal on a new device: */
@@ -259,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 	ob->valid	= true;
 	ob->sectors_free = ca->mi.bucket_size;
-	ob->alloc_reserve = reserve;
 	ob->dev		= ca->dev_idx;
 	ob->gen		= a->gen;
 	ob->bucket	= bucket;
@@ -386,32 +376,6 @@ err:
 	return ob;
 }
-static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
-						    enum alloc_reserve reserve)
-{
-	struct open_bucket *ob;
-	int i;
-	spin_lock(&c->freelist_lock);
-	for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
-		ob = c->open_buckets + ca->open_buckets_partial[i];
-		if (reserve <= ob->alloc_reserve) {
-			array_remove_item(ca->open_buckets_partial,
-					  ca->open_buckets_partial_nr,
-					  i);
-			ob->on_partial_list = false;
-			ob->alloc_reserve = reserve;
-			spin_unlock(&c->freelist_lock);
-			return ob;
-		}
-	}
-	spin_unlock(&c->freelist_lock);
-	return NULL;
-}
 /*
  * This path is for before the freespace btree is initialized:
  *
@@ -535,7 +499,6 @@ again:
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
 				      struct bch_dev *ca,
 				      enum alloc_reserve reserve,
-				      bool may_alloc_partial,
 				      struct closure *cl,
 				      struct bch_dev_usage *usage)
 {
@@ -574,12 +537,6 @@ again:
 	if (waiting)
 		closure_wake_up(&c->freelist_wait);
-	if (may_alloc_partial) {
-		ob = try_alloc_partial_bucket(c, ca, reserve);
-		if (ob)
-			return ob;
-	}
 alloc:
 	ob = likely(freespace)
 		? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
@@ -599,7 +556,6 @@ err:
 	if (!IS_ERR(ob))
 		trace_and_count(c, bucket_alloc, ca,
 				bch2_alloc_reserves[reserve],
-				may_alloc_partial,
 				ob->bucket,
 				usage->d[BCH_DATA_free].buckets,
 				avail,
@@ -611,7 +567,6 @@ err:
 	else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
 		trace_and_count(c, bucket_alloc_fail, ca,
 				bch2_alloc_reserves[reserve],
-				may_alloc_partial,
 				0,
 				usage->d[BCH_DATA_free].buckets,
 				avail,
@@ -626,7 +581,6 @@ err:
 struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 				      enum alloc_reserve reserve,
-				      bool may_alloc_partial,
 				      struct closure *cl)
 {
 	struct bch_dev_usage usage;
@@ -634,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 	bch2_trans_do(c, NULL, NULL, 0,
 		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
-							may_alloc_partial, cl, &usage)));
+							cl, &usage)));
 	return ob;
 }
@@ -691,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
 	bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 }
-#define BUCKET_MAY_ALLOC_PARTIAL	(1 << 0)
-#define BUCKET_ALLOC_USE_DURABILITY	(1 << 1)
-static void add_new_bucket(struct bch_fs *c,
+static int add_new_bucket(struct bch_fs *c,
 			   struct open_buckets *ptrs,
 			   struct bch_devs_mask *devs_may_alloc,
+			   unsigned nr_replicas,
 			   unsigned *nr_effective,
 			   bool *have_cache,
 			   unsigned flags,
@@ -705,12 +657,19 @@ static void add_new_bucket(struct bch_fs *c,
 	unsigned durability =
 		bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+	BUG_ON(*nr_effective >= nr_replicas);
 	__clear_bit(ob->dev, devs_may_alloc->d);
-	*nr_effective	+= (flags & BUCKET_ALLOC_USE_DURABILITY)
-		? durability : 1;
+	*nr_effective	+= durability;
 	*have_cache	|= !durability;
 	ob_push(c, ptrs, ob);
+	if (*nr_effective >= nr_replicas)
+		return 1;
+	if (ob->ec)
+		return 1;
+	return 0;
 }
 int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
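Note: add_new_bucket() now reports back when allocation should stop, and it always counts replicas by device durability (the old BUCKET_ALLOC_USE_DURABILITY flag is gone). A simplified model of the new contract, with hypothetical scalar parameters standing in for the real structs:

	/* Sketch only: nr_effective counts durability, not buckets, so one
	 * bucket on a durability-2 device satisfies nr_replicas = 2 by itself.
	 */
	static int add_new_bucket_sketch(unsigned durability, int ob_is_ec,
					 unsigned nr_replicas,
					 unsigned *nr_effective)
	{
		*nr_effective += durability;	/* always durability, never 1 */

		if (*nr_effective >= nr_replicas)
			return 1;	/* enough replicas: stop */
		if (ob_is_ec)
			return 1;	/* got an erasure coded bucket: stop */
		return 0;		/* caller keeps allocating */
	}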
@@ -720,8 +679,8 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			unsigned nr_replicas,
 			unsigned *nr_effective,
 			bool *have_cache,
+			enum bch_data_type data_type,
 			enum alloc_reserve reserve,
-			unsigned flags,
 			struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
@@ -754,8 +713,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			continue;
 		}
-		ob = bch2_bucket_alloc_trans(trans, ca, reserve,
-				flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
+		ob = bch2_bucket_alloc_trans(trans, ca, reserve, cl, &usage);
 		if (!IS_ERR(ob))
 			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 		percpu_ref_put(&ca->ref);
@@ -767,10 +725,11 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			continue;
 		}
-		add_new_bucket(c, ptrs, devs_may_alloc,
-			       nr_effective, have_cache, flags, ob);
-		if (*nr_effective >= nr_replicas) {
+		ob->data_type = data_type;
+
+		if (add_new_bucket(c, ptrs, devs_may_alloc,
+				   nr_replicas, nr_effective,
+				   have_cache, 0, ob)) {
 			ret = 0;
 			break;
 		}
@@ -792,7 +751,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 			 struct write_point *wp,
 			 struct bch_devs_mask *devs_may_alloc,
 			 u16 target,
-			 unsigned erasure_code,
 			 unsigned nr_replicas,
 			 unsigned *nr_effective,
 			 bool *have_cache,
@@ -805,9 +763,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 	struct open_bucket *ob;
 	struct bch_dev *ca;
 	unsigned i, ec_idx;
+	int ret = 0;
-	if (!erasure_code)
-		return 0;
 	if (nr_replicas < 2)
 		return 0;
@@ -842,46 +798,187 @@ got_bucket:
 	ob->ec_idx	= ec_idx;
 	ob->ec		= h->s;
-	add_new_bucket(c, ptrs, devs_may_alloc,
-		       nr_effective, have_cache, flags, ob);
+	ret = add_new_bucket(c, ptrs, devs_may_alloc,
+			     nr_replicas, nr_effective,
+			     have_cache, flags, ob);
 	atomic_inc(&h->s->pin);
 out_put_head:
 	bch2_ec_stripe_head_put(c, h);
-	return 0;
+	return ret;
 }
 /* Sector allocator */
-static void get_buckets_from_writepoint(struct bch_fs *c,
-					struct open_buckets *ptrs,
-					struct write_point *wp,
-					struct bch_devs_mask *devs_may_alloc,
-					unsigned nr_replicas,
-					unsigned *nr_effective,
-					bool *have_cache,
-					unsigned flags,
-					bool need_ec)
+static bool want_bucket(struct bch_fs *c,
+			struct write_point *wp,
+			struct bch_devs_mask *devs_may_alloc,
+			bool *have_cache, bool ec,
+			struct open_bucket *ob)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+	if (!test_bit(ob->dev, devs_may_alloc->d))
+		return false;
+
+	if (ob->data_type != wp->data_type)
+		return false;
+
+	if (!ca->mi.durability &&
+	    (wp->data_type != BCH_DATA_user || !*have_cache))
+		return false;
+
+	if (ec != (ob->ec != NULL))
+		return false;
+
+	return true;
+}
+
+static int bucket_alloc_set_writepoint(struct bch_fs *c,
+				       struct open_buckets *ptrs,
+				       struct write_point *wp,
+				       struct bch_devs_mask *devs_may_alloc,
+				       unsigned nr_replicas,
+				       unsigned *nr_effective,
+				       bool *have_cache,
+				       bool ec, unsigned flags)
 {
 	struct open_buckets ptrs_skip = { .nr = 0 };
 	struct open_bucket *ob;
 	unsigned i;
+	int ret = 0;
 	open_bucket_for_each(c, &wp->ptrs, ob, i) {
-		struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-
-		if (*nr_effective < nr_replicas &&
-		    test_bit(ob->dev, devs_may_alloc->d) &&
-		    (ca->mi.durability ||
-		     (wp->data_type == BCH_DATA_user && !*have_cache)) &&
-		    (ob->ec || !need_ec)) {
-			add_new_bucket(c, ptrs, devs_may_alloc,
-				       nr_effective, have_cache,
-				       flags, ob);
-		} else {
+		if (!ret && want_bucket(c, wp, devs_may_alloc,
+					have_cache, ec, ob))
+			ret = add_new_bucket(c, ptrs, devs_may_alloc,
+					     nr_replicas, nr_effective,
+					     have_cache, flags, ob);
+		else
 			ob_push(c, &ptrs_skip, ob);
-		}
 	}
 	wp->ptrs = ptrs_skip;
+
+	return ret;
+}
+
+static int bucket_alloc_set_partial(struct bch_fs *c,
+				    struct open_buckets *ptrs,
+				    struct write_point *wp,
+				    struct bch_devs_mask *devs_may_alloc,
+				    unsigned nr_replicas,
+				    unsigned *nr_effective,
+				    bool *have_cache, bool ec,
+				    enum alloc_reserve reserve,
+				    unsigned flags)
+{
+	int i, ret = 0;
+
+	if (!c->open_buckets_partial_nr)
+		return 0;
+
+	spin_lock(&c->freelist_lock);
+
+	for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+		struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+		if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
+			struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+			struct bch_dev_usage usage;
+			u64 avail;
+
+			bch2_dev_usage_read_fast(ca, &usage);
+			avail = dev_buckets_free(ca, usage, reserve);
+			if (!avail)
+				continue;
+
+			array_remove_item(c->open_buckets_partial,
+					  c->open_buckets_partial_nr,
+					  i);
+			ob->on_partial_list = false;
+
+			ret = add_new_bucket(c, ptrs, devs_may_alloc,
+					     nr_replicas, nr_effective,
+					     have_cache, flags, ob);
+			if (ret)
+				break;
+		}
+	}
+
+	spin_unlock(&c->freelist_lock);
+	return ret;
+}
+
+static int __open_bucket_add_buckets(struct btree_trans *trans,
+			struct open_buckets *ptrs,
+			struct write_point *wp,
+			struct bch_devs_list *devs_have,
+			u16 target,
+			bool erasure_code,
+			unsigned nr_replicas,
+			unsigned *nr_effective,
+			bool *have_cache,
+			enum alloc_reserve reserve,
+			unsigned flags,
+			struct closure *_cl)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_devs_mask devs;
+	struct open_bucket *ob;
+	struct closure *cl = NULL;
+	unsigned i;
+	int ret;
+
+	rcu_read_lock();
+	devs = target_rw_devs(c, wp->data_type, target);
+	rcu_read_unlock();
+
+	/* Don't allocate from devices we already have pointers to: */
+	for (i = 0; i < devs_have->nr; i++)
+		__clear_bit(devs_have->devs[i], devs.d);
+
+	open_bucket_for_each(c, ptrs, ob, i)
+		__clear_bit(ob->dev, devs.d);
+
+	if (erasure_code && ec_open_bucket(c, ptrs))
+		return 0;
+
+	ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
+				 nr_replicas, nr_effective,
+				 have_cache, erasure_code, flags);
+	if (ret)
+		return ret;
+
+	ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
+				 nr_replicas, nr_effective,
+				 have_cache, erasure_code, reserve, flags);
+	if (ret)
+		return ret;
+
+	if (erasure_code) {
+		ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
+					 target,
+					 nr_replicas, nr_effective,
+					 have_cache, flags, _cl);
+	} else {
+retry_blocking:
+		/*
+		 * Try nonblocking first, so that if one device is full we'll try from
+		 * other devices:
+		 */
+		ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+					nr_replicas, nr_effective, have_cache,
+					wp->data_type, reserve, cl);
+		if (ret &&
+		    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+		    !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+		    !cl && _cl) {
+			cl = _cl;
+			goto retry_blocking;
+		}
+	}
+
+	return ret;
 }
 static int open_bucket_add_buckets(struct btree_trans *trans,
@@ -895,72 +992,29 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
 			bool *have_cache,
 			enum alloc_reserve reserve,
 			unsigned flags,
-			struct closure *_cl)
+			struct closure *cl)
 {
-	struct bch_fs *c = trans->c;
-	struct bch_devs_mask devs;
-	struct open_bucket *ob;
-	struct closure *cl = NULL;
 	int ret;
-	unsigned i;
-
-	rcu_read_lock();
-	devs = target_rw_devs(c, wp->data_type, target);
-	rcu_read_unlock();
-
-	/* Don't allocate from devices we already have pointers to: */
-	for (i = 0; i < devs_have->nr; i++)
-		__clear_bit(devs_have->devs[i], devs.d);
-
-	open_bucket_for_each(c, ptrs, ob, i)
-		__clear_bit(ob->dev, devs.d);
 
 	if (erasure_code) {
-		if (!ec_open_bucket(c, ptrs)) {
-			get_buckets_from_writepoint(c, ptrs, wp, &devs,
-						    nr_replicas, nr_effective,
-						    have_cache, flags, true);
-			if (*nr_effective >= nr_replicas)
-				return 0;
-		}
-
-		if (!ec_open_bucket(c, ptrs)) {
-			ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
-						 target, erasure_code,
-						 nr_replicas, nr_effective,
-						 have_cache, flags, _cl);
-			if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-			    bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
-			    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
-				return ret;
-			if (*nr_effective >= nr_replicas)
-				return 0;
-		}
-	}
-
-	get_buckets_from_writepoint(c, ptrs, wp, &devs,
-				    nr_replicas, nr_effective,
-				    have_cache, flags, false);
-	if (*nr_effective >= nr_replicas)
-		return 0;
-
-retry_blocking:
-	/*
-	 * Try nonblocking first, so that if one device is full we'll try from
-	 * other devices:
-	 */
-	ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
-				nr_replicas, nr_effective, have_cache,
-				reserve, flags, cl);
-	if (ret &&
-	    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
-	    !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
-	    !cl && _cl) {
-		cl = _cl;
-		goto retry_blocking;
+		ret = __open_bucket_add_buckets(trans, ptrs, wp,
+				devs_have, target, erasure_code,
+				nr_replicas, nr_effective, have_cache,
+				reserve, flags, cl);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+		    bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+		    bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+		    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+			return ret;
+		if (*nr_effective >= nr_replicas)
+			return 0;
 	}
 
-	return ret;
+	ret = __open_bucket_add_buckets(trans, ptrs, wp,
+			devs_have, target, false,
+			nr_replicas, nr_effective, have_cache,
+			reserve, flags, cl);
+	return ret < 0 ? ret : 0;
 }
 void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
@@ -1159,14 +1213,10 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	struct open_bucket *ob;
 	struct open_buckets ptrs;
 	unsigned nr_effective, write_points_nr;
-	unsigned ob_flags = 0;
 	bool have_cache;
 	int ret;
 	int i;
-	if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
-		ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
 	BUG_ON(!nr_replicas || !nr_replicas_required);
 retry:
 	ptrs.nr = 0;
@@ -1176,9 +1226,6 @@ retry:
 	*wp_ret = wp = writepoint_find(trans, write_point.v);
-	if (wp->data_type == BCH_DATA_user)
-		ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
 	/* metadata may not allocate on cache devices: */
 	if (wp->data_type != BCH_DATA_user)
 		have_cache = true;
@@ -1188,13 +1235,13 @@ retry:
 				target, erasure_code,
 				nr_replicas, &nr_effective,
 				&have_cache, reserve,
-				ob_flags, cl);
+				flags, cl);
 	} else {
 		ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
 				target, erasure_code,
 				nr_replicas, &nr_effective,
 				&have_cache, reserve,
-				ob_flags, NULL);
+				flags, NULL);
 		if (!ret ||
 		    bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			goto alloc_done;
@@ -1203,7 +1250,7 @@ retry:
 				0, erasure_code,
 				nr_replicas, &nr_effective,
 				&have_cache, reserve,
-				ob_flags, cl);
+				flags, cl);
 	}
 alloc_done:
 	BUG_ON(!ret && nr_effective < nr_replicas);
@@ -1350,6 +1397,24 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
 	}
 }
+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	unsigned i;
+
+	spin_lock(&c->freelist_lock);
+
+	for (i = 0; i < c->open_buckets_partial_nr; i++) {
+		struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+		prt_printf(out, "%zu ref %u type %s ec %u %u:%llu:%u\n",
+			   ob - c->open_buckets,
+			   atomic_read(&ob->pin),
+			   bch2_data_types[ob->data_type],
+			   ob->ec != NULL,
+			   ob->dev, ob->bucket, ob->gen);
+	}
+
+	spin_unlock(&c->freelist_lock);
+}
+
 static const char * const bch2_write_point_states[] = {
 #define x(n)	#n,
 	WRITE_POINT_STATES()
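Note: each line of the new bch2_open_buckets_partial_to_text() output follows the "%zu ref %u type %s ec %u %u:%llu:%u" format above: open_bucket index, pin count, data type, whether it belongs to an erasure coded stripe, then dev:bucket:gen. A hypothetical line might read:

	14 ref 0 type user ec 0 0:1042:3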

View File

@@ -31,8 +31,7 @@ void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
 long bch2_bucket_alloc_new_fs(struct bch_dev *);
 struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
-				      enum alloc_reserve, bool,
-				      struct closure *);
+				      enum alloc_reserve, struct closure *);
 static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
 			   struct open_bucket *ob)
@@ -152,8 +151,9 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
 int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
 		      struct dev_stripe_state *, struct bch_devs_mask *,
-		      unsigned, unsigned *, bool *, enum alloc_reserve,
-		      unsigned, struct closure *);
+		      unsigned, unsigned *, bool *,
+		      enum bch_data_type, enum alloc_reserve,
+		      struct closure *);
 int bch2_alloc_sectors_start_trans(struct btree_trans *,
 				   unsigned, unsigned,
@@ -221,6 +221,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
 void bch2_fs_allocator_foreground_init(struct bch_fs *);
 void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
+void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
 void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);

View File

@@ -51,10 +51,9 @@ struct open_bucket {
 	 * the block in the stripe this open_bucket corresponds to:
 	 */
 	u8			ec_idx;
-	enum bch_data_type	data_type:8;
+	enum bch_data_type	data_type:6;
 	unsigned		valid:1;
 	unsigned		on_partial_list:1;
-	unsigned		alloc_reserve:3;
 	u8			dev;
 	u8			gen;

View File

@@ -932,11 +932,14 @@ static int check_one_backpointer(struct btree_trans *trans,
 				 struct bpos bucket,
 				 u64 *bp_offset,
 				 struct bbpos start,
-				 struct bbpos end)
+				 struct bbpos end,
+				 struct bpos *last_flushed_pos)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bch_backpointer bp;
 	struct bbpos pos;
+	struct bpos bp_pos;
 	struct bkey_s_c k;
 	struct printbuf buf = PRINTBUF;
 	int ret;
@@ -957,17 +960,31 @@ static int check_one_backpointer(struct btree_trans *trans,
 	if (ret)
 		return ret;
-	if (fsck_err_on(!k.k, trans->c,
+	bp_pos = bucket_pos_to_bp(c, bucket,
+			max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
+
+	if (!k.k && !bpos_eq(*last_flushed_pos, bp_pos)) {
+		*last_flushed_pos = bp_pos;
+		pr_info("flushing at %llu:%llu",
+			last_flushed_pos->inode,
+			last_flushed_pos->offset);
+		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+			-BCH_ERR_transaction_restart_write_buffer_flush;
+		goto out;
+	}
+
+	if (fsck_err_on(!k.k, c,
 			"%s backpointer points to missing extent\n%s",
 			*bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
 			(bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
 		ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
 		if (ret == -ENOENT)
-			bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
+			bch_err(c, "backpointer at %llu not found", *bp_offset);
 	}
+out:
+	bch2_trans_iter_exit(trans, &iter);
 fsck_err:
-	bch2_trans_iter_exit(trans, &iter);
 	printbuf_exit(&buf);
 	return ret;
 }
@@ -978,6 +995,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
 {
 	struct btree_iter iter;
 	struct bkey_s_c k;
+	struct bpos last_flushed_pos = SPOS_MAX;
 	int ret = 0;
 	for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
@@ -987,7 +1005,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
 		while (!(ret = commit_do(trans, NULL, NULL,
 					 BTREE_INSERT_LAZY_RW|
 					 BTREE_INSERT_NOFAIL,
-					 check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
+					 check_one_backpointer(trans, iter.pos, &bp_offset,
+							       start, end, &last_flushed_pos))) &&
 		       bp_offset < U64_MAX)
 			bp_offset++;
View File

@@ -516,9 +516,6 @@ struct bch_dev {
 	unsigned		nr_open_buckets;
 	unsigned		nr_btree_reserve;
-	open_bucket_idx_t	open_buckets_partial[OPEN_BUCKETS_COUNT];
-	open_bucket_idx_t	open_buckets_partial_nr;
 	size_t			inc_gen_needs_gc;
 	size_t			inc_gen_really_needs_gc;
 	size_t			buckets_waiting_on_journal;
@@ -859,6 +856,9 @@ struct bch_fs {
 	struct open_bucket	open_buckets[OPEN_BUCKETS_COUNT];
 	open_bucket_idx_t	open_buckets_hash[OPEN_BUCKETS_COUNT];
+	open_bucket_idx_t	open_buckets_partial[OPEN_BUCKETS_COUNT];
+	open_bucket_idx_t	open_buckets_partial_nr;
 	struct write_point	btree_write_point;
 	struct write_point	rebalance_write_point;

View File

@@ -2568,6 +2568,18 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
 	return bch2_btree_iter_peek_slot(iter);
 }
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
+{
+	struct bkey_s_c k;
+
+	while (btree_trans_too_many_iters(iter->trans) ||
+	       (k = bch2_btree_iter_peek_type(iter, iter->flags),
+		bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
+		bch2_trans_begin(iter->trans);
+
+	return k;
+}
+
 /* new transactional stuff: */
 #ifdef CONFIG_BCACHEFS_DEBUG
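Note: the new helper outlines the peek-and-restart loop so that for_each_btree_key()-style callers generate less inline code. A scan loop built on it might look like this (a sketch mirroring the inline __bch2_btree_iter_peek_and_restart() pattern; transaction and iterator setup elided):

	struct bkey_s_c k;
	int ret = 0;

	for (k = bch2_btree_iter_peek_and_restart_outlined(&iter);
	     k.k && !(ret = bkey_err(k));
	     bch2_btree_iter_advance(&iter),
	     k = bch2_btree_iter_peek_and_restart_outlined(&iter)) {
		/* process k; transaction restarts were retried internally */
	}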
/* new transactional stuff: */ /* new transactional stuff: */
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG

View File

@@ -596,6 +596,8 @@ static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 	return 0;
 }
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
+
 static inline struct bkey_s_c
 __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 				   struct btree_iter *iter, unsigned flags)

View File

@@ -64,6 +64,15 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
 	bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
 	(*fast)++;
+
+	if (path->ref > 1) {
+		/*
+		 * We can't clone a path that has write locks: if the path is
+		 * shared, unlock before set_pos(), traverse():
+		 */
+		bch2_btree_node_unlock_write(trans, path, path->l[0].b);
+		*write_locked = false;
+	}
 	return 0;
 trans_commit:
 	return bch2_trans_update(trans, iter, &wb->k, 0) ?:

View File

@@ -98,8 +98,10 @@ static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
 	struct bch_extent_ptr *ptr;
 	bkey_for_each_ptr(ptrs, ptr)
-		if (ptr->dev == dev)
-			ptr->cached = true;
+		if (ptr->dev == dev) {
+			bch2_extent_ptr_set_cached(k, ptr);
+			return;
+		}
 }
 static int __bch2_data_update_index_update(struct btree_trans *trans,
@@ -295,15 +297,7 @@ out:
 int bch2_data_update_index_update(struct bch_write_op *op)
 {
-	struct bch_fs *c = op->c;
-	struct btree_trans trans;
-	int ret;
-	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-	ret = __bch2_data_update_index_update(&trans, op);
-	bch2_trans_exit(&trans);
-	return ret;
+	return bch2_trans_run(op->c, __bch2_data_update_index_update(&trans, op));
 }
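Note: assuming bch2_trans_run() keeps its existing definition, this is a pure simplification; the macro declares a `trans` on the stack, evaluates the expression, and tears the transaction down again, roughly:

	/* Approximate expansion of bch2_trans_run(c, expr) (sketch; see the
	 * macro for the authoritative version). The explicit preallocation
	 * hints from the removed code above are dropped.
	 */
	struct btree_trans trans;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
	ret = (expr);			/* expr may refer to trans */
	bch2_trans_exit(&trans);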
 void bch2_data_update_read_done(struct data_update *m,
@@ -326,8 +320,9 @@ void bch2_data_update_exit(struct data_update *update)
 	const struct bch_extent_ptr *ptr;
 	bkey_for_each_ptr(ptrs, ptr) {
-		bch2_bucket_nocow_unlock(&c->nocow_locks,
-					 PTR_BUCKET_POS(c, ptr), 0);
+		if (c->opts.nocow_enabled)
+			bch2_bucket_nocow_unlock(&c->nocow_locks,
+						 PTR_BUCKET_POS(c, ptr), 0);
 		percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
 	}
@@ -487,23 +482,26 @@ int bch2_data_update_init(struct btree_trans *trans,
 		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
 			m->op.incompressible = true;
-		if (ctxt) {
-			move_ctxt_wait_event(ctxt, trans,
-				(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
-							  PTR_BUCKET_POS(c, &p.ptr), 0)) ||
-				!atomic_read(&ctxt->read_sectors));
-			if (!locked)
-				bch2_bucket_nocow_lock(&c->nocow_locks,
-						       PTR_BUCKET_POS(c, &p.ptr), 0);
-		} else {
-			if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
-						       PTR_BUCKET_POS(c, &p.ptr), 0)) {
-				ret = -BCH_ERR_nocow_lock_blocked;
-				goto err;
+		if (c->opts.nocow_enabled) {
+			if (ctxt) {
+				move_ctxt_wait_event(ctxt, trans,
+					(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
+								  PTR_BUCKET_POS(c, &p.ptr), 0)) ||
+					!atomic_read(&ctxt->read_sectors));
+				if (!locked)
+					bch2_bucket_nocow_lock(&c->nocow_locks,
+							       PTR_BUCKET_POS(c, &p.ptr), 0);
+			} else {
+				if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
+							       PTR_BUCKET_POS(c, &p.ptr), 0)) {
+					ret = -BCH_ERR_nocow_lock_blocked;
+					goto err;
+				}
 			}
+			ptrs_locked |= (1U << i);
 		}
-		ptrs_locked |= (1U << i);
 		i++;
 	}

View File

@@ -138,20 +138,28 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
 			 struct bkey_s_c k)
 {
 	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-	unsigned i;
+	unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
 	prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
 		   s->algorithm,
 		   le16_to_cpu(s->sectors),
-		   s->nr_blocks - s->nr_redundant,
+		   nr_data,
 		   s->nr_redundant,
 		   s->csum_type,
 		   1U << s->csum_granularity_bits);
-	for (i = 0; i < s->nr_blocks; i++)
-		prt_printf(out, " %u:%llu:%u", s->ptrs[i].dev,
-			   (u64) s->ptrs[i].offset,
-			   stripe_blockcount_get(s, i));
+	for (i = 0; i < s->nr_blocks; i++) {
+		const struct bch_extent_ptr *ptr = s->ptrs + i;
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		u32 offset;
+		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+		prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
+		if (i < nr_data)
+			prt_printf(out, "#%u", stripe_blockcount_get(s, i));
+		if (ptr_stale(ca, ptr))
+			prt_printf(out, " stale");
+	}
 }
 /* returns blocknr in stripe that we matched: */
@@ -442,15 +450,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 	percpu_ref_put(&ca->io_ref);
 }
-static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
+static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
+				struct ec_stripe_buf *stripe)
 {
-	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret;
-	bch2_trans_init(&trans, c, 0, 0);
-	bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes,
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
 			     POS(0, idx), BTREE_ITER_SLOTS);
 	k = bch2_btree_iter_peek_slot(&iter);
 	ret = bkey_err(k);
@@ -462,11 +469,15 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
 	}
 	bkey_reassemble(&stripe->key.k_i, k);
 err:
-	bch2_trans_iter_exit(&trans, &iter);
-	bch2_trans_exit(&trans);
+	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
+static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
+{
+	return bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe));
+}
+
 /* recovery read path: */
 int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
 {
@@ -865,25 +876,6 @@ err:
 	return ret;
 }
-static void extent_stripe_ptr_add(struct bkey_s_extent e,
-				  struct ec_stripe_buf *s,
-				  struct bch_extent_ptr *ptr,
-				  unsigned block)
-{
-	struct bch_extent_stripe_ptr *dst = (void *) ptr;
-	union bch_extent_entry *end = extent_entry_last(e);
-
-	memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst);
-	e.k->u64s += sizeof(*dst) / sizeof(u64);
-
-	*dst = (struct bch_extent_stripe_ptr) {
-		.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
-		.block		= block,
-		.redundancy	= s->key.v.nr_redundant,
-		.idx		= s->key.k.p.offset,
-	};
-}
-
 static int ec_stripe_update_extent(struct btree_trans *trans,
 				   struct bpos bucket, u8 gen,
 				   struct ec_stripe_buf *s,
@@ -895,6 +887,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
 	struct bkey_s_c k;
 	const struct bch_extent_ptr *ptr_c;
 	struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+	struct bch_extent_stripe_ptr stripe_ptr;
 	struct bkey_i *n;
 	int ret, dev, block;
@@ -933,16 +926,27 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
 	dev = s->key.v.ptrs[block].dev;
-	n = bch2_bkey_make_mut(trans, k);
+	n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
 	ret = PTR_ERR_OR_ZERO(n);
 	if (ret)
 		goto out;
+
+	bkey_reassemble(n, k);
 	bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
 	ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
 	BUG_ON(!ec_ptr);
-	extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block);
+	stripe_ptr = (struct bch_extent_stripe_ptr) {
+		.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
+		.block		= block,
+		.redundancy	= s->key.v.nr_redundant,
+		.idx		= s->key.k.p.offset,
+	};
+
+	__extent_entry_insert(n,
+			(union bch_extent_entry *) ec_ptr,
+			(union bch_extent_entry *) &stripe_ptr);
 	ret = bch2_trans_update(trans, &iter, n, 0);
 out:
@@ -999,6 +1003,35 @@ err:
 	return ret;
 }
+static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
+				       struct ec_stripe_new *s,
+				       unsigned block,
+				       struct open_bucket *ob)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+	unsigned offset = ca->mi.bucket_size - ob->sectors_free;
+	int ret;
+
+	if (!bch2_dev_get_ioref(ca, WRITE)) {
+		s->err = -EROFS;
+		return;
+	}
+
+	memset(s->new_stripe.data[block] + (offset << 9),
+	       0,
+	       ob->sectors_free << 9);
+
+	ret = blkdev_issue_zeroout(ca->disk_sb.bdev,
+			ob->bucket * ca->mi.bucket_size + offset,
+			ob->sectors_free,
+			GFP_KERNEL, 0);
+
+	percpu_ref_put(&ca->io_ref);
+
+	if (ret)
+		s->err = ret;
+}
+
 /*
  * data buckets of new stripe all written: create the stripe
  */
@@ -1014,6 +1047,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
 	closure_sync(&s->iodone);
+	for (i = 0; i < nr_data; i++)
+		if (s->blocks[i]) {
+			ob = c->open_buckets + s->blocks[i];
+
+			if (ob->sectors_free)
+				zero_out_rest_of_ec_bucket(c, s, i, ob);
+		}
+
 	if (s->err) {
 		if (!bch2_err_matches(s->err, EROFS))
 			bch_err(c, "error creating stripe: error writing data buckets");
@@ -1155,9 +1196,6 @@ void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
 {
 	struct ec_stripe_new *s = ob->ec;
-	if (ob->sectors_free)
-		s->err = -1;
-
 	ec_stripe_new_put(c, s);
 }
@@ -1398,10 +1436,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
 					    h->s->nr_parity,
 					    &nr_have_parity,
 					    &have_cache,
+					    BCH_DATA_parity,
 					    h->copygc
 					    ? RESERVE_movinggc
 					    : RESERVE_none,
-					    0,
 					    cl);
 		open_bucket_for_each(c, &buckets, ob, i) {
@@ -1427,10 +1465,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
 					    h->s->nr_data,
 					    &nr_have_data,
 					    &have_cache,
+					    BCH_DATA_user,
 					    h->copygc
 					    ? RESERVE_movinggc
 					    : RESERVE_none,
-					    0,
 					    cl);
 		open_bucket_for_each(c, &buckets, ob, i) {
@@ -1486,8 +1524,9 @@ static s64 get_existing_stripe(struct bch_fs *c,
 	return ret;
 }
-static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *h)
+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
 {
+	struct bch_fs *c = trans->c;
 	unsigned i;
 	s64 idx;
 	int ret;
@@ -1497,7 +1536,7 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *
 		return -BCH_ERR_ENOSPC_stripe_reuse;
 	h->s->have_existing_stripe = true;
-	ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+	ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
 	if (ret) {
 		bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
 		return ret;
@@ -1626,7 +1665,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
 		goto err;
 	if (ret && needs_stripe_new)
-		ret = __bch2_ec_stripe_head_reuse(c, h);
+		ret = __bch2_ec_stripe_head_reuse(trans, h);
 	if (ret) {
 		bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
 		goto err;
@@ -1771,6 +1810,7 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
 void bch2_fs_ec_exit(struct bch_fs *c)
 {
 	struct ec_stripe_head *h;
+	unsigned i;
 	while (1) {
 		mutex_lock(&c->ec_stripe_head_lock);
@@ -1782,7 +1822,12 @@ void bch2_fs_ec_exit(struct bch_fs *c)
 		if (!h)
 			break;
-		BUG_ON(h->s);
+		if (h->s) {
+			for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++)
+				BUG_ON(h->s->blocks[i]);
+
+			kfree(h->s);
+		}
 		kfree(h);
 	}
@@ -1801,6 +1846,8 @@ void bch2_fs_ec_init_early(struct bch_fs *c)
 int bch2_fs_ec_init(struct bch_fs *c)
 {
+	spin_lock_init(&c->ec_stripes_new_lock);
+
 	return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
 			   BIOSET_NEED_BVECS);
 }

View File

@@ -706,18 +706,6 @@ void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
 	k->k.u64s -= extent_entry_u64s(entry);
 }
-static inline void __extent_entry_insert(struct bkey_i *k,
-					 union bch_extent_entry *dst,
-					 union bch_extent_entry *new)
-{
-	union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
-
-	memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
-			      dst, (u64 *) end - (u64 *) dst);
-	k->k.u64s += extent_entry_u64s(new);
-	memcpy_u64s_small(dst, new, extent_entry_u64s(new));
-}
-
 void bch2_extent_ptr_decoded_append(struct bkey_i *k,
 				    struct extent_ptr_decoded *p)
 {
@@ -951,6 +939,29 @@ bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
 	return false;
 }
+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+{
+	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+	union bch_extent_entry *entry;
+	union bch_extent_entry *ec = NULL;
+
+	bkey_extent_entry_for_each(ptrs, entry) {
+		if (&entry->ptr == ptr) {
+			ptr->cached = true;
+			if (ec)
+				extent_entry_drop(k, ec);
+			return;
+		}
+
+		if (extent_entry_is_stripe_ptr(entry))
+			ec = entry;
+		else if (extent_entry_is_ptr(entry))
+			ec = NULL;
+	}
+
+	BUG();
+}
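Note: bch2_extent_ptr_set_cached() does more than flip the flag: a cached pointer may not be erasure coded (the bch2_bkey_ptrs_invalid() hunk below now rejects "cached, erasure coded ptr"), so the scan remembers the last stripe_ptr entry seen and drops it on reaching the target pointer. This relies on stripe_ptr entries preceding the pointer they apply to, as the loop above assumes:

	/* Assumed entry layout for an erasure coded replica:
	 *
	 *	[crc] [stripe_ptr idx=I block=B] [ptr dev=D] [ptr dev=E] ...
	 *
	 * When the loop reaches the ptr for dev D, `ec` still points at its
	 * stripe entry, and extent_entry_drop() removes exactly that one.
	 */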
 /*
  * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
  *
@@ -1094,7 +1105,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	unsigned size_ondisk = k.k->size;
 	unsigned nonce = UINT_MAX;
 	unsigned nr_ptrs = 0;
-	bool unwritten = false;
+	bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
 	int ret;
 	if (bkey_is_btree_ptr(k.k))
@@ -1130,7 +1141,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 				return -BCH_ERR_invalid_bkey;
 			}
+			if (entry->ptr.cached && have_ec) {
+				prt_printf(err, "cached, erasure coded ptr");
+				return -BCH_ERR_invalid_bkey;
+			}
+
 			unwritten = entry->ptr.unwritten;
+			have_ec = false;
+			crc_since_last_ptr = false;
 			nr_ptrs++;
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
@@ -1164,17 +1182,43 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 					return -BCH_ERR_invalid_bkey;
 				}
 			}
+
+			if (crc_since_last_ptr) {
+				prt_printf(err, "redundant crc entry");
+				return -BCH_ERR_invalid_bkey;
+			}
+			crc_since_last_ptr = true;
 			break;
 		case BCH_EXTENT_ENTRY_stripe_ptr:
+			if (have_ec) {
+				prt_printf(err, "redundant stripe entry");
+				return -BCH_ERR_invalid_bkey;
+			}
+			have_ec = true;
 			break;
 		}
 	}
+	if (!nr_ptrs) {
+		prt_str(err, "no ptrs");
+		return -BCH_ERR_invalid_bkey;
+	}
+
 	if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
 		prt_str(err, "too many ptrs");
 		return -BCH_ERR_invalid_bkey;
 	}
+
+	if (crc_since_last_ptr) {
+		prt_printf(err, "redundant crc entry");
+		return -BCH_ERR_invalid_bkey;
+	}
+
+	if (have_ec) {
+		prt_printf(err, "redundant stripe entry");
+		return -BCH_ERR_invalid_bkey;
+	}
+
 	return 0;
 }

View File

@@ -76,6 +76,18 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
 	return extent_entry_bytes(entry) / sizeof(u64);
 }
+static inline void __extent_entry_insert(struct bkey_i *k,
+					 union bch_extent_entry *dst,
+					 union bch_extent_entry *new)
+{
+	union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+
+	memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
+			      dst, (u64 *) end - (u64 *) dst);
+	k->k.u64s += extent_entry_u64s(new);
+	memcpy_u64s_small(dst, new, extent_entry_u64s(new));
+}
+
 static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
 {
 	return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
@@ -655,6 +667,8 @@ bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
 bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
 bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
+void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
+
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
 			    struct bkey_s_c);

View File

@@ -1650,7 +1650,7 @@ static void __bch2_write(struct bch_write_op *op)
 	nofs_flags = memalloc_nofs_save();
-	if (unlikely(op->opts.nocow)) {
+	if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
 		bch2_nocow_write(op);
 		if (op->flags & BCH_WRITE_DONE)
 			goto out_nofs_restore;

View File

@@ -789,8 +789,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 				break;
 			}
 		} else {
-			ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
-						       false, cl);
+			ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, cl);
 			ret = PTR_ERR_OR_ZERO(ob[nr_got]);
 			if (ret)
 				break;

View File

@@ -31,22 +31,6 @@ int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
 	return 0;
 }
-void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
-{
-	struct bkey_i *where;
-
-	for_each_keylist_key(l, where)
-		if (bpos_lt(insert->k.p, where->k.p))
-			break;
-
-	memmove_u64s_up((u64 *) where + insert->k.u64s,
-			where,
-			((u64 *) l->top) - ((u64 *) where));
-
-	l->top_p += insert->k.u64s;
-	bkey_copy(where, insert);
-}
-
 void bch2_keylist_pop_front(struct keylist *l)
 {
 	l->top_p -= bch2_keylist_front(l)->k.u64s;

View File

@@ -5,7 +5,6 @@
 #include "keylist_types.h"
 int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
-void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *);
 void bch2_keylist_pop_front(struct keylist *);
 static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)

View File

@@ -4,6 +4,7 @@
 #include "alloc_background.h"
 #include "btree_iter.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "error.h"
 #include "lru.h"
 #include "recovery.h"
@@ -101,7 +102,8 @@ static const char * const bch2_lru_types[] = {
 static int bch2_check_lru_key(struct btree_trans *trans,
 			      struct btree_iter *lru_iter,
-			      struct bkey_s_c lru_k)
+			      struct bkey_s_c lru_k,
+			      struct bpos *last_flushed_pos)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
@@ -137,19 +139,25 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 		break;
 	}
-	if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
-			lru_pos_time(lru_k.k->p) != idx, c,
-			"incorrect lru entry: lru %s time %llu\n"
-			"  %s\n"
-			"  for %s",
-			bch2_lru_types[type],
-			lru_pos_time(lru_k.k->p),
-			(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
-			(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
-		ret = bch2_btree_delete_at(trans, lru_iter, 0);
-		if (ret)
-			goto err;
+	if (lru_k.k->type != KEY_TYPE_set ||
+	    lru_pos_time(lru_k.k->p) != idx) {
+		if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
+			*last_flushed_pos = lru_k.k->p;
+			ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+				-BCH_ERR_transaction_restart_write_buffer_flush;
+			goto out;
+		}
+
+		if (fsck_err(c, "incorrect lru entry: lru %s time %llu\n"
+			     "  %s\n"
+			     "  for %s",
+			     bch2_lru_types[type],
+			     lru_pos_time(lru_k.k->p),
+			     (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
+			     (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
+			ret = bch2_btree_delete_at(trans, lru_iter, 0);
 	}
+out:
 err:
 fsck_err:
 	bch2_trans_iter_exit(trans, &iter);
@@ -163,6 +171,7 @@ int bch2_check_lrus(struct bch_fs *c)
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
+	struct bpos last_flushed_pos = POS_MIN;
 	int ret = 0;
 	bch2_trans_init(&trans, c, 0, 0);
@@ -170,7 +179,7 @@ int bch2_check_lrus(struct bch_fs *c)
 	ret = for_each_btree_key_commit(&trans, iter,
 			BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
 			NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-		bch2_check_lru_key(&trans, &iter, k));
+		bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos));
 	bch2_trans_exit(&trans);
 	return ret;
View File

@@ -227,7 +227,8 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
 	if (bkey_deleted(&n->k))
 		n->k.size = 0;
-	return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+	return bch2_trans_relock(trans) ?:
+		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
 		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
 }

View File

@@ -404,6 +404,12 @@ enum opt_type {
 	  NULL,		"Nocow mode: Writes will be done in place when possible.\n"\
 			"Snapshots and reflink will still caused writes to be COW\n"\
 			"Implicitly disables data checksumming, compression and encryption")\
+	x(nocow_enabled,		u8,				\
+	  OPT_FS|OPT_MOUNT,						\
+	  OPT_BOOL(),							\
+	  BCH2_NO_SB_OPT,		true,				\
+	  NULL,		"Enable nocow mode: enables runtime locking in\n"\
+			"data move path needed if nocow will ever be in use\n")\
 	x(no_data_io,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\

View File

@@ -194,6 +194,7 @@ read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(stripes_heap);
 read_attribute(open_buckets);
+read_attribute(open_buckets_partial);
 read_attribute(write_points);
 read_attribute(nocow_lock_table);
@@ -455,6 +456,9 @@ SHOW(bch2_fs)
 	if (attr == &sysfs_open_buckets)
 		bch2_open_buckets_to_text(out, c);
+	if (attr == &sysfs_open_buckets_partial)
+		bch2_open_buckets_partial_to_text(out, c);
+
 	if (attr == &sysfs_write_points)
 		bch2_write_points_to_text(out, c);
@@ -663,6 +667,7 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_new_stripes,
 	&sysfs_stripes_heap,
 	&sysfs_open_buckets,
+	&sysfs_open_buckets_partial,
 	&sysfs_write_points,
 #ifdef BCH_WRITE_REF_DEBUG
 	&sysfs_write_refs,

View File

@@ -118,6 +118,14 @@ int blkdev_issue_discard(struct block_device *bdev,
 	return 0;
 }
+int blkdev_issue_zeroout(struct block_device *bdev,
+			 sector_t sector, sector_t nr_sects,
+			 gfp_t gfp_mask, unsigned flags)
+{
+	/* Not yet implemented: */
+	BUG();
+}
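Note: this userspace shim only satisfies the linker for the new zero_out_rest_of_ec_bucket() path above; it BUG()s if ever reached. A working userspace fallback would simply write zeroes, along these lines (a sketch, not part of this commit; assumes a file-backed device and 512-byte sectors):

	#include <string.h>
	#include <unistd.h>

	/* Hypothetical fallback: write nr_sects * 512 zero bytes at the given
	 * sector offset using pwrite(). The in-kernel helper can do better
	 * (REQ_OP_WRITE_ZEROES, discard), but this is functionally equivalent.
	 */
	static int zeroout_fallback(int fd, unsigned long long sector,
				    unsigned long long nr_sects)
	{
		char zeroes[4096] = { 0 };
		unsigned long long off = sector << 9;
		unsigned long long bytes = nr_sects << 9;

		while (bytes) {
			size_t len = bytes < sizeof(zeroes) ? bytes : sizeof(zeroes);
			ssize_t n = pwrite(fd, zeroes, len, off);

			if (n < 0)
				return -1;
			off += n;
			bytes -= n;
		}
		return 0;
	}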
 unsigned bdev_logical_block_size(struct block_device *bdev)
 {
 	struct stat statbuf;