Update bcachefs sources to ca97ee3577 bcachefs: bch2_btree_iter_peek_and_restart_outlined()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2023-02-26 21:36:39 -05:00
parent bf359ac1ad
commit 30cca2e94d
25 changed files with 540 additions and 321 deletions


@ -1 +1 @@
8e1519ccb62b76736d5b9ca97e58b41ed9a11274
ca97ee357774427208e4c251bfaa5957ae7f8c2c


@ -70,6 +70,7 @@ static inline void submit_bio(struct bio *bio)
}
int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t);
int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsigned);
#define bdev_get_queue(bdev) (&((bdev)->queue))


@ -516,7 +516,6 @@ DEFINE_EVENT(bch_fs, gc_gens_end,
DECLARE_EVENT_CLASS(bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket,
u64 free,
u64 avail,
@ -525,14 +524,13 @@ DECLARE_EVENT_CLASS(bucket_alloc,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u8, dev )
__array(char, reserve, 16 )
__field(bool, user )
__field(u64, bucket )
__field(u64, free )
__field(u64, avail )
@ -548,9 +546,8 @@ DECLARE_EVENT_CLASS(bucket_alloc,
),
TP_fast_assign(
__entry->dev = ca->dev;
__entry->dev = ca->dev_idx;
strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
__entry->user = user;
__entry->bucket = bucket;
__entry->free = free;
__entry->avail = avail;
@ -565,10 +562,9 @@ DECLARE_EVENT_CLASS(bucket_alloc,
strscpy(__entry->err, err, sizeof(__entry->err));
),
TP_printk("%d,%d reserve %s user %u bucket %llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
__entry->reserve,
__entry->user,
__entry->dev,
__entry->bucket,
__entry->free,
__entry->avail,
@ -585,7 +581,6 @@ DECLARE_EVENT_CLASS(bucket_alloc,
DEFINE_EVENT(bucket_alloc, bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket,
u64 free,
u64 avail,
@ -594,14 +589,13 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err)
);
DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
bool user,
u64 bucket,
u64 free,
u64 avail,
@ -610,7 +604,7 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
TP_ARGS(ca, alloc_reserve, user, bucket, free, avail,
TP_ARGS(ca, alloc_reserve, bucket, free, avail,
copygc_wait_amount, copygc_waiting_for,
s, nonblocking, err)
);


@ -2175,21 +2175,24 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
}
mutex_unlock(&c->btree_reserve_cache_lock);
while (1) {
struct open_bucket *ob;
spin_lock(&c->freelist_lock);
if (!ca->open_buckets_partial_nr) {
spin_unlock(&c->freelist_lock);
break;
}
ob = c->open_buckets +
ca->open_buckets_partial[--ca->open_buckets_partial_nr];
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
if (ob->dev == ca->dev_idx) {
swap(c->open_buckets_partial[i],
c->open_buckets_partial[--c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);


@ -154,26 +154,17 @@ static void open_bucket_free_unused(struct bch_fs *c,
struct write_point *wp,
struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
bool may_realloc = wp->data_type == BCH_DATA_user;
BUG_ON(c->open_buckets_partial_nr >=
ARRAY_SIZE(c->open_buckets_partial));
BUG_ON(ca->open_buckets_partial_nr >
ARRAY_SIZE(ca->open_buckets_partial));
if (ca->open_buckets_partial_nr <
ARRAY_SIZE(ca->open_buckets_partial) &&
may_realloc) {
spin_lock(&c->freelist_lock);
ob->on_partial_list = true;
ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
c->open_buckets_partial[c->open_buckets_partial_nr++] =
ob - c->open_buckets;
spin_unlock(&c->freelist_lock);
closure_wake_up(&c->open_buckets_wait);
closure_wake_up(&c->freelist_wait);
} else {
bch2_open_bucket_put(c, ob);
}
}
/* _only_ for allocating the journal on a new device: */
@ -259,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ob->valid = true;
ob->sectors_free = ca->mi.bucket_size;
ob->alloc_reserve = reserve;
ob->dev = ca->dev_idx;
ob->gen = a->gen;
ob->bucket = bucket;
@ -386,32 +376,6 @@ err:
return ob;
}
static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve)
{
struct open_bucket *ob;
int i;
spin_lock(&c->freelist_lock);
for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
ob = c->open_buckets + ca->open_buckets_partial[i];
if (reserve <= ob->alloc_reserve) {
array_remove_item(ca->open_buckets_partial,
ca->open_buckets_partial_nr,
i);
ob->on_partial_list = false;
ob->alloc_reserve = reserve;
spin_unlock(&c->freelist_lock);
return ob;
}
}
spin_unlock(&c->freelist_lock);
return NULL;
}
/*
* This path is for before the freespace btree is initialized:
*
@ -535,7 +499,6 @@ again:
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl,
struct bch_dev_usage *usage)
{
@ -574,12 +537,6 @@ again:
if (waiting)
closure_wake_up(&c->freelist_wait);
if (may_alloc_partial) {
ob = try_alloc_partial_bucket(c, ca, reserve);
if (ob)
return ob;
}
alloc:
ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
@ -599,7 +556,6 @@ err:
if (!IS_ERR(ob))
trace_and_count(c, bucket_alloc, ca,
bch2_alloc_reserves[reserve],
may_alloc_partial,
ob->bucket,
usage->d[BCH_DATA_free].buckets,
avail,
@ -611,7 +567,6 @@ err:
else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
trace_and_count(c, bucket_alloc_fail, ca,
bch2_alloc_reserves[reserve],
may_alloc_partial,
0,
usage->d[BCH_DATA_free].buckets,
avail,
@ -626,7 +581,6 @@ err:
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl)
{
struct bch_dev_usage usage;
@ -634,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
may_alloc_partial, cl, &usage)));
cl, &usage)));
return ob;
}
@ -691,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
}
#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0)
#define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
static void add_new_bucket(struct bch_fs *c,
static int add_new_bucket(struct bch_fs *c,
struct open_buckets *ptrs,
struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
@ -705,12 +657,19 @@ static void add_new_bucket(struct bch_fs *c,
unsigned durability =
bch_dev_bkey_exists(c, ob->dev)->mi.durability;
BUG_ON(*nr_effective >= nr_replicas);
__clear_bit(ob->dev, devs_may_alloc->d);
*nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY)
? durability : 1;
*nr_effective += durability;
*have_cache |= !durability;
ob_push(c, ptrs, ob);
if (*nr_effective >= nr_replicas)
return 1;
if (ob->ec)
return 1;
return 0;
}
int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
@ -720,8 +679,8 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
enum bch_data_type data_type,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
struct bch_fs *c = trans->c;
@ -754,8 +713,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
ob = bch2_bucket_alloc_trans(trans, ca, reserve,
flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
ob = bch2_bucket_alloc_trans(trans, ca, reserve, cl, &usage);
if (!IS_ERR(ob))
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
percpu_ref_put(&ca->ref);
@ -767,10 +725,11 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob);
ob->data_type = data_type;
if (*nr_effective >= nr_replicas) {
if (add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, 0, ob)) {
ret = 0;
break;
}
@ -792,7 +751,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
u16 target,
unsigned erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
@ -805,9 +763,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct open_bucket *ob;
struct bch_dev *ca;
unsigned i, ec_idx;
if (!erasure_code)
return 0;
int ret = 0;
if (nr_replicas < 2)
return 0;
@ -842,46 +798,187 @@ got_bucket:
ob->ec_idx = ec_idx;
ob->ec = h->s;
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob);
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
atomic_inc(&h->s->pin);
out_put_head:
bch2_ec_stripe_head_put(c, h);
return 0;
return ret;
}
/* Sector allocator */
static void get_buckets_from_writepoint(struct bch_fs *c,
static bool want_bucket(struct bch_fs *c,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
bool *have_cache, bool ec,
struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
if (!test_bit(ob->dev, devs_may_alloc->d))
return false;
if (ob->data_type != wp->data_type)
return false;
if (!ca->mi.durability &&
(wp->data_type != BCH_DATA_user || !*have_cache))
return false;
if (ec != (ob->ec != NULL))
return false;
return true;
}
static int bucket_alloc_set_writepoint(struct bch_fs *c,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
bool need_ec)
bool ec, unsigned flags)
{
struct open_buckets ptrs_skip = { .nr = 0 };
struct open_bucket *ob;
unsigned i;
int ret = 0;
open_bucket_for_each(c, &wp->ptrs, ob, i) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
if (*nr_effective < nr_replicas &&
test_bit(ob->dev, devs_may_alloc->d) &&
(ca->mi.durability ||
(wp->data_type == BCH_DATA_user && !*have_cache)) &&
(ob->ec || !need_ec)) {
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache,
flags, ob);
} else {
if (!ret && want_bucket(c, wp, devs_may_alloc,
have_cache, ec, ob))
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
else
ob_push(c, &ptrs_skip, ob);
}
}
wp->ptrs = ptrs_skip;
return ret;
}
static int bucket_alloc_set_partial(struct bch_fs *c,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache, bool ec,
enum alloc_reserve reserve,
unsigned flags)
{
int i, ret = 0;
if (!c->open_buckets_partial_nr)
return 0;
spin_lock(&c->freelist_lock);
for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
struct bch_dev_usage usage;
u64 avail;
bch2_dev_usage_read_fast(ca, &usage);
avail = dev_buckets_free(ca, usage, reserve);
if (!avail)
continue;
array_remove_item(c->open_buckets_partial,
c->open_buckets_partial_nr,
i);
ob->on_partial_list = false;
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, flags, ob);
if (ret)
break;
}
}
spin_unlock(&c->freelist_lock);
return ret;
}
static int __open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_list *devs_have,
u16 target,
bool erasure_code,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
enum alloc_reserve reserve,
unsigned flags,
struct closure *_cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
unsigned i;
int ret;
rcu_read_lock();
devs = target_rw_devs(c, wp->data_type, target);
rcu_read_unlock();
/* Don't allocate from devices we already have pointers to: */
for (i = 0; i < devs_have->nr; i++)
__clear_bit(devs_have->devs[i], devs.d);
open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d);
if (erasure_code && ec_open_bucket(c, ptrs))
return 0;
ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, erasure_code, flags);
if (ret)
return ret;
ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, erasure_code, reserve, flags);
if (ret)
return ret;
if (erasure_code) {
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target,
nr_replicas, nr_effective,
have_cache, flags, _cl);
} else {
retry_blocking:
/*
* Try nonblocking first, so that if one device is full we'll try from
* other devices:
*/
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
wp->data_type, reserve, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
!cl && _cl) {
cl = _cl;
goto retry_blocking;
}
}
return ret;
}
static int open_bucket_add_buckets(struct btree_trans *trans,
@ -895,72 +992,29 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
bool *have_cache,
enum alloc_reserve reserve,
unsigned flags,
struct closure *_cl)
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
int ret;
unsigned i;
rcu_read_lock();
devs = target_rw_devs(c, wp->data_type, target);
rcu_read_unlock();
/* Don't allocate from devices we already have pointers to: */
for (i = 0; i < devs_have->nr; i++)
__clear_bit(devs_have->devs[i], devs.d);
open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d);
if (erasure_code) {
if (!ec_open_bucket(c, ptrs)) {
get_buckets_from_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, flags, true);
if (*nr_effective >= nr_replicas)
return 0;
}
if (!ec_open_bucket(c, ptrs)) {
ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
target, erasure_code,
nr_replicas, nr_effective,
have_cache, flags, _cl);
ret = __open_bucket_add_buckets(trans, ptrs, wp,
devs_have, target, erasure_code,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret;
if (*nr_effective >= nr_replicas)
return 0;
}
}
get_buckets_from_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, flags, false);
if (*nr_effective >= nr_replicas)
return 0;
retry_blocking:
/*
* Try nonblocking first, so that if one device is full we'll try from
* other devices:
*/
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
ret = __open_bucket_add_buckets(trans, ptrs, wp,
devs_have, target, false,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
!cl && _cl) {
cl = _cl;
goto retry_blocking;
}
return ret;
return ret < 0 ? ret : 0;
}
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
@ -1159,14 +1213,10 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
struct open_bucket *ob;
struct open_buckets ptrs;
unsigned nr_effective, write_points_nr;
unsigned ob_flags = 0;
bool have_cache;
int ret;
int i;
if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
ptrs.nr = 0;
@ -1176,9 +1226,6 @@ retry:
*wp_ret = wp = writepoint_find(trans, write_point.v);
if (wp->data_type == BCH_DATA_user)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
/* metadata may not allocate on cache devices: */
if (wp->data_type != BCH_DATA_user)
have_cache = true;
@ -1188,13 +1235,13 @@ retry:
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, cl);
flags, cl);
} else {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, NULL);
flags, NULL);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
@ -1203,7 +1250,7 @@ retry:
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, cl);
flags, cl);
}
alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas);
@ -1350,6 +1397,24 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
}
}
void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
{
unsigned i;
spin_lock(&c->freelist_lock);
for (i = 0; i < c->open_buckets_partial_nr; i++) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
prt_printf(out, "%zu ref %u type %s ec %u %u:%llu:%u\n",
ob - c->open_buckets,
atomic_read(&ob->pin),
bch2_data_types[ob->data_type],
ob->ec != NULL,
ob->dev, ob->bucket, ob->gen);
}
spin_unlock(&c->freelist_lock);
}
static const char * const bch2_write_point_states[] = {
#define x(n) #n,
WRITE_POINT_STATES()


@ -31,8 +31,7 @@ void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
long bch2_bucket_alloc_new_fs(struct bch_dev *);
struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
enum alloc_reserve, bool,
struct closure *);
enum alloc_reserve, struct closure *);
static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
struct open_bucket *ob)
@ -152,8 +151,9 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *, enum alloc_reserve,
unsigned, struct closure *);
unsigned, unsigned *, bool *,
enum bch_data_type, enum alloc_reserve,
struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
@ -221,6 +221,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
void bch2_fs_allocator_foreground_init(struct bch_fs *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);


@ -51,10 +51,9 @@ struct open_bucket {
* the block in the stripe this open_bucket corresponds to:
*/
u8 ec_idx;
enum bch_data_type data_type:8;
enum bch_data_type data_type:6;
unsigned valid:1;
unsigned on_partial_list:1;
unsigned alloc_reserve:3;
u8 dev;
u8 gen;


@ -932,11 +932,14 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bpos bucket,
u64 *bp_offset,
struct bbpos start,
struct bbpos end)
struct bbpos end,
struct bpos *last_flushed_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bch_backpointer bp;
struct bbpos pos;
struct bpos bp_pos;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
@ -957,17 +960,31 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
if (fsck_err_on(!k.k, trans->c,
bp_pos = bucket_pos_to_bp(c, bucket,
max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
if (!k.k && !bpos_eq(*last_flushed_pos, bp_pos)) {
*last_flushed_pos = bp_pos;
pr_info("flushing at %llu:%llu",
last_flushed_pos->inode,
last_flushed_pos->offset);
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
if (fsck_err_on(!k.k, c,
"%s backpointer points to missing extent\n%s",
*bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
(bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
if (ret == -ENOENT)
bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
bch_err(c, "backpointer at %llu not found", *bp_offset);
}
bch2_trans_iter_exit(trans, &iter);
out:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
return ret;
}
@ -978,6 +995,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
{
struct btree_iter iter;
struct bkey_s_c k;
struct bpos last_flushed_pos = SPOS_MAX;
int ret = 0;
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
@ -987,7 +1005,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
while (!(ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL,
check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
check_one_backpointer(trans, iter.pos, &bp_offset,
start, end, &last_flushed_pos))) &&
bp_offset < U64_MAX)
bp_offset++;


@ -516,9 +516,6 @@ struct bch_dev {
unsigned nr_open_buckets;
unsigned nr_btree_reserve;
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial_nr;
size_t inc_gen_needs_gc;
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
@ -859,6 +856,9 @@ struct bch_fs {
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial_nr;
struct write_point btree_write_point;
struct write_point rebalance_write_point;


@ -2568,6 +2568,18 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
{
struct bkey_s_c k;
while (btree_trans_too_many_iters(iter->trans) ||
(k = bch2_btree_iter_peek_type(iter, iter->flags),
bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
bch2_trans_begin(iter->trans);
return k;
}
/* new transactional stuff: */
#ifdef CONFIG_BCACHEFS_DEBUG
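For reference, bch2_btree_iter_peek_and_restart_outlined() above is the usual peek-and-restart loop factored out of line, presumably to shrink the code generated at each call site. A caller might use it roughly as follows; this is an illustrative sketch rather than code from this commit, and the filesystem pointer c, the btree ID and the loop body are placeholders:

	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);
	bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
			     BTREE_ITER_PREFETCH);

	while (1) {
		k = bch2_btree_iter_peek_and_restart_outlined(&iter);
		ret = bkey_err(k);
		if (ret || !k.k)
			break;
		/* use k; transaction restarts were already retried internally */
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);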


@ -596,6 +596,8 @@ static inline int btree_trans_too_many_iters(struct btree_trans *trans)
return 0;
}
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
static inline struct bkey_s_c
__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)


@ -64,6 +64,15 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
(*fast)++;
if (path->ref > 1) {
/*
* We can't clone a path that has write locks: if the path is
* shared, unlock before set_pos(), traverse():
*/
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
*write_locked = false;
}
return 0;
trans_commit:
return bch2_trans_update(trans, iter, &wb->k, 0) ?:


@ -98,8 +98,10 @@ static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr)
if (ptr->dev == dev)
ptr->cached = true;
if (ptr->dev == dev) {
bch2_extent_ptr_set_cached(k, ptr);
return;
}
}
static int __bch2_data_update_index_update(struct btree_trans *trans,
@ -295,15 +297,7 @@ out:
int bch2_data_update_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct btree_trans trans;
int ret;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
ret = __bch2_data_update_index_update(&trans, op);
bch2_trans_exit(&trans);
return ret;
return bch2_trans_run(op->c, __bch2_data_update_index_update(&trans, op));
}
void bch2_data_update_read_done(struct data_update *m,
@ -326,6 +320,7 @@ void bch2_data_update_exit(struct data_update *update)
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr) {
if (c->opts.nocow_enabled)
bch2_bucket_nocow_unlock(&c->nocow_locks,
PTR_BUCKET_POS(c, ptr), 0);
percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
@ -487,6 +482,7 @@ int bch2_data_update_init(struct btree_trans *trans,
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
m->op.incompressible = true;
if (c->opts.nocow_enabled) {
if (ctxt) {
move_ctxt_wait_event(ctxt, trans,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
@ -504,6 +500,8 @@ int bch2_data_update_init(struct btree_trans *trans,
}
}
ptrs_locked |= (1U << i);
}
i++;
}


@ -138,20 +138,28 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned i;
unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
s->algorithm,
le16_to_cpu(s->sectors),
s->nr_blocks - s->nr_redundant,
nr_data,
s->nr_redundant,
s->csum_type,
1U << s->csum_granularity_bits);
for (i = 0; i < s->nr_blocks; i++)
prt_printf(out, " %u:%llu:%u", s->ptrs[i].dev,
(u64) s->ptrs[i].offset,
stripe_blockcount_get(s, i));
for (i = 0; i < s->nr_blocks; i++) {
const struct bch_extent_ptr *ptr = s->ptrs + i;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
if (i < nr_data)
prt_printf(out, "#%u", stripe_blockcount_get(s, i));
if (ptr_stale(ca, ptr))
prt_printf(out, " stale");
}
}
/* returns blocknr in stripe that we matched: */
@ -442,15 +450,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
percpu_ref_put(&ca->io_ref);
}
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
struct ec_stripe_buf *stripe)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes,
bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
POS(0, idx), BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
@ -462,11 +469,15 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip
}
bkey_reassemble(&stripe->key.k_i, k);
err:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
return bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe));
}
/* recovery read path: */
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
{
@ -865,25 +876,6 @@ err:
return ret;
}
static void extent_stripe_ptr_add(struct bkey_s_extent e,
struct ec_stripe_buf *s,
struct bch_extent_ptr *ptr,
unsigned block)
{
struct bch_extent_stripe_ptr *dst = (void *) ptr;
union bch_extent_entry *end = extent_entry_last(e);
memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst);
e.k->u64s += sizeof(*dst) / sizeof(u64);
*dst = (struct bch_extent_stripe_ptr) {
.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
.block = block,
.redundancy = s->key.v.nr_redundant,
.idx = s->key.k.p.offset,
};
}
static int ec_stripe_update_extent(struct btree_trans *trans,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
@ -895,6 +887,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bkey_s_c k;
const struct bch_extent_ptr *ptr_c;
struct bch_extent_ptr *ptr, *ec_ptr = NULL;
struct bch_extent_stripe_ptr stripe_ptr;
struct bkey_i *n;
int ret, dev, block;
@ -933,16 +926,27 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
dev = s->key.v.ptrs[block].dev;
n = bch2_bkey_make_mut(trans, k);
n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto out;
bkey_reassemble(n, k);
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
BUG_ON(!ec_ptr);
extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block);
stripe_ptr = (struct bch_extent_stripe_ptr) {
.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
.block = block,
.redundancy = s->key.v.nr_redundant,
.idx = s->key.k.p.offset,
};
__extent_entry_insert(n,
(union bch_extent_entry *) ec_ptr,
(union bch_extent_entry *) &stripe_ptr);
ret = bch2_trans_update(trans, &iter, n, 0);
out:
@ -999,6 +1003,35 @@ err:
return ret;
}
static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
struct ec_stripe_new *s,
unsigned block,
struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
unsigned offset = ca->mi.bucket_size - ob->sectors_free;
int ret;
if (!bch2_dev_get_ioref(ca, WRITE)) {
s->err = -EROFS;
return;
}
memset(s->new_stripe.data[block] + (offset << 9),
0,
ob->sectors_free << 9);
ret = blkdev_issue_zeroout(ca->disk_sb.bdev,
ob->bucket * ca->mi.bucket_size + offset,
ob->sectors_free,
GFP_KERNEL, 0);
percpu_ref_put(&ca->io_ref);
if (ret)
s->err = ret;
}
/*
* data buckets of new stripe all written: create the stripe
*/
@ -1014,6 +1047,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
closure_sync(&s->iodone);
for (i = 0; i < nr_data; i++)
if (s->blocks[i]) {
ob = c->open_buckets + s->blocks[i];
if (ob->sectors_free)
zero_out_rest_of_ec_bucket(c, s, i, ob);
}
if (s->err) {
if (!bch2_err_matches(s->err, EROFS))
bch_err(c, "error creating stripe: error writing data buckets");
@ -1155,9 +1196,6 @@ void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
{
struct ec_stripe_new *s = ob->ec;
if (ob->sectors_free)
s->err = -1;
ec_stripe_new_put(c, s);
}
@ -1398,10 +1436,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
h->s->nr_parity,
&nr_have_parity,
&have_cache,
BCH_DATA_parity,
h->copygc
? RESERVE_movinggc
: RESERVE_none,
0,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
@ -1427,10 +1465,10 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
h->s->nr_data,
&nr_have_data,
&have_cache,
BCH_DATA_user,
h->copygc
? RESERVE_movinggc
: RESERVE_none,
0,
cl);
open_bucket_for_each(c, &buckets, ob, i) {
@ -1486,8 +1524,9 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret;
}
static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *h)
static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
{
struct bch_fs *c = trans->c;
unsigned i;
s64 idx;
int ret;
@ -1497,7 +1536,7 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c, struct ec_stripe_head *
return -BCH_ERR_ENOSPC_stripe_reuse;
h->s->have_existing_stripe = true;
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
if (ret) {
bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
return ret;
@ -1626,7 +1665,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
goto err;
if (ret && needs_stripe_new)
ret = __bch2_ec_stripe_head_reuse(c, h);
ret = __bch2_ec_stripe_head_reuse(trans, h);
if (ret) {
bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
goto err;
@ -1771,6 +1810,7 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;
unsigned i;
while (1) {
mutex_lock(&c->ec_stripe_head_lock);
@ -1782,7 +1822,12 @@ void bch2_fs_ec_exit(struct bch_fs *c)
if (!h)
break;
BUG_ON(h->s);
if (h->s) {
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++)
BUG_ON(h->s->blocks[i]);
kfree(h->s);
}
kfree(h);
}
@ -1801,6 +1846,8 @@ void bch2_fs_ec_init_early(struct bch_fs *c)
int bch2_fs_ec_init(struct bch_fs *c)
{
spin_lock_init(&c->ec_stripes_new_lock);
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}


@ -706,18 +706,6 @@ void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry
k->k.u64s -= extent_entry_u64s(entry);
}
static inline void __extent_entry_insert(struct bkey_i *k,
union bch_extent_entry *dst,
union bch_extent_entry *new)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
dst, (u64 *) end - (u64 *) dst);
k->k.u64s += extent_entry_u64s(new);
memcpy_u64s_small(dst, new, extent_entry_u64s(new));
}
void bch2_extent_ptr_decoded_append(struct bkey_i *k,
struct extent_ptr_decoded *p)
{
@ -951,6 +939,29 @@ bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
return false;
}
void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
union bch_extent_entry *ec = NULL;
bkey_extent_entry_for_each(ptrs, entry) {
if (&entry->ptr == ptr) {
ptr->cached = true;
if (ec)
extent_entry_drop(k, ec);
return;
}
if (extent_entry_is_stripe_ptr(entry))
ec = entry;
else if (extent_entry_is_ptr(entry))
ec = NULL;
}
BUG();
}
/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
@ -1094,7 +1105,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned size_ondisk = k.k->size;
unsigned nonce = UINT_MAX;
unsigned nr_ptrs = 0;
bool unwritten = false;
bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
int ret;
if (bkey_is_btree_ptr(k.k))
@ -1130,7 +1141,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}
if (entry->ptr.cached && have_ec) {
prt_printf(err, "cached, erasure coded ptr");
return -BCH_ERR_invalid_bkey;
}
unwritten = entry->ptr.unwritten;
have_ec = false;
crc_since_last_ptr = false;
nr_ptrs++;
break;
case BCH_EXTENT_ENTRY_crc32:
@ -1164,17 +1182,43 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -BCH_ERR_invalid_bkey;
}
}
if (crc_since_last_ptr) {
prt_printf(err, "redundant crc entry");
return -BCH_ERR_invalid_bkey;
}
crc_since_last_ptr = true;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
if (have_ec) {
prt_printf(err, "redundant stripe entry");
return -BCH_ERR_invalid_bkey;
}
have_ec = true;
break;
}
}
if (!nr_ptrs) {
prt_str(err, "no ptrs");
return -BCH_ERR_invalid_bkey;
}
if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
prt_str(err, "too many ptrs");
return -BCH_ERR_invalid_bkey;
}
if (crc_since_last_ptr) {
prt_printf(err, "redundant crc entry");
return -BCH_ERR_invalid_bkey;
}
if (have_ec) {
prt_printf(err, "redundant stripe entry");
return -BCH_ERR_invalid_bkey;
}
return 0;
}


@ -76,6 +76,18 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
return extent_entry_bytes(entry) / sizeof(u64);
}
static inline void __extent_entry_insert(struct bkey_i *k,
union bch_extent_entry *dst,
union bch_extent_entry *new)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
dst, (u64 *) end - (u64 *) dst);
k->k.u64s += extent_entry_u64s(new);
memcpy_u64s_small(dst, new, extent_entry_u64s(new));
}
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
@ -655,6 +667,8 @@ bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);


@ -1650,7 +1650,7 @@ static void __bch2_write(struct bch_write_op *op)
nofs_flags = memalloc_nofs_save();
if (unlikely(op->opts.nocow)) {
if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
bch2_nocow_write(op);
if (op->flags & BCH_WRITE_DONE)
goto out_nofs_restore;


@ -789,8 +789,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break;
}
} else {
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
false, cl);
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, cl);
ret = PTR_ERR_OR_ZERO(ob[nr_got]);
if (ret)
break;


@ -31,22 +31,6 @@ int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
return 0;
}
void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
{
struct bkey_i *where;
for_each_keylist_key(l, where)
if (bpos_lt(insert->k.p, where->k.p))
break;
memmove_u64s_up((u64 *) where + insert->k.u64s,
where,
((u64 *) l->top) - ((u64 *) where));
l->top_p += insert->k.u64s;
bkey_copy(where, insert);
}
void bch2_keylist_pop_front(struct keylist *l)
{
l->top_p -= bch2_keylist_front(l)->k.u64s;


@ -5,7 +5,6 @@
#include "keylist_types.h"
int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *);
void bch2_keylist_pop_front(struct keylist *);
static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)


@ -4,6 +4,7 @@
#include "alloc_background.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@ -101,7 +102,8 @@ static const char * const bch2_lru_types[] = {
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k)
struct bkey_s_c lru_k,
struct bpos *last_flushed_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@ -137,19 +139,25 @@ static int bch2_check_lru_key(struct btree_trans *trans,
break;
}
if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
lru_pos_time(lru_k.k->p) != idx, c,
"incorrect lru entry: lru %s time %llu\n"
if (lru_k.k->type != KEY_TYPE_set ||
lru_pos_time(lru_k.k->p) != idx) {
if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
*last_flushed_pos = lru_k.k->p;
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
if (fsck_err(c, "incorrect lru entry: lru %s time %llu\n"
" %s\n"
" for %s",
bch2_lru_types[type],
lru_pos_time(lru_k.k->p),
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
ret = bch2_btree_delete_at(trans, lru_iter, 0);
if (ret)
goto err;
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
@ -163,6 +171,7 @@ int bch2_check_lrus(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bpos last_flushed_pos = POS_MIN;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@ -170,7 +179,7 @@ int bch2_check_lrus(struct bch_fs *c)
ret = for_each_btree_key_commit(&trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
bch2_check_lru_key(&trans, &iter, k));
bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos));
bch2_trans_exit(&trans);
return ret;


@ -227,7 +227,8 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
if (bkey_deleted(&n->k))
n->k.size = 0;
return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
return bch2_trans_relock(trans) ?:
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}


@ -404,6 +404,12 @@ enum opt_type {
NULL, "Nocow mode: Writes will be done in place when possible.\n"\
"Snapshots and reflink will still caused writes to be COW\n"\
"Implicitly disables data checksumming, compression and encryption")\
x(nocow_enabled, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable nocow mode: enables runtime locking in\n"\
"data move path needed if nocow will ever be in use\n")\
x(no_data_io, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \


@ -194,6 +194,7 @@ read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
read_attribute(open_buckets);
read_attribute(open_buckets_partial);
read_attribute(write_points);
read_attribute(nocow_lock_table);
@ -455,6 +456,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
if (attr == &sysfs_open_buckets_partial)
bch2_open_buckets_partial_to_text(out, c);
if (attr == &sysfs_write_points)
bch2_write_points_to_text(out, c);
@ -663,6 +667,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_stripes_heap,
&sysfs_open_buckets,
&sysfs_open_buckets_partial,
&sysfs_write_points,
#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,


@ -118,6 +118,14 @@ int blkdev_issue_discard(struct block_device *bdev,
return 0;
}
int blkdev_issue_zeroout(struct block_device *bdev,
sector_t sector, sector_t nr_sects,
gfp_t gfp_mask, unsigned flags)
{
/* Not yet implemented: */
BUG();
}
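The stub above simply BUG()s if the zeroout path is ever hit in userspace. Purely as an illustration (not part of this commit), a minimal fallback could zero the range with fallocate(2); the bd_fd field standing in for the shim's backing file descriptor is an assumption here:

	/* hypothetical userspace fallback -- assumes bdev->bd_fd is the backing fd */
	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <linux/falloc.h>

	int blkdev_issue_zeroout(struct block_device *bdev,
				 sector_t sector, sector_t nr_sects,
				 gfp_t gfp_mask, unsigned flags)
	{
		if (fallocate(bdev->bd_fd, FALLOC_FL_ZERO_RANGE,
			      (off_t) sector << 9, (off_t) nr_sects << 9))
			return -errno;
		return 0;
	}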
unsigned bdev_logical_block_size(struct block_device *bdev)
{
struct stat statbuf;