mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 72405e7ff8 bcachefs: Fix bch2_check_extents_to_backpointers()
This commit is contained in:
parent 46ba4fb48c
commit fa35853772
@@ -1 +1 @@
-3856459b1b9f37cebee2bca3c9edcafaf393aa98
+72405e7ff8c5fb569b74b046d19866ee480f29b7
@@ -1006,7 +1006,7 @@ static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
iter = bucket->inode;
ca = __bch2_next_dev(c, &iter, NULL);
if (ca)
bucket->offset = ca->mi.first_bucket;
*bucket = POS(ca->dev_idx, ca->mi.first_bucket);
rcu_read_unlock();

return ca != NULL;

@@ -2158,43 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
*/
bch2_recalc_capacity(c);

/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, &c->write_points[i]);

bch2_writepoint_stop(c, ca, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, &c->btree_write_point);

mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];

bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);

spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];

if (ob->dev == ca->dev_idx) {
swap(c->open_buckets_partial[i],
c->open_buckets_partial[--c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);

bch2_ec_stop_dev(c, ca);
bch2_open_buckets_stop(c, ca, false);

/*
* Wake up threads that were blocked on allocation, so they can notice
@@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
u64 free = max_t(s64, 0,
u.d[BCH_DATA_free].buckets
+ u.d[BCH_DATA_need_discard].buckets
- bch2_dev_buckets_reserved(ca, RESERVE_none));
- bch2_dev_buckets_reserved(ca, RESERVE_stripe));

return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
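Note: the hunk above only swaps RESERVE_none for RESERVE_stripe; the surrounding arithmetic is unchanged. As a reading aid only (not part of the commit, all bucket counts invented), a standalone sketch of what should_invalidate_buckets() computes:

/*
 * Illustration only: clamp the invalidation target to what the cached
 * pool can actually provide. All numbers are made up.
 */
#include <stdio.h>

static long long clamp_ll(long long v, long long lo, long long hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

int main(void)
{
	long long free_buckets = 100, need_discard = 20, reserved = 50;
	long long cached = 300, want_free = 200;

	/* free = max(0, free + need_discard - reserved) */
	long long free_avail = free_buckets + need_discard - reserved;
	if (free_avail < 0)
		free_avail = 0;

	/* invalidate at most what the cached pool holds */
	long long to_invalidate = clamp_ll(want_free - free_avail, 0, cached);

	printf("should invalidate %lld cached buckets\n", to_invalidate); /* prints 130 */
	return 0;
}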
@@ -97,7 +97,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);

if (ob->ec) {
ec_stripe_new_put(c, ob->ec);
ec_stripe_new_put(c, ob->ec, STRIPE_REF_io);
return;
}

@@ -658,9 +658,11 @@ static int add_new_bucket(struct bch_fs *c,
bch_dev_bkey_exists(c, ob->dev)->mi.durability;

BUG_ON(*nr_effective >= nr_replicas);
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);

__clear_bit(ob->dev, devs_may_alloc->d);
*nr_effective += durability;
*nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
? durability : 1;
*have_cache |= !durability;

ob_push(c, ptrs, ob);

@@ -679,6 +681,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
enum bch_data_type data_type,
enum alloc_reserve reserve,
struct closure *cl)

@@ -729,7 +732,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,

if (add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, 0, ob)) {
have_cache, flags, ob)) {
ret = 0;
break;
}

@@ -796,7 +799,7 @@ got_bucket:

ob->ec_idx = ec_idx;
ob->ec = h->s;
ec_stripe_new_get(h->s);
ec_stripe_new_get(h->s, STRIPE_REF_io);

ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,

@@ -823,7 +826,7 @@ static bool want_bucket(struct bch_fs *c,
return false;

if (!ca->mi.durability &&
(wp->data_type != BCH_DATA_user || !*have_cache))
(wp->data_type == BCH_DATA_btree || ec || *have_cache))
return false;

if (ec != (ob->ec != NULL))

@@ -877,6 +880,9 @@ static int bucket_alloc_set_partial(struct bch_fs *c,

spin_lock(&c->freelist_lock);

if (!c->open_buckets_partial_nr)
goto unlock;

for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];

@@ -902,7 +908,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
break;
}
}

unlock:
spin_unlock(&c->freelist_lock);
return ret;
}

@@ -967,7 +973,7 @@ retry_blocking:
*/
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
wp->data_type, reserve, cl);
flags, wp->data_type, reserve, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
@@ -1017,45 +1023,96 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
return ret < 0 ? ret : 0;
}

void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
struct open_buckets *obs)
static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
struct bch_dev *ca, bool ec)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob, *ob2;
unsigned i, j;

open_bucket_for_each(c, obs, ob, i) {
bool drop = !ca || ob->dev == ca->dev_idx;
if (ec) {
return ob->ec != NULL;
} else if (ca) {
bool drop = ob->dev == ca->dev_idx;
struct open_bucket *ob2;
unsigned i;

if (!drop && ob->ec) {
mutex_lock(&ob->ec->lock);
for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
if (!ob->ec->blocks[j])
for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) {
if (!ob->ec->blocks[i])
continue;

ob2 = c->open_buckets + ob->ec->blocks[j];
ob2 = c->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx;
}
mutex_unlock(&ob->ec->lock);
}

if (drop)
return drop;
} else {
return true;
}
}

static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec, struct write_point *wp)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob;
unsigned i;

mutex_lock(&wp->lock);
open_bucket_for_each(c, &wp->ptrs, ob, i)
if (should_drop_bucket(ob, c, ca, ec))
bch2_open_bucket_put(c, ob);
else
ob_push(c, &ptrs, ob);
}

*obs = ptrs;
}

void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
{
mutex_lock(&wp->lock);
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
wp->ptrs = ptrs;
mutex_unlock(&wp->lock);
}

void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec)
{
unsigned i;

/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);

bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);

mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];

bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);

spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];

if (should_drop_bucket(ob, c, ca, ec)) {
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);

bch2_ec_stop_dev(c, ca);
}

static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
unsigned long write_point)
{
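Note: the refactor above folds the old bch2_open_buckets_stop_dev()/bch2_writepoint_stop() logic into a single should_drop_bucket() predicate shared by write-point shutdown and the partial open-bucket list. Illustration only, not code from this commit; the ca-only call matches the device-removal hunk earlier, while the EC-wide call with (NULL, true) is an assumed usage shown for contrast:

static void example_stop_allocations_on_device(struct bch_fs *c, struct bch_dev *ca)
{
	/* drop every open bucket that points at @ca, EC or not */
	bch2_open_buckets_stop(c, ca, false);
}

static void example_stop_ec_allocations(struct bch_fs *c)
{
	/* drop only buckets attached to an erasure coded stripe, on any device */
	bch2_open_buckets_stop(c, NULL, true);
}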
@@ -1101,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
return true;
}

static bool try_decrease_writepoints(struct bch_fs *c,
unsigned old_nr)
static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr)
{
struct write_point *wp;

@@ -1123,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
hlist_del_rcu(&wp->node);
mutex_unlock(&c->write_points_hash_lock);

bch2_writepoint_stop(c, NULL, wp);
bch2_writepoint_stop(c, NULL, false, wp);
return true;
}

@@ -1217,6 +1273,8 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
int ret;
int i;

BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);

BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
ptrs.nr = 0;

@@ -1230,13 +1288,7 @@ retry:
if (wp->data_type != BCH_DATA_user)
have_cache = true;

if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
} else {
if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,

@@ -1246,11 +1298,28 @@ retry:
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;

/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto allocate_blocking;

/*
* Only try to allocate cache (durability = 0 devices) from the
* specified target:
*/
have_cache = true;

ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
} else {
allocate_blocking:
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
}
alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas);

@@ -1380,14 +1449,16 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)

static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
unsigned data_type = ob->data_type;
barrier(); /* READ_ONCE() doesn't work on bitfields */

prt_printf(out, "%zu ref %u %s %u:%llu gen %u",
prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
ob - c->open_buckets,
atomic_read(&ob->pin),
data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
ob->dev, ob->bucket, ob->gen);
ob->dev, ob->bucket, ob->gen,
ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
if (ob->ec)
prt_printf(out, " ec idx %llu", ob->ec->idx);
if (ob->on_partial_list)

@@ -151,7 +151,7 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64

int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *,
unsigned, unsigned *, bool *, unsigned,
enum bch_data_type, enum alloc_reserve,
struct closure *);

@@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
struct bkey_i *, unsigned, bool);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);

void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
struct open_buckets *);

void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);

static inline struct write_point_specifier writepoint_hashed(unsigned long v)
{
@@ -549,13 +549,18 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
bch2_check_btree_backpointer(&trans, &iter, k)));
}

struct bpos_level {
unsigned level;
struct bpos pos;
};

static int check_bp_exists(struct btree_trans *trans,
struct bpos bucket_pos,
struct bch_backpointer bp,
struct bkey_s_c orig_k,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter, bp_iter = { NULL };

@@ -600,8 +605,11 @@ static int check_bp_exists(struct btree_trans *trans,

if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
if (!bpos_eq(*last_flushed_pos, orig_k.k->p)) {
*last_flushed_pos = orig_k.k->p;
if (last_flushed->level != bp.level ||
!bpos_eq(last_flushed->pos, orig_k.k->p)) {
last_flushed->level = bp.level;
last_flushed->pos = orig_k.k->p;

ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
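Note: this hunk is the core of the fix named in the commit title. The "last flushed" marker used to dedupe write-buffer flushes now records the btree level along with the position, so a key at one level is not mistaken for a key at the same position on another level. A simplified standalone sketch (not bcachefs code; the struct bodies are stand-ins) of the (level, pos) dedupe:

/* Illustration only: dedupe an expensive flush on (level, pos), not pos alone. */
struct bpos { unsigned long long inode, offset; };	/* simplified stand-in */

struct bpos_level {
	unsigned	level;
	struct bpos	pos;
};

static int bpos_eq(struct bpos a, struct bpos b)
{
	return a.inode == b.inode && a.offset == b.offset;
}

/* Returns nonzero (and records the new marker) when a flush is still required. */
static int need_flush(struct bpos_level *last, unsigned level, struct bpos pos)
{
	if (last->level == level && bpos_eq(last->pos, pos))
		return 0;	/* already flushed for this exact key */

	last->level = level;
	last->pos = pos;
	return 1;
}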
@@ -639,7 +647,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs;

@@ -668,7 +676,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,

ret = check_bp_exists(trans, bucket_pos, bp, k,
bucket_start, bucket_end,
last_flushed_pos);
last_flushed);
if (ret)
return ret;
}

@@ -680,7 +688,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;

@@ -709,12 +717,12 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
if (p.ptr.cached)
continue;

bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
k, p, &bucket_pos, &bp);

ret = check_bp_exists(trans, bucket_pos, bp, k,
bucket_start, bucket_end,
last_flushed_pos);
last_flushed);
if (ret)
goto err;
}

@@ -794,7 +802,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
{
struct btree_iter iter;
enum btree_id btree_id;
struct bpos last_flushed_pos = SPOS_MAX;
struct bpos_level last_flushed = { UINT_MAX };
int ret = 0;

for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {

@@ -811,7 +819,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
check_extent_to_backpointers(trans, &iter,
bucket_start, bucket_end,
&last_flushed_pos));
&last_flushed));
if (ret)
break;
} while (!bch2_btree_iter_advance(&iter));

@@ -826,7 +834,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
check_btree_root_to_backpointers(trans, btree_id,
bucket_start, bucket_end,
&last_flushed_pos));
&last_flushed));
if (ret)
break;
}
@@ -214,8 +214,11 @@
#define BCH_WRITE_REF_DEBUG
#endif

#ifndef dynamic_fault
#define dynamic_fault(...) 0
#define race_fault(...) 0
#endif

#define race_fault(...) dynamic_fault("bcachefs:race")

#define trace_and_count(_c, _name, ...) \
do { \
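Note: with this hunk race_fault() becomes a named dynamic fault point instead of a compiled-out stub. Illustration only (not from the commit): the usual shape of such a guard at a call site, relying on the macros from the hunk above:

static int example_lookup_with_fault(int key)
{
	if (race_fault())	/* expands to 0 unless dynamic fault support is built in */
		return -1;	/* simulate the rare, racy failure path on demand */

	return key * 2;		/* normal path (placeholder computation) */
}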
@ -652,7 +655,6 @@ typedef struct {
|
||||
x(fallocate) \
|
||||
x(discard) \
|
||||
x(invalidate) \
|
||||
x(move) \
|
||||
x(delete_dead_snapshots) \
|
||||
x(snapshot_delete_pagecache) \
|
||||
x(sysfs)
|
||||
@ -922,6 +924,13 @@ struct bch_fs {
|
||||
|
||||
mempool_t large_bkey_pool;
|
||||
|
||||
/* MOVE.C */
|
||||
struct list_head moving_context_list;
|
||||
struct mutex moving_context_lock;
|
||||
|
||||
struct list_head data_progress_list;
|
||||
struct mutex data_progress_lock;
|
||||
|
||||
/* REBALANCE */
|
||||
struct bch_fs_rebalance rebalance;
|
||||
|
||||
@ -932,10 +941,6 @@ struct bch_fs {
|
||||
bool copygc_running;
|
||||
wait_queue_head_t copygc_running_wq;
|
||||
|
||||
/* DATA PROGRESS STATS */
|
||||
struct list_head data_progress_list;
|
||||
struct mutex data_progress_lock;
|
||||
|
||||
/* STRIPES: */
|
||||
GENRADIX(struct stripe) stripes;
|
||||
GENRADIX(struct gc_stripe) gc_stripes;
|
||||
@ -952,14 +957,14 @@ struct bch_fs {
|
||||
|
||||
struct list_head ec_stripe_new_list;
|
||||
struct mutex ec_stripe_new_lock;
|
||||
wait_queue_head_t ec_stripe_new_wait;
|
||||
|
||||
struct work_struct ec_stripe_create_work;
|
||||
u64 ec_stripe_hint;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
|
||||
struct work_struct ec_stripe_delete_work;
|
||||
struct llist_head ec_stripe_delete_list;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
|
||||
/* REFLINK */
|
||||
u64 reflink_hint;
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
#include <linux/prandom.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
|
@@ -770,11 +770,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,

bool bch2_btree_insert_key_cached(struct btree_trans *trans,
unsigned flags,
struct btree_path *path,
struct bkey_i *insert)
struct btree_insert_entry *insert_entry)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
struct bkey_cached *ck = (void *) insert_entry->path->l[0].b;
struct bkey_i *insert = insert_entry->k;
bool kick_reclaim = false;

BUG_ON(insert->k.u64s > ck->u64s);

@@ -802,9 +802,24 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
kick_reclaim = true;
}

/*
* To minimize lock contention, we only add the journal pin here and
* defer pin updates to the flush callback via ->seq. Be careful not to
* update ->seq on nojournal commits because we don't want to update the
* pin to a seq that doesn't include journal updates on disk. Otherwise
* we risk losing the update after a crash.
*
* The only exception is if the pin is not active in the first place. We
* have to add the pin because journal reclaim drives key cache
* flushing. The flush callback will not proceed unless ->seq matches
* the latest pin, so make sure it starts with a consistent value.
*/
if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
!journal_pin_active(&ck->journal)) {
ck->seq = trans->journal_res.seq;
}
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
&ck->journal, bch2_btree_key_cache_journal_flush);
ck->seq = trans->journal_res.seq;

if (kick_reclaim)
journal_reclaim_kick(&c->journal);
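Note: the long comment above carries the substance of this hunk. As an illustration only (not from the commit), the ->seq update rule it describes reduces to:

/*
 * Illustrative restatement: only advance the cached key's journal sequence
 * when the commit was actually journalled, unless no pin exists yet.
 */
static void example_update_key_cache_seq(int journalled, int pin_active,
					 unsigned long long commit_seq,
					 unsigned long long *ck_seq)
{
	if (journalled || !pin_active)
		*ck_seq = commit_seq;
	/*
	 * A nojournal commit with an active pin keeps the old seq, so the
	 * flush callback never trusts a seq that is not on disk yet.
	 */
}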
@ -30,7 +30,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
|
||||
unsigned);
|
||||
|
||||
bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
|
||||
struct btree_path *, struct bkey_i *);
|
||||
struct btree_insert_entry *);
|
||||
int bch2_btree_key_cache_flush(struct btree_trans *,
|
||||
enum btree_id, struct bpos);
|
||||
void bch2_btree_key_cache_drop(struct btree_trans *,
|
||||
|
@ -388,6 +388,40 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *p
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* XXX BIG FAT NOTICE
|
||||
*
|
||||
* Drop all read locks before taking a write lock:
|
||||
*
|
||||
* This is a hack, because bch2_btree_node_lock_write_nofail() is a
|
||||
* hack - but by dropping read locks first, this should never fail, and
|
||||
* we only use this in code paths where whatever read locks we've
|
||||
* already taken are no longer needed:
|
||||
*/
|
||||
|
||||
trans_for_each_path(trans, linked) {
|
||||
if (!linked->nodes_locked)
|
||||
continue;
|
||||
|
||||
for (i = 0; i < BTREE_MAX_DEPTH; i++)
|
||||
if (btree_node_read_locked(linked, i)) {
|
||||
btree_node_unlock(trans, linked, i);
|
||||
btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
|
||||
}
|
||||
}
|
||||
|
||||
ret = __btree_node_lock_write(trans, path, b, true);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
/* relock */
|
||||
|
||||
static inline bool btree_path_get_locks(struct btree_trans *trans,
|
||||
|
@ -299,15 +299,6 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
|
||||
: __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
|
||||
}
|
||||
|
||||
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_bkey_cached_common *b)
|
||||
{
|
||||
int ret = __btree_node_lock_write(trans, path, b, true);
|
||||
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
static inline int __must_check
|
||||
bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
@ -316,6 +307,10 @@ bch2_btree_node_lock_write(struct btree_trans *trans,
|
||||
return __btree_node_lock_write(trans, path, b, false);
|
||||
}
|
||||
|
||||
void bch2_btree_node_lock_write_nofail(struct btree_trans *,
|
||||
struct btree_path *,
|
||||
struct btree_bkey_cached_common *);
|
||||
|
||||
/* relock: */
|
||||
|
||||
bool bch2_btree_path_relock_norestart(struct btree_trans *,
|
||||
|
@ -13,6 +13,9 @@ void bch2_btree_node_prep_for_write(struct btree_trans *,
|
||||
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
|
||||
struct btree *, struct btree_node_iter *,
|
||||
struct bkey_i *);
|
||||
|
||||
int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
|
||||
int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
|
||||
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
|
||||
|
||||
void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
|
||||
|
@ -227,12 +227,12 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
{
|
||||
return __btree_node_flush(j, pin, 0, seq);
|
||||
}
|
||||
|
||||
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
|
||||
{
|
||||
return __btree_node_flush(j, pin, 1, seq);
|
||||
}
|
||||
@ -244,8 +244,8 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
|
||||
|
||||
bch2_journal_pin_add(&c->journal, seq, &w->journal,
|
||||
btree_node_write_idx(b) == 0
|
||||
? btree_node_flush0
|
||||
: btree_node_flush1);
|
||||
? bch2_btree_node_flush0
|
||||
: bch2_btree_node_flush1);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -765,7 +765,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
if (!i->cached)
|
||||
btree_insert_key_leaf(trans, i);
|
||||
else if (!i->key_cache_already_flushed)
|
||||
bch2_btree_insert_key_cached(trans, flags, i->path, i->k);
|
||||
bch2_btree_insert_key_cached(trans, flags, i);
|
||||
else {
|
||||
bch2_btree_key_cache_drop(trans, i->path);
|
||||
btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
|
||||
|
@ -1855,7 +1855,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
if (a->v.data_type && a->v.data_type != type) {
|
||||
if (a->v.data_type && type && a->v.data_type != type) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
|
||||
"while marking %s",
|
||||
|
@ -92,18 +92,6 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
|
||||
{
|
||||
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
|
||||
struct bch_extent_ptr *ptr;
|
||||
|
||||
bkey_for_each_ptr(ptrs, ptr)
|
||||
if (ptr->dev == dev) {
|
||||
bch2_extent_ptr_set_cached(k, ptr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
struct bch_write_op *op)
|
||||
{
|
||||
@ -126,15 +114,17 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
while (1) {
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
|
||||
struct bkey_i *insert;
|
||||
struct bkey_i *insert = NULL;
|
||||
struct bkey_i_extent *new;
|
||||
const union bch_extent_entry *entry;
|
||||
const union bch_extent_entry *entry_c;
|
||||
union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
struct bch_extent_ptr *ptr;
|
||||
const struct bch_extent_ptr *ptr_c;
|
||||
struct bpos next_pos;
|
||||
bool did_work = false;
|
||||
bool should_check_enospc;
|
||||
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
|
||||
unsigned i;
|
||||
unsigned rewrites_found = 0, durability, i;
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
@ -146,7 +136,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
new = bkey_i_to_extent(bch2_keylist_front(keys));
|
||||
|
||||
if (!bch2_extents_match(k, old))
|
||||
goto nomatch;
|
||||
goto nowork;
|
||||
|
||||
bkey_reassemble(_insert.k, k);
|
||||
insert = _insert.k;
|
||||
@ -169,50 +159,60 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
* Fist, drop rewrite_ptrs from @new:
|
||||
*/
|
||||
i = 0;
|
||||
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
|
||||
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) {
|
||||
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
|
||||
bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
|
||||
/*
|
||||
* If we're going to be adding a pointer to the
|
||||
* same device, we have to drop the old one -
|
||||
* otherwise, we can just mark it cached:
|
||||
*/
|
||||
if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
|
||||
bch2_bkey_drop_device_noerror(bkey_i_to_s(insert), p.ptr.dev);
|
||||
else
|
||||
bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
|
||||
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
|
||||
!ptr->cached) {
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
|
||||
rewrites_found |= 1U << i;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (m->data_opts.rewrite_ptrs &&
|
||||
!rewrites_found &&
|
||||
bch2_bkey_durability(c, k) >= m->op.opts.data_replicas)
|
||||
goto nowork;
|
||||
|
||||
/* Add new ptrs: */
|
||||
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
|
||||
const struct bch_extent_ptr *existing_ptr =
|
||||
bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev);
|
||||
|
||||
if (existing_ptr && existing_ptr->cached) {
|
||||
/*
|
||||
* We're replacing a cached pointer with a non
|
||||
* cached pointer:
|
||||
* A replica that we just wrote might conflict with a replica
|
||||
* that we want to keep, due to racing with another move:
|
||||
*/
|
||||
bch2_bkey_drop_device_noerror(bkey_i_to_s(insert),
|
||||
existing_ptr->dev);
|
||||
} else if (existing_ptr) {
|
||||
/*
|
||||
* raced with another move op? extent already
|
||||
* has a pointer to the device we just wrote
|
||||
* data to
|
||||
*/
|
||||
continue;
|
||||
restart_drop_conflicting_replicas:
|
||||
extent_for_each_ptr(extent_i_to_s(new), ptr)
|
||||
if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) &&
|
||||
!ptr_c->cached) {
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr);
|
||||
goto restart_drop_conflicting_replicas;
|
||||
}
|
||||
|
||||
if (!bkey_val_u64s(&new->k))
|
||||
goto nowork;
|
||||
|
||||
/* Now, drop pointers that conflict with what we just wrote: */
|
||||
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
|
||||
if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev)))
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
|
||||
|
||||
durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) +
|
||||
bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
|
||||
|
||||
/* Now, drop excess replicas: */
|
||||
restart_drop_extra_replicas:
|
||||
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
|
||||
unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
|
||||
|
||||
if (!p.ptr.cached &&
|
||||
durability - ptr_durability >= m->op.opts.data_replicas) {
|
||||
durability -= ptr_durability;
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
|
||||
goto restart_drop_extra_replicas;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, add the pointers we just wrote: */
|
||||
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
|
||||
bch2_extent_ptr_decoded_append(insert, &p);
|
||||
did_work = true;
|
||||
}
|
||||
|
||||
if (!did_work)
|
||||
goto nomatch;
|
||||
|
||||
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
|
||||
bch2_extent_normalize(c, bkey_i_to_s(insert));
|
||||
@ -253,6 +253,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, &op->res,
|
||||
NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
m->data_opts.btree_insert_flags);
|
||||
if (!ret) {
|
||||
@ -273,7 +274,7 @@ next:
|
||||
goto out;
|
||||
}
|
||||
continue;
|
||||
nomatch:
|
||||
nowork:
|
||||
if (m->ctxt && m->ctxt->stats) {
|
||||
BUG_ON(k.k->p.offset <= iter.pos.offset);
|
||||
atomic64_inc(&m->ctxt->stats->keys_raced);
|
||||
|
121 libbcachefs/ec.c
@ -659,14 +659,13 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
static u64 stripe_idx_to_delete(struct bch_fs *c)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
size_t heap_idx;
|
||||
|
||||
lockdep_assert_held(&c->ec_stripes_heap_lock);
|
||||
|
||||
for (heap_idx = 0; heap_idx < h->used; heap_idx++)
|
||||
if (h->data[heap_idx].blocks_nonempty == 0 &&
|
||||
!bch2_stripe_is_open(c, h->data[heap_idx].idx))
|
||||
return h->data[heap_idx].idx;
|
||||
if (h->used &&
|
||||
h->data[0].blocks_nonempty == 0 &&
|
||||
!bch2_stripe_is_open(c, h->data[0].idx))
|
||||
return h->data[0].idx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -959,7 +958,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
bkey_reassemble(n, k);
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
|
||||
ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
|
||||
ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
|
||||
BUG_ON(!ec_ptr);
|
||||
|
||||
stripe_ptr = (struct bch_extent_stripe_ptr) {
|
||||
@ -990,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
|
||||
while (1) {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
|
||||
s, &bp_offset));
|
||||
@ -1057,6 +1057,13 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
|
||||
s->err = ret;
|
||||
}
|
||||
|
||||
void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
{
|
||||
if (s->idx)
|
||||
bch2_stripe_close(c, s);
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* data buckets of new stripe all written: create the stripe
|
||||
*/
|
||||
@ -1072,6 +1079,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
closure_sync(&s->iodone);
|
||||
|
||||
if (!s->err) {
|
||||
for (i = 0; i < nr_data; i++)
|
||||
if (s->blocks[i]) {
|
||||
ob = c->open_buckets + s->blocks[i];
|
||||
@ -1079,6 +1087,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
if (ob->sectors_free)
|
||||
zero_out_rest_of_ec_bucket(c, s, i, ob);
|
||||
}
|
||||
}
|
||||
|
||||
if (s->err) {
|
||||
if (!bch2_err_matches(s->err, EROFS))
|
||||
@ -1119,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = bch2_trans_do(c, &s->res, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
ec_stripe_key_update(&trans, &s->new_stripe.key,
|
||||
!s->have_existing_stripe));
|
||||
if (ret) {
|
||||
@ -1152,13 +1163,11 @@ err:
|
||||
list_del(&s->list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
|
||||
if (s->idx)
|
||||
bch2_stripe_close(c, s);
|
||||
|
||||
ec_stripe_buf_exit(&s->existing_stripe);
|
||||
ec_stripe_buf_exit(&s->new_stripe);
|
||||
closure_debug_destroy(&s->iodone);
|
||||
kfree(s);
|
||||
|
||||
ec_stripe_new_put(c, s, STRIPE_REF_stripe);
|
||||
}
|
||||
|
||||
static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
|
||||
@ -1167,7 +1176,7 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
list_for_each_entry(s, &c->ec_stripe_new_list, list)
|
||||
if (!atomic_read(&s->pin))
|
||||
if (!atomic_read(&s->ref[STRIPE_REF_io]))
|
||||
goto out;
|
||||
s = NULL;
|
||||
out:
|
||||
@ -1209,7 +1218,7 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
|
||||
list_add(&s->list, &c->ec_stripe_new_list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
|
||||
ec_stripe_new_put(c, s);
|
||||
ec_stripe_new_put(c, s, STRIPE_REF_io);
|
||||
}
|
||||
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
||||
@ -1321,7 +1330,8 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
|
||||
|
||||
mutex_init(&s->lock);
|
||||
closure_init(&s->iodone, NULL);
|
||||
atomic_set(&s->pin, 1);
|
||||
atomic_set(&s->ref[STRIPE_REF_stripe], 1);
|
||||
atomic_set(&s->ref[STRIPE_REF_io], 1);
|
||||
s->c = c;
|
||||
s->h = h;
|
||||
s->nr_data = min_t(unsigned, h->nr_active_devs,
|
||||
@ -1402,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
|
||||
h = ERR_PTR(-EROFS);
|
||||
goto found;
|
||||
}
|
||||
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list)
|
||||
if (h->target == target &&
|
||||
h->algo == algo &&
|
||||
@ -1451,7 +1466,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
|
||||
&devs,
|
||||
h->s->nr_parity,
|
||||
&nr_have_parity,
|
||||
&have_cache,
|
||||
&have_cache, 0,
|
||||
BCH_DATA_parity,
|
||||
reserve,
|
||||
cl);
|
||||
@ -1478,7 +1493,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
|
||||
&devs,
|
||||
h->s->nr_data,
|
||||
&nr_have_data,
|
||||
&have_cache,
|
||||
&have_cache, 0,
|
||||
BCH_DATA_user,
|
||||
reserve,
|
||||
cl);
|
||||
@ -1706,6 +1721,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
|
||||
goto err;
|
||||
|
||||
if (reserve == RESERVE_movinggc) {
|
||||
ret = new_stripe_alloc_buckets(trans, h, reserve, NULL) ?:
|
||||
__bch2_ec_stripe_head_reserve(trans, h);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto allocate_buf;
|
||||
}
|
||||
|
||||
/* XXX freelist_wait? */
|
||||
closure_wait(&c->freelist_wait, cl);
|
||||
waiting = true;
|
||||
@ -1738,7 +1761,7 @@ err:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct ec_stripe_head *h;
|
||||
struct open_bucket *ob;
|
||||
@ -1746,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
||||
mutex_lock(&c->ec_stripe_head_lock);
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
|
||||
|
||||
mutex_lock(&h->lock);
|
||||
if (!h->s)
|
||||
goto unlock;
|
||||
|
||||
if (!ca)
|
||||
goto found;
|
||||
|
||||
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
|
||||
if (!h->s->blocks[i])
|
||||
continue;
|
||||
@ -1769,6 +1794,32 @@ unlock:
|
||||
mutex_unlock(&c->ec_stripe_head_lock);
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
__bch2_ec_stop(c, ca);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_stop(struct bch_fs *c)
|
||||
{
|
||||
__bch2_ec_stop(c, NULL);
|
||||
}
|
||||
|
||||
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
ret = list_empty(&c->ec_stripe_new_list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_fs_ec_flush(struct bch_fs *c)
|
||||
{
|
||||
wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
|
||||
}
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
@ -1821,13 +1872,16 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
size_t i;
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
for (i = 0; i < min_t(size_t, h->used, 20); i++) {
|
||||
for (i = 0; i < min_t(size_t, h->used, 50); i++) {
|
||||
m = genradix_ptr(&c->stripes, h->data[i].idx);
|
||||
|
||||
prt_printf(out, "%zu %u/%u+%u\n", h->data[i].idx,
|
||||
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
|
||||
h->data[i].blocks_nonempty,
|
||||
m->nr_blocks - m->nr_redundant,
|
||||
m->nr_redundant);
|
||||
if (bch2_stripe_is_open(c, h->data[i].idx))
|
||||
prt_str(out, " open");
|
||||
prt_newline(out);
|
||||
}
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
}
|
||||
@ -1839,22 +1893,27 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->ec_stripe_head_lock);
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
|
||||
prt_printf(out, "target %u algo %u redundancy %u:\n",
|
||||
h->target, h->algo, h->redundancy);
|
||||
prt_printf(out, "target %u algo %u redundancy %u %s:\n",
|
||||
h->target, h->algo, h->redundancy,
|
||||
bch2_alloc_reserves[h->reserve]);
|
||||
|
||||
if (h->s)
|
||||
prt_printf(out, "\tpending: idx %llu blocks %u+%u allocated %u\n",
|
||||
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
|
||||
h->s->idx, h->s->nr_data, h->s->nr_parity,
|
||||
bitmap_weight(h->s->blocks_allocated,
|
||||
h->s->nr_data));
|
||||
}
|
||||
mutex_unlock(&c->ec_stripe_head_lock);
|
||||
|
||||
prt_printf(out, "in flight:\n");
|
||||
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
|
||||
prt_printf(out, "\tin flight: idx %llu blocks %u+%u pin %u\n",
|
||||
prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
|
||||
s->idx, s->nr_data, s->nr_parity,
|
||||
atomic_read(&s->pin));
|
||||
atomic_read(&s->ref[STRIPE_REF_io]),
|
||||
atomic_read(&s->ref[STRIPE_REF_stripe]),
|
||||
bch2_alloc_reserves[s->h->reserve]);
|
||||
}
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
}
|
||||
@ -1892,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_ec_init_early(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
mutex_init(&c->ec_stripes_heap_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_new_list);
|
||||
mutex_init(&c->ec_stripe_new_lock);
|
||||
init_waitqueue_head(&c->ec_stripe_new_wait);
|
||||
|
||||
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
|
||||
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
|
||||
}
|
||||
|
||||
int bch2_fs_ec_init(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
|
||||
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
|
@ -143,6 +143,12 @@ struct ec_stripe_buf {
|
||||
|
||||
struct ec_stripe_head;
|
||||
|
||||
enum ec_stripe_ref {
|
||||
STRIPE_REF_io,
|
||||
STRIPE_REF_stripe,
|
||||
STRIPE_REF_NR
|
||||
};
|
||||
|
||||
struct ec_stripe_new {
|
||||
struct bch_fs *c;
|
||||
struct ec_stripe_head *h;
|
||||
@ -154,8 +160,7 @@ struct ec_stripe_new {
|
||||
|
||||
struct closure iodone;
|
||||
|
||||
/* counts in flight writes, stripe is created when pin == 0 */
|
||||
atomic_t pin;
|
||||
atomic_t ref[STRIPE_REF_NR];
|
||||
|
||||
int err;
|
||||
|
||||
@ -213,24 +218,35 @@ void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
|
||||
|
||||
void bch2_do_stripe_deletes(struct bch_fs *);
|
||||
void bch2_ec_do_stripe_creates(struct bch_fs *);
|
||||
void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *);
|
||||
|
||||
static inline void ec_stripe_new_get(struct ec_stripe_new *s)
|
||||
static inline void ec_stripe_new_get(struct ec_stripe_new *s,
|
||||
enum ec_stripe_ref ref)
|
||||
{
|
||||
atomic_inc(&s->pin);
|
||||
atomic_inc(&s->ref[ref]);
|
||||
}
|
||||
|
||||
static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
|
||||
enum ec_stripe_ref ref)
|
||||
{
|
||||
BUG_ON(atomic_read(&s->pin) <= 0);
|
||||
BUG_ON(!s->err && !s->idx);
|
||||
BUG_ON(atomic_read(&s->ref[ref]) <= 0);
|
||||
|
||||
if (atomic_dec_and_test(&s->pin))
|
||||
if (atomic_dec_and_test(&s->ref[ref]))
|
||||
switch (ref) {
|
||||
case STRIPE_REF_stripe:
|
||||
bch2_ec_stripe_new_free(c, s);
|
||||
break;
|
||||
case STRIPE_REF_io:
|
||||
bch2_ec_do_stripe_creates(c);
|
||||
break;
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
void bch2_ec_flush_new_stripes(struct bch_fs *);
|
||||
void bch2_fs_ec_stop(struct bch_fs *);
|
||||
void bch2_fs_ec_flush(struct bch_fs *);
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *);
|
||||
|
||||
|
@ -26,8 +26,6 @@
|
||||
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
|
||||
|
||||
static unsigned bch2_crc_field_size_max[] = {
|
||||
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
|
||||
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
|
||||
@ -512,7 +510,7 @@ restart_narrow_pointers:
|
||||
|
||||
bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
|
||||
if (can_narrow_crc(p.crc, n)) {
|
||||
__bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr);
|
||||
p.ptr.offset += p.crc.offset;
|
||||
p.crc = n;
|
||||
bch2_extent_ptr_decoded_append(k, &p);
|
||||
@ -765,7 +763,7 @@ static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
|
||||
/*
|
||||
* Returns pointer to the next entry after the one being dropped:
|
||||
*/
|
||||
static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
|
||||
union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
|
||||
struct bch_extent_ptr *ptr)
|
||||
{
|
||||
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
|
||||
@ -809,7 +807,7 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
|
||||
{
|
||||
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
|
||||
union bch_extent_entry *ret =
|
||||
__bch2_bkey_drop_ptr(k, ptr);
|
||||
bch2_bkey_drop_ptr_noerror(k, ptr);
|
||||
|
||||
/*
|
||||
* If we deleted all the dirty pointers and there's still cached
|
||||
@ -840,14 +838,13 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
|
||||
|
||||
void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
|
||||
{
|
||||
struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev);
|
||||
struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev);
|
||||
|
||||
if (ptr)
|
||||
__bch2_bkey_drop_ptr(k, ptr);
|
||||
bch2_bkey_drop_ptr_noerror(k, ptr);
|
||||
}
|
||||
|
||||
const struct bch_extent_ptr *
|
||||
bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
|
||||
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
@ -922,11 +919,11 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
|
||||
}
|
||||
}
|
||||
|
||||
bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
|
||||
struct bkey_s_c k2)
|
||||
struct bch_extent_ptr *
|
||||
bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
|
||||
const union bch_extent_entry *entry2;
|
||||
struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2);
|
||||
union bch_extent_entry *entry2;
|
||||
struct extent_ptr_decoded p2;
|
||||
|
||||
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
|
||||
@ -934,9 +931,9 @@ bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
|
||||
p1.ptr.gen == p2.ptr.gen &&
|
||||
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
|
||||
(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
|
||||
return true;
|
||||
return &entry2->ptr;
|
||||
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
|
||||
@ -992,6 +989,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bch_dev *ca;
|
||||
bool first = true;
|
||||
|
||||
if (c)
|
||||
prt_printf(out, "durability: %u ", bch2_bkey_durability(c, k));
|
||||
|
||||
bkey_extent_entry_for_each(ptrs, entry) {
|
||||
if (!first)
|
||||
prt_printf(out, " ");
|
||||
|
@ -613,14 +613,21 @@ unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_drop_device(struct bkey_s, unsigned);
|
||||
void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
|
||||
const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
|
||||
|
||||
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned);
|
||||
|
||||
static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev)
|
||||
{
|
||||
return (void *) bch2_bkey_has_device_c(k.s_c, dev);
|
||||
}
|
||||
|
||||
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
|
||||
|
||||
void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
|
||||
|
||||
static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr)
|
||||
{
|
||||
EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
|
||||
EBUG_ON(bch2_bkey_has_device(bkey_i_to_s(k), ptr.dev));
|
||||
|
||||
switch (k->k.type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
@ -642,6 +649,8 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr
|
||||
|
||||
void bch2_extent_ptr_decoded_append(struct bkey_i *,
|
||||
struct extent_ptr_decoded *);
|
||||
union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
|
||||
struct bch_extent_ptr *);
|
||||
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
|
||||
struct bch_extent_ptr *);
|
||||
|
||||
@ -665,7 +674,8 @@ do { \
|
||||
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
|
||||
struct bch_extent_ptr, u64);
|
||||
bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
|
||||
bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
|
||||
struct bch_extent_ptr *
|
||||
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
|
||||
|
||||
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
|
||||
|
||||
|
@ -954,11 +954,11 @@ static int check_inode(struct btree_trans *trans,
|
||||
iter->pos.snapshot),
|
||||
POS(u.bi_inum, U64_MAX),
|
||||
0, NULL);
|
||||
if (ret) {
|
||||
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
bch_err(c, "error in fsck: error truncating inode: %s",
|
||||
bch2_err_str(ret));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* We truncated without our normal sector accounting hook, just
|
||||
|
@ -218,7 +218,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
|
||||
|
||||
bch2_trans_copy_iter(&iter, extent_iter);
|
||||
|
||||
for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
|
||||
for_each_btree_key_upto_continue_norestart(iter,
|
||||
new->k.p, BTREE_ITER_SLOTS, old, ret) {
|
||||
s64 sectors = min(new->k.p.offset, old.k->p.offset) -
|
||||
max(bkey_start_offset(&new->k),
|
||||
bkey_start_offset(old.k));
|
||||
@ -705,6 +706,7 @@ static void bch2_write_done(struct closure *cl)
|
||||
struct bch_fs *c = op->c;
|
||||
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
if (!(op->flags & BCH_WRITE_MOVE))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_write);
|
||||
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
||||
|
||||
@ -834,36 +836,30 @@ static void bch2_write_index(struct closure *cl)
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct write_point *wp = op->wp;
|
||||
struct workqueue_struct *wq = index_update_wq(op);
|
||||
unsigned long flags;
|
||||
|
||||
if ((op->flags & BCH_WRITE_DONE) &&
|
||||
(op->flags & BCH_WRITE_MOVE))
|
||||
bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
|
||||
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* We're not using wp->writes_lock here, so this is racey: that's ok,
|
||||
* because this is just for diagnostic purposes, and we're running out
|
||||
* of interrupt context here so if we were to take the log we'd have to
|
||||
* switch to spin_lock_irq()/irqsave(), which is not free:
|
||||
*/
|
||||
spin_lock_irqsave(&wp->writes_lock, flags);
|
||||
if (wp->state == WRITE_POINT_waiting_io)
|
||||
__wp_update_state(wp, WRITE_POINT_waiting_work);
|
||||
list_add_tail(&op->wp_list, &wp->writes);
|
||||
spin_unlock_irqrestore (&wp->writes_lock, flags);
|
||||
|
||||
op->btree_update_ready = true;
|
||||
queue_work(wq, &wp->index_update_work);
|
||||
}
|
||||
|
||||
static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
|
||||
{
|
||||
op->btree_update_ready = false;
|
||||
op->wp = wp;
|
||||
|
||||
spin_lock(&wp->writes_lock);
|
||||
list_add_tail(&op->wp_list, &wp->writes);
|
||||
if (wp->state == WRITE_POINT_stopped)
|
||||
if (wp->state == WRITE_POINT_stopped) {
|
||||
spin_lock_irq(&wp->writes_lock);
|
||||
__wp_update_state(wp, WRITE_POINT_waiting_io);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
spin_unlock_irq(&wp->writes_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_write_point_do_index_updates(struct work_struct *work)
|
||||
@ -873,16 +869,12 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
|
||||
struct bch_write_op *op;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&wp->writes_lock);
|
||||
list_for_each_entry(op, &wp->writes, wp_list)
|
||||
if (op->btree_update_ready) {
|
||||
spin_lock_irq(&wp->writes_lock);
|
||||
op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
|
||||
if (op)
|
||||
list_del(&op->wp_list);
|
||||
goto unlock;
|
||||
}
|
||||
op = NULL;
|
||||
unlock:
|
||||
wp_update_state(wp, op != NULL);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
spin_unlock_irq(&wp->writes_lock);
|
||||
|
||||
if (!op)
|
||||
break;
|
||||
@ -1673,7 +1665,6 @@ static void __bch2_write(struct bch_write_op *op)
|
||||
}
|
||||
again:
|
||||
memset(&op->failed, 0, sizeof(op->failed));
|
||||
op->btree_update_ready = false;
|
||||
|
||||
do {
|
||||
struct bkey_i *key_to_write;
|
||||
@ -1853,7 +1844,12 @@ void bch2_write(struct closure *cl)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (c->opts.nochanges ||
|
||||
if (c->opts.nochanges) {
|
||||
op->error = -BCH_ERR_erofs_no_writes;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!(op->flags & BCH_WRITE_MOVE) &&
|
||||
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
|
||||
op->error = -BCH_ERR_erofs_no_writes;
|
||||
goto err;
|
||||
@ -1881,6 +1877,28 @@ err:
|
||||
op->end_io(op);
|
||||
}
|
||||
|
||||
const char * const bch2_write_flags[] = {
|
||||
#define x(f) #f,
|
||||
BCH_WRITE_FLAGS()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
|
||||
{
|
||||
prt_str(out, "pos: ");
|
||||
bch2_bpos_to_text(out, op->pos);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "started: ");
|
||||
bch2_pr_time_units(out, local_clock() - op->start_time);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "flags: ");
|
||||
prt_bitflags(out, bch2_write_flags, op->flags);
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
/* Cache promotion on read */
|
||||
|
||||
struct promote_op {
|
||||
|
@@ -28,41 +28,34 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,

const char *bch2_blk_status_to_str(blk_status_t);

enum bch_write_flags {
__BCH_WRITE_ALLOC_NOWAIT,
__BCH_WRITE_CACHED,
__BCH_WRITE_DATA_ENCODED,
__BCH_WRITE_PAGES_STABLE,
__BCH_WRITE_PAGES_OWNED,
__BCH_WRITE_ONLY_SPECIFIED_DEVS,
__BCH_WRITE_WROTE_DATA_INLINE,
__BCH_WRITE_FROM_INTERNAL,
__BCH_WRITE_CHECK_ENOSPC,
__BCH_WRITE_SYNC,
__BCH_WRITE_MOVE,
__BCH_WRITE_IN_WORKER,
__BCH_WRITE_DONE,
__BCH_WRITE_IO_ERROR,
__BCH_WRITE_CONVERT_UNWRITTEN,
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
x(DATA_ENCODED) \
x(PAGES_STABLE) \
x(PAGES_OWNED) \
x(ONLY_SPECIFIED_DEVS) \
x(WROTE_DATA_INLINE) \
x(FROM_INTERNAL) \
x(CHECK_ENOSPC) \
x(SYNC) \
x(MOVE) \
x(IN_WORKER) \
x(DONE) \
x(IO_ERROR) \
x(CONVERT_UNWRITTEN)

enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};

#define BCH_WRITE_ALLOC_NOWAIT (1U << __BCH_WRITE_ALLOC_NOWAIT)
#define BCH_WRITE_CACHED (1U << __BCH_WRITE_CACHED)
#define BCH_WRITE_DATA_ENCODED (1U << __BCH_WRITE_DATA_ENCODED)
#define BCH_WRITE_PAGES_STABLE (1U << __BCH_WRITE_PAGES_STABLE)
#define BCH_WRITE_PAGES_OWNED (1U << __BCH_WRITE_PAGES_OWNED)
#define BCH_WRITE_ONLY_SPECIFIED_DEVS (1U << __BCH_WRITE_ONLY_SPECIFIED_DEVS)
#define BCH_WRITE_WROTE_DATA_INLINE (1U << __BCH_WRITE_WROTE_DATA_INLINE)
#define BCH_WRITE_FROM_INTERNAL (1U << __BCH_WRITE_FROM_INTERNAL)
#define BCH_WRITE_CHECK_ENOSPC (1U << __BCH_WRITE_CHECK_ENOSPC)
#define BCH_WRITE_SYNC (1U << __BCH_WRITE_SYNC)
#define BCH_WRITE_MOVE (1U << __BCH_WRITE_MOVE)

/* Internal: */
#define BCH_WRITE_IN_WORKER (1U << __BCH_WRITE_IN_WORKER)
#define BCH_WRITE_DONE (1U << __BCH_WRITE_DONE)
#define BCH_WRITE_IO_ERROR (1U << __BCH_WRITE_IO_ERROR)
#define BCH_WRITE_CONVERT_UNWRITTEN (1U << __BCH_WRITE_CONVERT_UNWRITTEN)
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = 1U << __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};

static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{

@@ -124,6 +117,8 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio)
return wbio;
}

void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);

struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;
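The hunks above replace the hand-maintained __BCH_WRITE_* enum and the per-flag #defines with a single BCH_WRITE_FLAGS() x-macro, which also generates the bch2_write_flags[] name table that prt_bitflags() consumes in bch2_write_op_to_text(). A minimal standalone sketch of the same pattern follows; it is not bcachefs code, and the DEMO_* names are invented for illustration:

/* x-macro sketch: one list drives the enum, the bit values and the names */
#include <stdio.h>

#define DEMO_FLAGS()	\
	x(ALLOC_NOWAIT)	\
	x(CACHED)	\
	x(SYNC)

enum __demo_flags {
#define x(f) __DEMO_##f,
	DEMO_FLAGS()
#undef x
};

enum demo_flags {
#define x(f) DEMO_##f = 1U << __DEMO_##f,
	DEMO_FLAGS()
#undef x
};

static const char * const demo_flag_names[] = {
#define x(f) #f,
	DEMO_FLAGS()
#undef x
	NULL
};

int main(void)
{
	unsigned flags = DEMO_CACHED|DEMO_SYNC;
	unsigned i;

	/* same idea as prt_bitflags(): print the name of every set bit */
	for (i = 0; demo_flag_names[i]; i++)
		if (flags & (1U << i))
			printf("%s ", demo_flag_names[i]);
	printf("\n");
	return 0;
}

The payoff of the design is that adding a flag in one place updates the enum, the bit masks and the printable names together.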
@@ -119,7 +119,7 @@ struct bch_write_op {
unsigned nr_replicas_required:4;
unsigned alloc_reserve:3;
unsigned incompressible:1;
unsigned btree_update_ready:1;
unsigned stripe_waited:1;

struct bch_devs_list devs_have;
u16 target;
@@ -68,8 +68,9 @@ journal_seq_to_buf(struct journal *j, u64 seq)

static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->key_cache_list);
unsigned i;
for (i = 0; i < ARRAY_SIZE(p->list); i++)
INIT_LIST_HEAD(&p->list[i]);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, count);
p->devs.nr = 0;

@@ -758,19 +759,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
u64 *new_bucket_seq = NULL, *new_buckets = NULL;
struct open_bucket **ob = NULL;
long *bu = NULL;
unsigned i, nr_got = 0, nr_want = nr - ja->nr;
unsigned old_nr = ja->nr;
unsigned old_discard_idx = ja->discard_idx;
unsigned old_dirty_idx_ondisk = ja->dirty_idx_ondisk;
unsigned old_dirty_idx = ja->dirty_idx;
unsigned old_cur_idx = ja->cur_idx;
unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr;
int ret = 0;

if (c) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
mutex_lock(&c->sb_lock);
}
BUG_ON(nr <= ja->nr);

bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);

@@ -778,7 +770,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
ret = -ENOMEM;
goto err_unblock;
goto err_free;
}

for (nr_got = 0; nr_got < nr_want; nr_got++) {

@@ -794,87 +786,92 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (ret)
break;

ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_open_bucket_put(c, ob[nr_got]);
bch_err(c, "error marking new journal buckets: %s", bch2_err_str(ret));
break;
}

bu[nr_got] = ob[nr_got]->bucket;
}
}

if (!nr_got)
goto err_unblock;
goto err_free;

/*
* We may be called from the device add path, before the new device has
* actually been added to the running filesystem:
*/
if (!new_fs)
spin_lock(&c->journal.lock);
/* Don't return an error if we successfully allocated some buckets: */
ret = 0;

if (c) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
mutex_lock(&c->sb_lock);
}

memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);

BUG_ON(ja->discard_idx > ja->nr);

pos = ja->discard_idx ?: ja->nr;

memmove(new_buckets + pos + nr_got,
new_buckets + pos,
sizeof(new_buckets[0]) * (ja->nr - pos));
memmove(new_bucket_seq + pos + nr_got,
new_bucket_seq + pos,
sizeof(new_bucket_seq[0]) * (ja->nr - pos));

for (i = 0; i < nr_got; i++) {
unsigned pos = ja->discard_idx ?: ja->nr;
long b = bu[i];

__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
ja->nr++;

ja->buckets[pos] = b;
ja->bucket_seq[pos] = 0;

if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
if (pos <= ja->dirty_idx_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
if (pos <= ja->dirty_idx)
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
new_buckets[pos + i] = bu[i];
new_bucket_seq[pos + i] = 0;
}

ret = bch2_journal_buckets_to_sb(c, ca);
if (ret) {
/* Revert: */
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
ja->nr = old_nr;
ja->discard_idx = old_discard_idx;
ja->dirty_idx_ondisk = old_dirty_idx_ondisk;
ja->dirty_idx = old_dirty_idx;
ja->cur_idx = old_cur_idx;
}
nr = ja->nr + nr_got;

ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr);
if (ret)
goto err_unblock;

if (!new_fs)
spin_unlock(&c->journal.lock);

if (ja->nr != old_nr && !new_fs)
bch2_write_super(c);

/* Commit: */
if (c)
spin_lock(&c->journal.lock);

swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
ja->nr = nr;

if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr;
if (pos <= ja->dirty_idx_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr;
if (pos <= ja->dirty_idx)
ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr;

if (c)
spin_unlock(&c->journal.lock);
err_unblock:
if (c) {
bch2_journal_unblock(&c->journal);

if (ret)
goto err;

if (!new_fs) {
for (i = 0; i < nr_got; i++) {
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bu[i], BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_fs_inconsistent(c, "error marking new journal buckets: %i", ret);
goto err;
}
}
}
err:
if (c)
mutex_unlock(&c->sb_lock);
}

if (ob && !new_fs)
if (ret && !new_fs)
for (i = 0; i < nr_got; i++)
bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bu[i], BCH_DATA_free, 0));
err_free:
if (!new_fs)
for (i = 0; i < nr_got; i++)
bch2_open_bucket_put(c, ob[i]);

@@ -882,12 +879,7 @@ err:
kfree(new_buckets);
kfree(ob);
kfree(bu);

return ret;
err_unblock:
if (c)
bch2_journal_unblock(&c->journal);
goto err;
}

/*

@@ -901,13 +893,15 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
struct closure cl;
int ret = 0;

/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
return 0;

closure_init_stack(&cl);

while (ja->nr != nr) {
down_write(&c->state_lock);

/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
goto unlock;

while (ja->nr < nr) {
struct disk_reservation disk_res = { 0, 0 };

/*

@@ -938,7 +932,8 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,

if (ret)
bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));

unlock:
up_write(&c->state_lock);
return ret;
}

@@ -977,7 +972,7 @@ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
seq++) {
struct journal_buf *buf = journal_seq_to_buf(j, seq);

if (bch2_bkey_has_device(bkey_i_to_s_c(&buf->key), dev_idx))
if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx))
ret = true;
}
spin_unlock(&j->lock);

@@ -1353,6 +1348,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
unsigned i;

spin_lock(&j->lock);
*seq = max(*seq, j->pin.front);

@@ -1370,12 +1366,8 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
prt_newline(out);
printbuf_indent_add(out, 2);

list_for_each_entry(pin, &pin_list->list, list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}

list_for_each_entry(pin, &pin_list->key_cache_list, list) {
for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
list_for_each_entry(pin, &pin_list->list[i], list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
@@ -1339,8 +1339,7 @@ static void __journal_write_alloc(struct journal *j,
if (!ca->mi.durability ||
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
ca->dev_idx) ||
bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
sectors > ja->sectors_free)
continue;
@@ -2,6 +2,7 @@

#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"

@@ -318,9 +319,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
*/
while (!fifo_empty(&j->pin) &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).flushed));
BUG_ON(!fifo_pop(&j->pin, temp));
fifo_pop(&j->pin, temp);
popped = true;
}

@@ -379,6 +378,17 @@ void bch2_journal_pin_drop(struct journal *j,
spin_unlock(&j->lock);
}

enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
fn == bch2_btree_node_flush1)
return JOURNAL_PIN_btree;
else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_key_cache;
else
return JOURNAL_PIN_other;
}

void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)

@@ -407,10 +417,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin->seq = seq;
pin->flush = flush_fn;

if (flush_fn == bch2_btree_key_cache_journal_flush)
list_add(&pin->list, &pin_list->key_cache_list);
else if (flush_fn)
list_add(&pin->list, &pin_list->list);
if (flush_fn)
list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
else
list_add(&pin->list, &pin_list->flushed);

@@ -446,26 +454,23 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)

static struct journal_entry_pin *
journal_get_next_pin(struct journal *j,
bool get_any,
bool get_key_cache,
u64 max_seq, u64 *seq)
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
unsigned i;

fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > max_seq && !get_any && !get_key_cache)
if (*seq > seq_to_flush && !allowed_above_seq)
break;

if (*seq <= max_seq || get_any) {
ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list);
if (ret)
return ret;
}

if (*seq <= max_seq || get_any || get_key_cache) {
ret = list_first_entry_or_null(&pin_list->key_cache_list,
for (i = 0; i < JOURNAL_PIN_NR; i++)
if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
((1U << i) & allowed_above_seq)) {
ret = list_first_entry_or_null(&pin_list->list[i],
struct journal_entry_pin, list);
if (ret)
return ret;

@@ -476,7 +481,10 @@ journal_get_next_pin(struct journal *j,
}

/* returns true if we did work */
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
static size_t journal_flush_pins(struct journal *j,
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
unsigned min_any,
unsigned min_key_cache)
{

@@ -489,15 +497,25 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
lockdep_assert_held(&j->reclaim_lock);

while (1) {
unsigned allowed_above = allowed_above_seq;
unsigned allowed_below = allowed_below_seq;

if (min_any) {
allowed_above |= ~0;
allowed_below |= ~0;
}

if (min_key_cache) {
allowed_above |= 1U << JOURNAL_PIN_key_cache;
allowed_below |= 1U << JOURNAL_PIN_key_cache;
}

cond_resched();

j->last_flushed = jiffies;

spin_lock(&j->lock);
pin = journal_get_next_pin(j,
min_any != 0,
min_key_cache != 0,
seq_to_flush, &seq);
pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;

@@ -656,6 +674,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
atomic_long_read(&c->btree_key_cache.nr_keys));

nr_flushed = journal_flush_pins(j, seq_to_flush,
~0, 0,
min_nr, min_key_cache);

if (direct)

@@ -776,7 +795,11 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,

mutex_lock(&j->reclaim_lock);

if (journal_flush_pins(j, seq_to_flush, 0, 0))
if (journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_key_cache)|
(1U << JOURNAL_PIN_other), 0, 0, 0) ||
journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_btree), 0, 0, 0))
*did_work = true;

spin_lock(&j->lock);
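The journal reclaim hunks above replace the get_any/get_key_cache booleans with per-class bitmasks: every pin now sits on one of the JOURNAL_PIN_btree/key_cache/other lists, and callers pass allowed_below_seq/allowed_above_seq masks saying which classes may be flushed below or unconditionally above seq_to_flush. A rough standalone sketch of that filtering (simplified userspace code, not the kernel implementation):

/* pin-class bitmask filtering, roughly as journal_get_next_pin() now does */
#include <stdbool.h>
#include <stdio.h>

enum pin_type { PIN_btree, PIN_key_cache, PIN_other, PIN_NR };

static bool pin_class_allowed(enum pin_type t, unsigned long long seq,
			      unsigned long long seq_to_flush,
			      unsigned allowed_below, unsigned allowed_above)
{
	/* eligible if flushable below the target seq, or always above it: */
	return (((1U << t) & allowed_below) && seq <= seq_to_flush) ||
		((1U << t) & allowed_above);
}

int main(void)
{
	/* journal_flush_done() first flushes key cache + other pins,
	 * then btree node pins, roughly like this: */
	unsigned first_pass  = (1U << PIN_key_cache)|(1U << PIN_other);
	unsigned second_pass = 1U << PIN_btree;

	printf("btree pin at seq 5, flush to 10, first pass:  %d\n",
	       pin_class_allowed(PIN_btree, 5, 10, first_pass, 0));
	printf("btree pin at seq 5, flush to 10, second pass: %d\n",
	       pin_class_allowed(PIN_btree, 5, 10, second_pass, 0));
	return 0;
}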
@@ -175,46 +175,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
.to_text = bch2_sb_journal_v2_to_text,
};

int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
u64 *buckets, unsigned nr)
{
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal_v2 *j;
unsigned i, dst = 0, nr = 1;
unsigned i, dst = 0, nr_compacted = 1;

if (c)
lockdep_assert_held(&c->sb_lock);

if (!ja->nr) {
if (!nr) {
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
return 0;
}

for (i = 0; i + 1 < ja->nr; i++)
if (ja->buckets[i] + 1 != ja->buckets[i + 1])
nr++;
for (i = 0; i + 1 < nr; i++)
if (buckets[i] + 1 != buckets[i + 1])
nr_compacted++;

j = bch2_sb_resize_journal_v2(&ca->disk_sb,
(sizeof(*j) + sizeof(j->d[0]) * nr) / sizeof(u64));
(sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
if (!j)
return -BCH_ERR_ENOSPC_sb_journal;

bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);

j->d[dst].start = le64_to_cpu(ja->buckets[0]);
j->d[dst].start = le64_to_cpu(buckets[0]);
j->d[dst].nr = le64_to_cpu(1);

for (i = 1; i < ja->nr; i++) {
if (ja->buckets[i] == ja->buckets[i - 1] + 1) {
for (i = 1; i < nr; i++) {
if (buckets[i] == buckets[i - 1] + 1) {
le64_add_cpu(&j->d[dst].nr, 1);
} else {
dst++;
j->d[dst].start = le64_to_cpu(ja->buckets[i]);
j->d[dst].start = le64_to_cpu(buckets[i]);
j->d[dst].nr = le64_to_cpu(1);
}
}

BUG_ON(dst + 1 != nr);

BUG_ON(dst + 1 != nr_compacted);
return 0;
}
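bch2_journal_buckets_to_sb() now takes the bucket array and count from the caller instead of reading ja->buckets, but the on-disk encoding is unchanged: runs of consecutive bucket numbers are collapsed into (start, nr) ranges. A small standalone sketch of that run-length compaction (an assumed simplification using plain C types rather than the superblock structures):

/* run-length encode sorted bucket numbers into (start, nr) ranges */
#include <stdio.h>

struct range { unsigned long long start, nr; };

static unsigned buckets_to_ranges(const unsigned long long *buckets,
				  unsigned nr, struct range *out)
{
	unsigned i, dst = 0;

	if (!nr)
		return 0;

	out[dst].start = buckets[0];
	out[dst].nr = 1;

	for (i = 1; i < nr; i++) {
		if (buckets[i] == buckets[i - 1] + 1) {
			out[dst].nr++;		/* extend the current run */
		} else {
			dst++;			/* start a new run */
			out[dst].start = buckets[i];
			out[dst].nr = 1;
		}
	}
	return dst + 1;
}

int main(void)
{
	unsigned long long b[] = { 10, 11, 12, 40, 41, 99 };
	struct range r[6];
	unsigned n = buckets_to_ranges(b, 6, r), i;

	for (i = 0; i < n; i++)
		printf("start %llu nr %llu\n", r[i].start, r[i].nr);
	return 0;
}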
@@ -21,4 +21,4 @@ static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_j
extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;

int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *);
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);
@@ -43,9 +43,15 @@ struct journal_buf {
* flushed:
*/

enum journal_pin_type {
JOURNAL_PIN_btree,
JOURNAL_PIN_key_cache,
JOURNAL_PIN_other,
JOURNAL_PIN_NR,
};

struct journal_entry_pin_list {
struct list_head list;
struct list_head key_cache_list;
struct list_head list[JOURNAL_PIN_NR];
struct list_head flushed;
atomic_t count;
struct bch_devs_list devs;
@@ -46,7 +46,7 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
struct bkey_i *n;
int ret;

if (!bch2_bkey_has_device(k, dev_idx))
if (!bch2_bkey_has_device_c(k, dev_idx))
return 0;

n = bch2_bkey_make_mut(trans, k);

@@ -130,8 +130,7 @@ retry:
while (bch2_trans_begin(&trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
dev_idx))
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
goto next;

bch2_bkey_buf_copy(&k, c, &b->key);
@@ -41,7 +41,8 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
}

struct moving_io {
struct list_head list;
struct list_head read_list;
struct list_head io_list;
struct move_bucket_in_flight *b;
struct closure cl;
bool read_completed;

@@ -65,8 +66,12 @@ static void move_free(struct moving_io *io)
atomic_dec(&io->b->count);

bch2_data_update_exit(&io->write);

mutex_lock(&ctxt->lock);
list_del(&io->io_list);
wake_up(&ctxt->wait);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
mutex_unlock(&ctxt->lock);

kfree(io);
}

@@ -101,7 +106,7 @@ static void move_write(struct moving_io *io)
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
struct moving_io *io =
list_first_entry_or_null(&ctxt->reads, struct moving_io, list);
list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);

return io && io->read_completed ? io : NULL;
}

@@ -128,7 +133,7 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
bch2_trans_unlock(trans);

while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
list_del(&io->list);
list_del(&io->read_list);
move_write(io);
}
}

@@ -145,6 +150,8 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,

void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->c;

move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);

@@ -154,12 +161,15 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt)
EBUG_ON(atomic_read(&ctxt->read_ios));

if (ctxt->stats) {
progress_list_del(ctxt->c, ctxt->stats);

trace_move_data(ctxt->c,
progress_list_del(c, ctxt->stats);
trace_move_data(c,
atomic64_read(&ctxt->stats->sectors_moved),
atomic64_read(&ctxt->stats->keys_moved));
}

mutex_lock(&c->moving_context_lock);
list_del(&ctxt->list);
mutex_unlock(&c->moving_context_lock);
}

void bch2_moving_ctxt_init(struct moving_context *ctxt,

@@ -172,15 +182,23 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
memset(ctxt, 0, sizeof(*ctxt));

ctxt->c = c;
ctxt->fn = (void *) _RET_IP_;
ctxt->rate = rate;
ctxt->stats = stats;
ctxt->wp = wp;
ctxt->wait_on_copygc = wait_on_copygc;

closure_init_stack(&ctxt->cl);

mutex_init(&ctxt->lock);
INIT_LIST_HEAD(&ctxt->reads);
INIT_LIST_HEAD(&ctxt->ios);
init_waitqueue_head(&ctxt->wait);

mutex_lock(&c->moving_context_lock);
list_add(&ctxt->list, &c->moving_context_list);
mutex_unlock(&c->moving_context_lock);

if (stats) {
progress_list_add(c, stats);
stats->data_type = BCH_DATA_user;

@@ -262,9 +280,6 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0;
}

if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
return -BCH_ERR_erofs_no_writes;

/*
* Before memory allocations & taking nocow locks in
* bch2_data_update_init():

@@ -334,9 +349,14 @@ static int bch2_move_extent(struct btree_trans *trans,
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
trace_move_extent_read(k.k);

mutex_lock(&ctxt->lock);
atomic_add(io->read_sectors, &ctxt->read_sectors);
atomic_inc(&ctxt->read_ios);
list_add_tail(&io->list, &ctxt->reads);

list_add_tail(&io->read_list, &ctxt->reads);
list_add_tail(&io->io_list, &ctxt->ios);
mutex_unlock(&ctxt->lock);

/*
* dropped by move_read_endio() - guards against use after free of

@@ -354,7 +374,6 @@ err_free_pages:
err_free:
kfree(io);
err:
bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret;
}

@@ -759,8 +778,13 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
data_opts.rewrite_ptrs = 0;

bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
if (ptr->dev == bucket.inode)
if (ptr->dev == bucket.inode) {
data_opts.rewrite_ptrs |= 1U << i;
if (ptr->cached) {
bch2_trans_iter_exit(trans, &iter);
goto next;
}
}
i++;
}

@@ -819,14 +843,6 @@ next:
}

trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);

if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
bch2_trans_unlock(trans);
move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);
if (!ctxt->write_error)
bch2_verify_bucket_evacuated(trans, bucket, gen);
}
err:
bch2_bkey_buf_exit(&sk, c);
return ret;

@@ -1111,3 +1127,67 @@ int bch2_data_job(struct bch_fs *c,

return ret;
}

void bch2_data_jobs_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_move_stats *stats;

mutex_lock(&c->data_progress_lock);
list_for_each_entry(stats, &c->data_progress_list, list) {
prt_printf(out, "%s: data type %s btree_id %s position: ",
stats->name,
bch2_data_types[stats->data_type],
bch2_btree_ids[stats->btree_id]);
bch2_bpos_to_text(out, stats->pos);
prt_printf(out, "%s", "\n");
}
mutex_unlock(&c->data_progress_lock);
}

static void bch2_moving_ctxt_to_text(struct printbuf *out, struct moving_context *ctxt)
{
struct moving_io *io;

prt_printf(out, "%ps:", ctxt->fn);
prt_newline(out);
printbuf_indent_add(out, 2);

prt_printf(out, "reads: %u sectors %u",
atomic_read(&ctxt->read_ios),
atomic_read(&ctxt->read_sectors));
prt_newline(out);

prt_printf(out, "writes: %u sectors %u",
atomic_read(&ctxt->write_ios),
atomic_read(&ctxt->write_sectors));
prt_newline(out);

printbuf_indent_add(out, 2);

mutex_lock(&ctxt->lock);
list_for_each_entry(io, &ctxt->ios, io_list) {
bch2_write_op_to_text(out, &io->write.op);
}
mutex_unlock(&ctxt->lock);

printbuf_indent_sub(out, 4);
}

void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
struct moving_context *ctxt;

mutex_lock(&c->moving_context_lock);
list_for_each_entry(ctxt, &c->moving_context_list, list)
bch2_moving_ctxt_to_text(out, ctxt);
mutex_unlock(&c->moving_context_lock);
}

void bch2_fs_move_init(struct bch_fs *c)
{
INIT_LIST_HEAD(&c->moving_context_list);
mutex_init(&c->moving_context_lock);

INIT_LIST_HEAD(&c->data_progress_list);
mutex_init(&c->data_progress_lock);
}
@@ -11,6 +11,9 @@ struct bch_read_bio;

struct moving_context {
struct bch_fs *c;
struct list_head list;
void *fn;

struct bch_ratelimit *rate;
struct bch_move_stats *stats;
struct write_point_specifier wp;

@@ -19,7 +22,10 @@ struct moving_context {

/* For waiting on outstanding reads and writes: */
struct closure cl;

struct mutex lock;
struct list_head reads;
struct list_head ios;

/* in flight sectors: */
atomic_t read_sectors;

@@ -84,6 +90,9 @@ int bch2_data_job(struct bch_fs *,
struct bch_ioctl_data);

void bch2_move_stats_init(struct bch_move_stats *stats, char *name);
void bch2_data_jobs_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);

void bch2_fs_move_init(struct bch_fs *);

#endif /* _BCACHEFS_MOVE_H */
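The move.c/move.h hunks above split the single moving_io list membership in two: read_list keeps the read-completion ordering on ctxt->reads, while io_list puts every in-flight io on ctxt->ios so bch2_moving_ctxt_to_text() can walk them for debugging. A standalone sketch of one object sitting on two intrusive lists, using userspace stand-ins for list_head and container_of (not the kernel implementation):

/* two intrusive list memberships on one allocation */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_io {
	struct list_head read_list;	/* stands in for ctxt->reads membership */
	struct list_head io_list;	/* stands in for ctxt->ios membership */
	int id;
};

int main(void)
{
	struct list_head reads, ios;
	struct demo_io a = { .id = 1 }, b = { .id = 2 };
	struct list_head *p;

	list_init(&reads);
	list_init(&ios);

	/* one allocation, membership on both lists: */
	list_add_tail(&a.read_list, &reads);
	list_add_tail(&a.io_list, &ios);
	list_add_tail(&b.read_list, &reads);
	list_add_tail(&b.io_list, &ios);

	for (p = ios.next; p != &ios; p = p->next)
		printf("io %d\n", container_of(p, struct demo_io, io_list)->id);
	return 0;
}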
@@ -46,7 +46,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
return 0;

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
bch2_trans_iter_exit(trans, &iter);

@@ -85,7 +85,7 @@ static int move_bucket_cmp(const void *_l, const void *_r)
const struct move_bucket *l = _l;
const struct move_bucket *r = _r;

return bpos_cmp(l->bucket, r->bucket) ?: cmp_int(l->gen, r->gen);
return bkey_cmp(l->bucket, r->bucket);
}

static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)

@@ -178,13 +178,13 @@ static int bch2_copygc(struct btree_trans *trans,
move_buckets_in_flight *buckets_in_flight)
{
struct bch_fs *c = trans->c;
struct bch_move_stats move_stats;
struct data_update_opts data_opts = {
.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
};
move_buckets buckets = { 0 };
struct move_bucket_in_flight *f;
struct move_bucket *i;
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;

ret = bch2_btree_write_buffer_flush(trans);

@@ -192,9 +192,6 @@ static int bch2_copygc(struct btree_trans *trans,
__func__, bch2_err_str(ret)))
return ret;

bch2_move_stats_init(&move_stats, "copygc");
ctxt->stats = &move_stats;

ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
if (ret)
goto err;

@@ -222,8 +219,8 @@ err:
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));

trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
ctxt->stats = NULL;
moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
trace_and_count(c, copygc, c, moved, 0, 0, 0);
return ret;
}

@@ -282,6 +279,7 @@ static int bch2_copygc_thread(void *arg)
struct bch_fs *c = arg;
struct btree_trans trans;
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
move_buckets_in_flight move_buckets;
u64 last, wait;

@@ -294,7 +292,9 @@ static int bch2_copygc_thread(void *arg)

set_freezable();
bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, NULL, NULL,

bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
false);

@@ -334,8 +334,8 @@ static int bch2_copygc_thread(void *arg)
wake_up(&c->copygc_running_wq);
}

bch2_moving_ctxt_exit(&ctxt);
bch2_trans_exit(&trans);
bch2_moving_ctxt_exit(&ctxt);
free_fifo(&move_buckets);

return 0;
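In the copygc hunks above, the stats now belong to the long-lived moving_context set up in bch2_copygc_thread(), so each bch2_copygc() call samples ctxt->stats->sectors_moved before and after and traces only its own delta. A tiny sketch of that delta-from-shared-counter pattern (an assumed simplification, not the kernel code):

/* per-call delta from a counter that persists across calls */
#include <stdio.h>

static unsigned long long sectors_moved;	/* stands in for ctxt->stats->sectors_moved */

static void do_some_work(unsigned long long n) { sectors_moved += n; }

static unsigned long long one_copygc_pass(unsigned long long n)
{
	unsigned long long moved = sectors_moved;	/* sample before */

	do_some_work(n);

	return sectors_moved - moved;			/* report only this pass */
}

int main(void)
{
	printf("pass 1 moved %llu\n", one_copygc_pass(100));
	printf("pass 2 moved %llu\n", one_copygc_pass(250));
	return 0;
}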
@@ -92,6 +92,12 @@ enum opt_type {
#define RATELIMIT_ERRORS_DEFAULT false
#endif

#ifdef CONFIG_BCACHEFS_DEBUG
#define BCACHEFS_VERBOSE_DEFAULT true
#else
#define BCACHEFS_VERBOSE_DEFAULT false
#endif

#define BCH_OPTS() \
x(block_size, u16, \
OPT_FS|OPT_FORMAT| \

@@ -276,7 +282,7 @@ enum opt_type {
x(verbose, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
BCH2_NO_SB_OPT, BCACHEFS_VERBOSE_DEFAULT, \
NULL, "Extra debugging information during mount/recovery")\
x(journal_flush_delay, u32, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
@@ -189,7 +189,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,

for_each_btree_key_norestart(trans, reflink_iter, BTREE_ID_reflink,
POS(0, c->reflink_hint),
BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
BTREE_ITER_SLOTS, k, ret) {
if (reflink_iter.pos.inode) {
bch2_btree_iter_set_pos(&reflink_iter, POS_MIN);
continue;
@@ -513,7 +513,9 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
n->v.pad = 0;
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);

ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
if (ret)
goto err;
@@ -206,11 +206,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned i, clean_passes = 0;
u64 seq = 0;

bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true);
bch2_rebalance_stop(c);
bch2_copygc_stop(c);
bch2_gc_thread_stop(c);
bch2_fs_ec_flush(c);

bch_verbose(c, "flushing journal and stopping allocators");
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal));

do {
clean_passes++;

@@ -224,7 +228,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
} while (clean_passes < 2);

bch_verbose(c, "flushing journal and stopping allocators complete");
bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu",
journal_cur_seq(&c->journal));

if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))

@@ -679,6 +684,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_rebalance_init(c);
bch2_fs_quota_init(c);
bch2_fs_ec_init_early(c);
bch2_fs_move_init(c);

INIT_LIST_HEAD(&c->list);

@@ -697,17 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);

INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);

INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);

INIT_LIST_HEAD(&c->data_progress_list);
mutex_init(&c->data_progress_lock);

mutex_init(&c->ec_stripes_heap_lock);

seqcount_init(&c->gc_pos_lock);

seqcount_init(&c->usage_lock);
@@ -248,6 +248,7 @@ read_attribute(io_timers_read);
read_attribute(io_timers_write);

read_attribute(data_jobs);
read_attribute(moving_ctxts);

#ifdef CONFIG_BCACHEFS_TESTS
write_attribute(perf_test);

@@ -277,25 +278,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
return ret;
}

static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
{
long ret = 0;
struct bch_move_stats *stats;

mutex_lock(&c->data_progress_lock);
list_for_each_entry(stats, &c->data_progress_list, list) {
prt_printf(out, "%s: data type %s btree_id %s position: ",
stats->name,
bch2_data_types[stats->data_type],
bch2_btree_ids[stats->btree_id]);
bch2_bpos_to_text(out, stats->pos);
prt_printf(out, "%s", "\n");
}

mutex_unlock(&c->data_progress_lock);
return ret;
}

static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans trans;

@@ -476,7 +458,10 @@ SHOW(bch2_fs)
bch2_io_timers_to_text(out, &c->io_clock[WRITE]);

if (attr == &sysfs_data_jobs)
data_progress_to_text(out, c);
bch2_data_jobs_to_text(out, c);

if (attr == &sysfs_moving_ctxts)
bch2_fs_moving_ctxts_to_text(out, c);

#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)

@@ -693,6 +678,7 @@ struct attribute *bch2_fs_internal_files[] = {
sysfs_pd_controller_files(rebalance),

&sysfs_data_jobs,
&sysfs_moving_ctxts,

&sysfs_internal_uuid,
NULL
linux/six.c
@@ -143,8 +143,17 @@ static int __do_six_trylock_type(struct six_lock *lock,
* lock, issue a wakeup because we might have caused a
* spurious trylock failure:
*/
#if 0
/*
* This code should be sufficient, but we're seeing unexplained
* lost wakeups:
*/
if (old.write_locking)
ret = -1 - SIX_LOCK_write;
#else
if (!ret)
ret = -1 - SIX_LOCK_write;
#endif
} else if (type == SIX_LOCK_write && lock->readers) {
if (try) {
atomic64_add(__SIX_VAL(write_locking, 1),

@@ -320,11 +329,10 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
* Similar to the lock path, we may have caused a spurious write
* lock fail and need to issue a wakeup:
*/
if (old.write_locking)
six_lock_wakeup(lock, old, SIX_LOCK_write);

if (ret)
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
else
six_lock_wakeup(lock, old, SIX_LOCK_write);

return ret;
}