Update bcachefs sources to 72405e7ff8 bcachefs: Fix bch2_check_extents_to_backpointers()

Kent Overstreet 2023-03-14 12:56:38 -04:00
parent 46ba4fb48c
commit fa35853772
40 changed files with 824 additions and 522 deletions

View File

@ -1 +1 @@
3856459b1b9f37cebee2bca3c9edcafaf393aa98
72405e7ff8c5fb569b74b046d19866ee480f29b7

View File

@ -1006,7 +1006,7 @@ static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
iter = bucket->inode;
ca = __bch2_next_dev(c, &iter, NULL);
if (ca)
bucket->offset = ca->mi.first_bucket;
*bucket = POS(ca->dev_idx, ca->mi.first_bucket);
rcu_read_unlock();
return ca != NULL;
@ -2158,43 +2158,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
*/
bch2_recalc_capacity(c);
/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, &c->write_points[i]);
bch2_writepoint_stop(c, ca, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, &c->btree_write_point);
mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
if (ob->dev == ca->dev_idx) {
swap(c->open_buckets_partial[i],
c->open_buckets_partial[--c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);
bch2_open_buckets_stop(c, ca, false);
/*
* Wake up threads that were blocked on allocation, so they can notice

View File

@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
u64 free = max_t(s64, 0,
u.d[BCH_DATA_free].buckets
+ u.d[BCH_DATA_need_discard].buckets
- bch2_dev_buckets_reserved(ca, RESERVE_none));
- bch2_dev_buckets_reserved(ca, RESERVE_stripe));
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
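The hunk above only changes which reserve is subtracted from the free-bucket estimate; the invalidate heuristic itself is unchanged. As a rough standalone illustration of that arithmetic (the bucket counts below are made up, not taken from any real device):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical per-device bucket counts. */
	int64_t want_free    = 1000;	/* target number of free buckets */
	int64_t free_buckets =  300;	/* currently free */
	int64_t need_discard =  100;	/* freed but not yet discarded */
	int64_t reserved     =   50;	/* held back for the reserve */
	int64_t cached       =  400;	/* cached buckets we may evict */

	/* max_t(s64, 0, ...): never report negative free space */
	int64_t free = free_buckets + need_discard - reserved;
	if (free < 0)
		free = 0;

	/* clamp_t(s64, want_free - free, 0, cached) */
	int64_t invalidate = want_free - free;
	if (invalidate < 0)
		invalidate = 0;
	if (invalidate > cached)
		invalidate = cached;

	printf("invalidate %lld buckets\n", (long long)invalidate);	/* 400 */
	return 0;
}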

View File

@ -97,7 +97,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
if (ob->ec) {
ec_stripe_new_put(c, ob->ec);
ec_stripe_new_put(c, ob->ec, STRIPE_REF_io);
return;
}
@ -658,9 +658,11 @@ static int add_new_bucket(struct bch_fs *c,
bch_dev_bkey_exists(c, ob->dev)->mi.durability;
BUG_ON(*nr_effective >= nr_replicas);
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
__clear_bit(ob->dev, devs_may_alloc->d);
*nr_effective += durability;
*nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
? durability : 1;
*have_cache |= !durability;
ob_push(c, ptrs, ob);
@ -679,6 +681,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
enum bch_data_type data_type,
enum alloc_reserve reserve,
struct closure *cl)
@ -729,7 +732,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
if (add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, 0, ob)) {
have_cache, flags, ob)) {
ret = 0;
break;
}
@ -796,7 +799,7 @@ got_bucket:
ob->ec_idx = ec_idx;
ob->ec = h->s;
ec_stripe_new_get(h->s);
ec_stripe_new_get(h->s, STRIPE_REF_io);
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
@ -823,7 +826,7 @@ static bool want_bucket(struct bch_fs *c,
return false;
if (!ca->mi.durability &&
(wp->data_type != BCH_DATA_user || !*have_cache))
(wp->data_type == BCH_DATA_btree || ec || *have_cache))
return false;
if (ec != (ob->ec != NULL))
@ -877,6 +880,9 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
spin_lock(&c->freelist_lock);
if (!c->open_buckets_partial_nr)
goto unlock;
for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
@ -902,7 +908,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
break;
}
}
unlock:
spin_unlock(&c->freelist_lock);
return ret;
}
@ -967,7 +973,7 @@ retry_blocking:
*/
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
wp->data_type, reserve, cl);
flags, wp->data_type, reserve, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
@ -1017,43 +1023,94 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
return ret < 0 ? ret : 0;
}
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
struct open_buckets *obs)
static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
struct bch_dev *ca, bool ec)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob, *ob2;
unsigned i, j;
open_bucket_for_each(c, obs, ob, i) {
bool drop = !ca || ob->dev == ca->dev_idx;
if (ec) {
return ob->ec != NULL;
} else if (ca) {
bool drop = ob->dev == ca->dev_idx;
struct open_bucket *ob2;
unsigned i;
if (!drop && ob->ec) {
mutex_lock(&ob->ec->lock);
for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
if (!ob->ec->blocks[j])
for (i = 0; i < ob->ec->new_stripe.key.v.nr_blocks; i++) {
if (!ob->ec->blocks[i])
continue;
ob2 = c->open_buckets + ob->ec->blocks[j];
ob2 = c->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx;
}
mutex_unlock(&ob->ec->lock);
}
if (drop)
return drop;
} else {
return true;
}
}
static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec, struct write_point *wp)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob;
unsigned i;
mutex_lock(&wp->lock);
open_bucket_for_each(c, &wp->ptrs, ob, i)
if (should_drop_bucket(ob, c, ca, ec))
bch2_open_bucket_put(c, ob);
else
ob_push(c, &ptrs, ob);
}
*obs = ptrs;
wp->ptrs = ptrs;
mutex_unlock(&wp->lock);
}
void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec)
{
mutex_lock(&wp->lock);
bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
mutex_unlock(&wp->lock);
unsigned i;
/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
spin_lock(&c->freelist_lock);
i = 0;
while (i < c->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
if (should_drop_bucket(ob, c, ca, ec)) {
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
ob->on_partial_list = false;
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
} else {
i++;
}
}
spin_unlock(&c->freelist_lock);
bch2_ec_stop_dev(c, ca);
}
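The open_buckets_partial loop in the new bch2_open_buckets_stop() removes matching entries from an unordered array by swapping each hit with the last element and dropping the freelist lock around the put. A minimal userspace sketch of that removal pattern (a pthread mutex stands in for the spinlock, and a trivial predicate stands in for should_drop_bucket()):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int entries[8] = { 1, 4, 2, 4, 3, 4 };
static unsigned nr = 6;

/* Stand-in for should_drop_bucket(): drop everything equal to key. */
static int should_drop(int v, int key)
{
	return v == key;
}

/* Stand-in for bch2_open_bucket_put(); may block, so not under the lock. */
static void put_entry(int v)
{
	printf("dropped %d\n", v);
}

static void drop_matching(int key)
{
	unsigned i = 0;

	pthread_mutex_lock(&lock);
	while (i < nr) {
		int v = entries[i];

		if (should_drop(v, key)) {
			/*
			 * Order doesn't matter, so fill the hole with the
			 * last element instead of shifting the whole array:
			 */
			entries[i] = entries[--nr];

			/* Do the put outside the lock... */
			pthread_mutex_unlock(&lock);
			put_entry(v);
			pthread_mutex_lock(&lock);
			/*
			 * ...and don't advance i: the slot we just filled
			 * has to be re-checked after retaking the lock.
			 */
		} else {
			i++;
		}
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	drop_matching(4);	/* leaves 1, 3, 2 (in some order) */
	return 0;
}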
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
@ -1101,8 +1158,7 @@ static bool try_increase_writepoints(struct bch_fs *c)
return true;
}
static bool try_decrease_writepoints(struct bch_fs *c,
unsigned old_nr)
static bool try_decrease_writepoints(struct bch_fs *c, unsigned old_nr)
{
struct write_point *wp;
@ -1123,7 +1179,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
hlist_del_rcu(&wp->node);
mutex_unlock(&c->write_points_hash_lock);
bch2_writepoint_stop(c, NULL, wp);
bch2_writepoint_stop(c, NULL, false, wp);
return true;
}
@ -1217,6 +1273,8 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
int ret;
int i;
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
ptrs.nr = 0;
@ -1230,13 +1288,7 @@ retry:
if (wp->data_type != BCH_DATA_user)
have_cache = true;
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
} else {
if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
@ -1246,11 +1298,28 @@ retry:
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
goto allocate_blocking;
/*
* Only try to allocate cache (durability = 0 devices) from the
* specified target:
*/
have_cache = true;
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
} else {
allocate_blocking:
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
flags, cl);
}
alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas);
@ -1380,14 +1449,16 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
unsigned data_type = ob->data_type;
barrier(); /* READ_ONCE() doesn't work on bitfields */
prt_printf(out, "%zu ref %u %s %u:%llu gen %u",
prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
ob - c->open_buckets,
atomic_read(&ob->pin),
data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
ob->dev, ob->bucket, ob->gen);
ob->dev, ob->bucket, ob->gen,
ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
if (ob->ec)
prt_printf(out, " ec idx %llu", ob->ec->idx);
if (ob->on_partial_list)

View File

@ -151,7 +151,7 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *,
unsigned, unsigned *, bool *, unsigned,
enum bch_data_type, enum alloc_reserve,
struct closure *);
@ -202,11 +202,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
struct bkey_i *, unsigned, bool);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
struct open_buckets *);
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
static inline struct write_point_specifier writepoint_hashed(unsigned long v)
{

View File

@ -549,13 +549,18 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
bch2_check_btree_backpointer(&trans, &iter, k)));
}
struct bpos_level {
unsigned level;
struct bpos pos;
};
static int check_bp_exists(struct btree_trans *trans,
struct bpos bucket_pos,
struct bch_backpointer bp,
struct bkey_s_c orig_k,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter, bp_iter = { NULL };
@ -600,8 +605,11 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
if (!bpos_eq(*last_flushed_pos, orig_k.k->p)) {
*last_flushed_pos = orig_k.k->p;
if (last_flushed->level != bp.level ||
!bpos_eq(last_flushed->pos, orig_k.k->p)) {
last_flushed->level = bp.level;
last_flushed->pos = orig_k.k->p;
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
@ -639,7 +647,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs;
@ -668,7 +676,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
ret = check_bp_exists(trans, bucket_pos, bp, k,
bucket_start, bucket_end,
last_flushed_pos);
last_flushed);
if (ret)
return ret;
}
@ -680,7 +688,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos bucket_start,
struct bpos bucket_end,
struct bpos *last_flushed_pos)
struct bpos_level *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@ -709,12 +717,12 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
if (p.ptr.cached)
continue;
bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
k, p, &bucket_pos, &bp);
ret = check_bp_exists(trans, bucket_pos, bp, k,
bucket_start, bucket_end,
last_flushed_pos);
last_flushed);
if (ret)
goto err;
}
@ -794,7 +802,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
{
struct btree_iter iter;
enum btree_id btree_id;
struct bpos last_flushed_pos = SPOS_MAX;
struct bpos_level last_flushed = { UINT_MAX };
int ret = 0;
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
@ -811,7 +819,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
check_extent_to_backpointers(trans, &iter,
bucket_start, bucket_end,
&last_flushed_pos));
&last_flushed));
if (ret)
break;
} while (!bch2_btree_iter_advance(&iter));
@ -826,7 +834,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
check_btree_root_to_backpointers(trans, btree_id,
bucket_start, bucket_end,
&last_flushed_pos));
&last_flushed));
if (ret)
break;
}
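check_bp_exists() now keys its "already flushed the write buffer for this key" memo on (btree level, pos) rather than pos alone, so an extent and a btree node key at the same position no longer defeat the dedup and force repeated flushes. A toy version of that memo (the struct names here are local to the example):

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for struct bpos / struct bpos_level. */
struct pos		{ unsigned long long inode, offset; };
struct pos_level	{ unsigned level; struct pos pos; };

static bool pos_eq(struct pos a, struct pos b)
{
	return a.inode == b.inode && a.offset == b.offset;
}

/*
 * True if we should flush the write buffer (recording what we flushed for),
 * false if we already flushed for this exact (level, pos) and should go on
 * to report the missing backpointer instead.
 */
static bool should_flush(struct pos_level *last, unsigned level, struct pos pos)
{
	if (last->level == level && pos_eq(last->pos, pos))
		return false;

	last->level = level;
	last->pos = pos;
	return true;
}

int main(void)
{
	/* Start from an impossible level, like the { UINT_MAX } initializer. */
	struct pos_level last = { .level = (unsigned)-1 };
	struct pos p = { 1, 4096 };

	printf("%d\n", should_flush(&last, 0, p));	/* 1: first time */
	printf("%d\n", should_flush(&last, 1, p));	/* 1: same pos, new level */
	printf("%d\n", should_flush(&last, 1, p));	/* 0: already flushed */
	return 0;
}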

View File

@ -214,8 +214,11 @@
#define BCH_WRITE_REF_DEBUG
#endif
#ifndef dynamic_fault
#define dynamic_fault(...) 0
#define race_fault(...) 0
#endif
#define race_fault(...) dynamic_fault("bcachefs:race")
#define trace_and_count(_c, _name, ...) \
do { \
@ -652,7 +655,6 @@ typedef struct {
x(fallocate) \
x(discard) \
x(invalidate) \
x(move) \
x(delete_dead_snapshots) \
x(snapshot_delete_pagecache) \
x(sysfs)
@ -922,6 +924,13 @@ struct bch_fs {
mempool_t large_bkey_pool;
/* MOVE.C */
struct list_head moving_context_list;
struct mutex moving_context_lock;
struct list_head data_progress_list;
struct mutex data_progress_lock;
/* REBALANCE */
struct bch_fs_rebalance rebalance;
@ -932,10 +941,6 @@ struct bch_fs {
bool copygc_running;
wait_queue_head_t copygc_running_wq;
/* DATA PROGRESS STATS */
struct list_head data_progress_list;
struct mutex data_progress_lock;
/* STRIPES: */
GENRADIX(struct stripe) stripes;
GENRADIX(struct gc_stripe) gc_stripes;
@ -952,14 +957,14 @@ struct bch_fs {
struct list_head ec_stripe_new_list;
struct mutex ec_stripe_new_lock;
wait_queue_head_t ec_stripe_new_wait;
struct work_struct ec_stripe_create_work;
u64 ec_stripe_hint;
struct bio_set ec_bioset;
struct work_struct ec_stripe_delete_work;
struct llist_head ec_stripe_delete_list;
struct bio_set ec_bioset;
/* REFLINK */
u64 reflink_hint;

View File

@ -16,7 +16,7 @@
#include "replicas.h"
#include "subvolume.h"
#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/prefetch.h>
#include <trace/events/bcachefs.h>

View File

@ -770,11 +770,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
bool bch2_btree_insert_key_cached(struct btree_trans *trans,
unsigned flags,
struct btree_path *path,
struct bkey_i *insert)
struct btree_insert_entry *insert_entry)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
struct bkey_cached *ck = (void *) insert_entry->path->l[0].b;
struct bkey_i *insert = insert_entry->k;
bool kick_reclaim = false;
BUG_ON(insert->k.u64s > ck->u64s);
@ -802,9 +802,24 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
kick_reclaim = true;
}
/*
* To minimize lock contention, we only add the journal pin here and
* defer pin updates to the flush callback via ->seq. Be careful not to
* update ->seq on nojournal commits because we don't want to update the
* pin to a seq that doesn't include journal updates on disk. Otherwise
* we risk losing the update after a crash.
*
* The only exception is if the pin is not active in the first place. We
* have to add the pin because journal reclaim drives key cache
* flushing. The flush callback will not proceed unless ->seq matches
* the latest pin, so make sure it starts with a consistent value.
*/
if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
!journal_pin_active(&ck->journal)) {
ck->seq = trans->journal_res.seq;
}
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
&ck->journal, bch2_btree_key_cache_journal_flush);
ck->seq = trans->journal_res.seq;
if (kick_reclaim)
journal_reclaim_kick(&c->journal);

View File

@ -30,7 +30,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
unsigned);
bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
struct btree_path *, struct bkey_i *);
struct btree_insert_entry *);
int bch2_btree_key_cache_flush(struct btree_trans *,
enum btree_id, struct bpos);
void bch2_btree_key_cache_drop(struct btree_trans *,

View File

@ -388,6 +388,40 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *p
return ret;
}
void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
struct btree_path *linked;
unsigned i;
int ret;
/*
* XXX BIG FAT NOTICE
*
* Drop all read locks before taking a write lock:
*
* This is a hack, because bch2_btree_node_lock_write_nofail() is a
* hack - but by dropping read locks first, this should never fail, and
* we only use this in code paths where whatever read locks we've
* already taken are no longer needed:
*/
trans_for_each_path(trans, linked) {
if (!linked->nodes_locked)
continue;
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_read_locked(linked, i)) {
btree_node_unlock(trans, linked, i);
btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
}
}
ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
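The now out-of-line bch2_btree_node_lock_write_nofail() drops the transaction's remaining read locks before taking the write lock, so the "nofail" acquisition can't deadlock against locks it already holds. The same upgrade pattern with a plain pthread rwlock, which (like a six lock) can't be upgraded in place; this is a sketch of the idea, not of the bcachefs locking model:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t node_lock = PTHREAD_RWLOCK_INITIALIZER;
static int node_value;

static void modify_node(void)
{
	int seen;

	pthread_rwlock_rdlock(&node_lock);
	seen = node_value;			/* read-side work */
	pthread_rwlock_unlock(&node_lock);	/* drop the read lock first */

	/* Now the write lock can't deadlock against our own read lock... */
	pthread_rwlock_wrlock(&node_lock);
	if (node_value != seen) {
		/*
		 * ...but anything learned while unlocked must be revalidated,
		 * which is why the caller marks its paths NEED_RELOCK.
		 */
		seen = node_value;
	}
	node_value = seen + 1;
	pthread_rwlock_unlock(&node_lock);
}

int main(void)
{
	modify_node();
	printf("%d\n", node_value);	/* 1 */
	return 0;
}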
/* relock */
static inline bool btree_path_get_locks(struct btree_trans *trans,

View File

@ -299,15 +299,6 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
: __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
}
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
static inline int __must_check
bch2_btree_node_lock_write(struct btree_trans *trans,
struct btree_path *path,
@ -316,6 +307,10 @@ bch2_btree_node_lock_write(struct btree_trans *trans,
return __btree_node_lock_write(trans, path, b, false);
}
void bch2_btree_node_lock_write_nofail(struct btree_trans *,
struct btree_path *,
struct btree_bkey_cached_common *);
/* relock: */
bool bch2_btree_path_relock_norestart(struct btree_trans *,

View File

@ -13,6 +13,9 @@ void bch2_btree_node_prep_for_write(struct btree_trans *,
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
struct btree *, struct btree_node_iter *,
struct bkey_i *);
int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,

View File

@ -227,12 +227,12 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
return 0;
}
static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
@ -244,8 +244,8 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
bch2_journal_pin_add(&c->journal, seq, &w->journal,
btree_node_write_idx(b) == 0
? btree_node_flush0
: btree_node_flush1);
? bch2_btree_node_flush0
: bch2_btree_node_flush1);
}
/**
@ -765,7 +765,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
if (!i->cached)
btree_insert_key_leaf(trans, i);
else if (!i->key_cache_already_flushed)
bch2_btree_insert_key_cached(trans, flags, i->path, i->k);
bch2_btree_insert_key_cached(trans, flags, i);
else {
bch2_btree_key_cache_drop(trans, i->path);
btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);

View File

@ -1855,7 +1855,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
if (IS_ERR(a))
return PTR_ERR(a);
if (a->v.data_type && a->v.data_type != type) {
if (a->v.data_type && type && a->v.data_type != type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",

View File

@ -92,18 +92,6 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
return ret;
}
static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr)
if (ptr->dev == dev) {
bch2_extent_ptr_set_cached(k, ptr);
return;
}
}
static int __bch2_data_update_index_update(struct btree_trans *trans,
struct bch_write_op *op)
{
@ -126,15 +114,17 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
while (1) {
struct bkey_s_c k;
struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
struct bkey_i *insert;
struct bkey_i *insert = NULL;
struct bkey_i_extent *new;
const union bch_extent_entry *entry;
const union bch_extent_entry *entry_c;
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bch_extent_ptr *ptr;
const struct bch_extent_ptr *ptr_c;
struct bpos next_pos;
bool did_work = false;
bool should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
unsigned i;
unsigned rewrites_found = 0, durability, i;
bch2_trans_begin(trans);
@ -146,7 +136,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
new = bkey_i_to_extent(bch2_keylist_front(keys));
if (!bch2_extents_match(k, old))
goto nomatch;
goto nowork;
bkey_reassemble(_insert.k, k);
insert = _insert.k;
@ -169,50 +159,60 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
* First, drop rewrite_ptrs from @new:
*/
i = 0;
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) {
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) {
/*
* If we're going to be adding a pointer to the
* same device, we have to drop the old one -
* otherwise, we can just mark it cached:
*/
if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev))
bch2_bkey_drop_device_noerror(bkey_i_to_s(insert), p.ptr.dev);
else
bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev);
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached) {
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
rewrites_found |= 1U << i;
}
i++;
}
if (m->data_opts.rewrite_ptrs &&
!rewrites_found &&
bch2_bkey_durability(c, k) >= m->op.opts.data_replicas)
goto nowork;
/* Add new ptrs: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
const struct bch_extent_ptr *existing_ptr =
bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev);
if (existing_ptr && existing_ptr->cached) {
/*
* We're replacing a cached pointer with a non
* cached pointer:
*/
bch2_bkey_drop_device_noerror(bkey_i_to_s(insert),
existing_ptr->dev);
} else if (existing_ptr) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
* data to
*/
continue;
/*
* A replica that we just wrote might conflict with a replica
* that we want to keep, due to racing with another move:
*/
restart_drop_conflicting_replicas:
extent_for_each_ptr(extent_i_to_s(new), ptr)
if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) &&
!ptr_c->cached) {
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr);
goto restart_drop_conflicting_replicas;
}
bch2_extent_ptr_decoded_append(insert, &p);
did_work = true;
if (!bkey_val_u64s(&new->k))
goto nowork;
/* Now, drop pointers that conflict with what we just wrote: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev)))
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) +
bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
/* Now, drop excess replicas: */
restart_drop_extra_replicas:
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
if (!p.ptr.cached &&
durability - ptr_durability >= m->op.opts.data_replicas) {
durability -= ptr_durability;
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
goto restart_drop_extra_replicas;
}
}
if (!did_work)
goto nomatch;
/* Finally, add the pointers we just wrote: */
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
bch2_extent_ptr_decoded_append(insert, &p);
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
bch2_extent_normalize(c, bkey_i_to_s(insert));
@ -253,6 +253,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
bch2_trans_commit(trans, &op->res,
NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
m->data_opts.btree_insert_flags);
if (!ret) {
@ -273,7 +274,7 @@ next:
goto out;
}
continue;
nomatch:
nowork:
if (m->ctxt && m->ctxt->stats) {
BUG_ON(k.k->p.offset <= iter.pos.offset);
atomic64_inc(&m->ctxt->stats->keys_raced);
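The reworked index update above demotes surplus replicas to cached by summing durability and dropping extras until the data_replicas target is met. The same accounting on a flat array (device durabilities and the target below are invented; "drop" here means mark cached, as in the restart_drop_extra_replicas loop):

#include <stdbool.h>
#include <stdio.h>

struct replica {
	unsigned	durability;	/* 0 for a cache-only device */
	bool		cached;
};

/*
 * Mark replicas cached until the summed durability of the remaining dirty
 * replicas is no more than target.
 */
static void drop_extra_replicas(struct replica *r, unsigned nr, unsigned target)
{
	unsigned durability = 0, i;

	for (i = 0; i < nr; i++)
		if (!r[i].cached)
			durability += r[i].durability;

	for (i = 0; i < nr; i++) {
		if (r[i].cached)
			continue;

		if (durability - r[i].durability >= target) {
			durability -= r[i].durability;
			r[i].cached = true;
		}
	}
}

int main(void)
{
	/* Three dirty replicas of durability 1, data_replicas target of 2: */
	struct replica r[] = {
		{ .durability = 1 }, { .durability = 1 }, { .durability = 1 },
	};
	unsigned i;

	drop_extra_replicas(r, 3, 2);

	for (i = 0; i < 3; i++)
		printf("replica %u: %s\n", i, r[i].cached ? "cached" : "dirty");
	/* one replica ends up cached, two stay dirty */
	return 0;
}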

View File

@ -659,14 +659,13 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
static u64 stripe_idx_to_delete(struct bch_fs *c)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
size_t heap_idx;
lockdep_assert_held(&c->ec_stripes_heap_lock);
for (heap_idx = 0; heap_idx < h->used; heap_idx++)
if (h->data[heap_idx].blocks_nonempty == 0 &&
!bch2_stripe_is_open(c, h->data[heap_idx].idx))
return h->data[heap_idx].idx;
if (h->used &&
h->data[0].blocks_nonempty == 0 &&
!bch2_stripe_is_open(c, h->data[0].idx))
return h->data[0].idx;
return 0;
}
@ -959,7 +958,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
bkey_reassemble(n, k);
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
BUG_ON(!ec_ptr);
stripe_ptr = (struct bch_extent_stripe_ptr) {
@ -990,6 +989,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
while (1) {
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
s, &bp_offset));
@ -1057,6 +1057,13 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
s->err = ret;
}
void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s)
{
if (s->idx)
bch2_stripe_close(c, s);
kfree(s);
}
/*
* data buckets of new stripe all written: create the stripe
*/
@ -1072,13 +1079,15 @@ static void ec_stripe_create(struct ec_stripe_new *s)
closure_sync(&s->iodone);
for (i = 0; i < nr_data; i++)
if (s->blocks[i]) {
ob = c->open_buckets + s->blocks[i];
if (!s->err) {
for (i = 0; i < nr_data; i++)
if (s->blocks[i]) {
ob = c->open_buckets + s->blocks[i];
if (ob->sectors_free)
zero_out_rest_of_ec_bucket(c, s, i, ob);
}
if (ob->sectors_free)
zero_out_rest_of_ec_bucket(c, s, i, ob);
}
}
if (s->err) {
if (!bch2_err_matches(s->err, EROFS))
@ -1119,7 +1128,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL,
ret = bch2_trans_do(c, &s->res, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
ec_stripe_key_update(&trans, &s->new_stripe.key,
!s->have_existing_stripe));
if (ret) {
@ -1152,13 +1163,11 @@ err:
list_del(&s->list);
mutex_unlock(&c->ec_stripe_new_lock);
if (s->idx)
bch2_stripe_close(c, s);
ec_stripe_buf_exit(&s->existing_stripe);
ec_stripe_buf_exit(&s->new_stripe);
closure_debug_destroy(&s->iodone);
kfree(s);
ec_stripe_new_put(c, s, STRIPE_REF_stripe);
}
static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
@ -1167,7 +1176,7 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list)
if (!atomic_read(&s->pin))
if (!atomic_read(&s->ref[STRIPE_REF_io]))
goto out;
s = NULL;
out:
@ -1209,7 +1218,7 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
list_add(&s->list, &c->ec_stripe_new_list);
mutex_unlock(&c->ec_stripe_new_lock);
ec_stripe_new_put(c, s);
ec_stripe_new_put(c, s, STRIPE_REF_io);
}
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
@ -1321,7 +1330,8 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
mutex_init(&s->lock);
closure_init(&s->iodone, NULL);
atomic_set(&s->pin, 1);
atomic_set(&s->ref[STRIPE_REF_stripe], 1);
atomic_set(&s->ref[STRIPE_REF_io], 1);
s->c = c;
s->h = h;
s->nr_data = min_t(unsigned, h->nr_active_devs,
@ -1402,6 +1412,11 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
h = ERR_PTR(-EROFS);
goto found;
}
list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target &&
h->algo == algo &&
@ -1451,7 +1466,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
&devs,
h->s->nr_parity,
&nr_have_parity,
&have_cache,
&have_cache, 0,
BCH_DATA_parity,
reserve,
cl);
@ -1478,7 +1493,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
&devs,
h->s->nr_data,
&nr_have_data,
&have_cache,
&have_cache, 0,
BCH_DATA_user,
reserve,
cl);
@ -1706,6 +1721,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
if (reserve == RESERVE_movinggc) {
ret = new_stripe_alloc_buckets(trans, h, reserve, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h);
if (ret)
goto err;
goto allocate_buf;
}
/* XXX freelist_wait? */
closure_wait(&c->freelist_wait, cl);
waiting = true;
@ -1738,7 +1761,7 @@ err:
return ERR_PTR(ret);
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
struct open_bucket *ob;
@ -1746,11 +1769,13 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
mutex_lock(&h->lock);
if (!h->s)
goto unlock;
if (!ca)
goto found;
for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
if (!h->s->blocks[i])
continue;
@ -1769,6 +1794,32 @@ unlock:
mutex_unlock(&c->ec_stripe_head_lock);
}
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{
__bch2_ec_stop(c, ca);
}
void bch2_fs_ec_stop(struct bch_fs *c)
{
__bch2_ec_stop(c, NULL);
}
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
bool ret;
mutex_lock(&c->ec_stripe_new_lock);
ret = list_empty(&c->ec_stripe_new_list);
mutex_unlock(&c->ec_stripe_new_lock);
return ret;
}
void bch2_fs_ec_flush(struct bch_fs *c)
{
wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
}
int bch2_stripes_read(struct bch_fs *c)
{
struct btree_trans trans;
@ -1821,13 +1872,16 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
size_t i;
mutex_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min_t(size_t, h->used, 20); i++) {
for (i = 0; i < min_t(size_t, h->used, 50); i++) {
m = genradix_ptr(&c->stripes, h->data[i].idx);
prt_printf(out, "%zu %u/%u+%u\n", h->data[i].idx,
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
h->data[i].blocks_nonempty,
m->nr_blocks - m->nr_redundant,
m->nr_redundant);
if (bch2_stripe_is_open(c, h->data[i].idx))
prt_str(out, " open");
prt_newline(out);
}
mutex_unlock(&c->ec_stripes_heap_lock);
}
@ -1839,22 +1893,27 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
prt_printf(out, "target %u algo %u redundancy %u:\n",
h->target, h->algo, h->redundancy);
prt_printf(out, "target %u algo %u redundancy %u %s:\n",
h->target, h->algo, h->redundancy,
bch2_alloc_reserves[h->reserve]);
if (h->s)
prt_printf(out, "\tpending: idx %llu blocks %u+%u allocated %u\n",
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
h->s->idx, h->s->nr_data, h->s->nr_parity,
bitmap_weight(h->s->blocks_allocated,
h->s->nr_data));
}
mutex_unlock(&c->ec_stripe_head_lock);
prt_printf(out, "in flight:\n");
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
prt_printf(out, "\tin flight: idx %llu blocks %u+%u pin %u\n",
prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
s->idx, s->nr_data, s->nr_parity,
atomic_read(&s->pin));
atomic_read(&s->ref[STRIPE_REF_io]),
atomic_read(&s->ref[STRIPE_REF_stripe]),
bch2_alloc_reserves[s->h->reserve]);
}
mutex_unlock(&c->ec_stripe_new_lock);
}
@ -1892,14 +1951,22 @@ void bch2_fs_ec_exit(struct bch_fs *c)
void bch2_fs_ec_init_early(struct bch_fs *c)
{
spin_lock_init(&c->ec_stripes_new_lock);
mutex_init(&c->ec_stripes_heap_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);
init_waitqueue_head(&c->ec_stripe_new_wait);
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
}
int bch2_fs_ec_init(struct bch_fs *c)
{
spin_lock_init(&c->ec_stripes_new_lock);
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}

View File

@ -143,6 +143,12 @@ struct ec_stripe_buf {
struct ec_stripe_head;
enum ec_stripe_ref {
STRIPE_REF_io,
STRIPE_REF_stripe,
STRIPE_REF_NR
};
struct ec_stripe_new {
struct bch_fs *c;
struct ec_stripe_head *h;
@ -154,8 +160,7 @@ struct ec_stripe_new {
struct closure iodone;
/* counts in flight writes, stripe is created when pin == 0 */
atomic_t pin;
atomic_t ref[STRIPE_REF_NR];
int err;
@ -213,24 +218,35 @@ void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
void bch2_do_stripe_deletes(struct bch_fs *);
void bch2_ec_do_stripe_creates(struct bch_fs *);
void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *);
static inline void ec_stripe_new_get(struct ec_stripe_new *s)
static inline void ec_stripe_new_get(struct ec_stripe_new *s,
enum ec_stripe_ref ref)
{
atomic_inc(&s->pin);
atomic_inc(&s->ref[ref]);
}
static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
enum ec_stripe_ref ref)
{
BUG_ON(atomic_read(&s->pin) <= 0);
BUG_ON(!s->err && !s->idx);
BUG_ON(atomic_read(&s->ref[ref]) <= 0);
if (atomic_dec_and_test(&s->pin))
bch2_ec_do_stripe_creates(c);
if (atomic_dec_and_test(&s->ref[ref]))
switch (ref) {
case STRIPE_REF_stripe:
bch2_ec_stripe_new_free(c, s);
break;
case STRIPE_REF_io:
bch2_ec_do_stripe_creates(c);
break;
default:
unreachable();
}
}
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_ec_flush_new_stripes(struct bch_fs *);
void bch2_fs_ec_stop(struct bch_fs *);
void bch2_fs_ec_flush(struct bch_fs *);
int bch2_stripes_read(struct bch_fs *);
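The single pin count on ec_stripe_new is split into two refs: STRIPE_REF_io counts in-flight writes and kicks stripe creation when it hits zero, while STRIPE_REF_stripe keeps the struct itself alive until bch2_ec_stripe_new_free(). A compact model of that split using C11 atomics (the object and callbacks are stand-ins, not the bcachefs types):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

enum stripe_ref { REF_io, REF_stripe, REF_NR };

struct stripe_new {
	atomic_int ref[REF_NR];
};

static void stripe_create(struct stripe_new *s)
{
	/* in the diff above, ec_stripe_create() drops REF_stripe when done */
	printf("all writes done: create stripe\n");
}

static void stripe_free(struct stripe_new *s)
{
	printf("last user gone: free\n");
	free(s);
}

static void stripe_get(struct stripe_new *s, enum stripe_ref ref)
{
	atomic_fetch_add(&s->ref[ref], 1);
}

static void stripe_put(struct stripe_new *s, enum stripe_ref ref)
{
	if (atomic_fetch_sub(&s->ref[ref], 1) == 1) {
		if (ref == REF_io)
			stripe_create(s);	/* kick creation */
		else
			stripe_free(s);		/* actually free */
	}
}

int main(void)
{
	struct stripe_new *s = calloc(1, sizeof(*s));

	if (!s)
		return 1;

	/* Both refs start at 1, as in ec_new_stripe_alloc(). */
	atomic_store(&s->ref[REF_stripe], 1);
	atomic_store(&s->ref[REF_io], 1);

	stripe_get(s, REF_io);		/* a data write takes an io ref */
	stripe_put(s, REF_io);		/* ...and drops it on completion */
	stripe_put(s, REF_io);		/* initial io ref: creation runs */
	stripe_put(s, REF_stripe);	/* creation done: struct is freed */
	return 0;
}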

View File

@ -26,8 +26,6 @@
#include <trace/events/bcachefs.h>
static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
static unsigned bch2_crc_field_size_max[] = {
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
@ -512,7 +510,7 @@ restart_narrow_pointers:
bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
if (can_narrow_crc(p.crc, n)) {
__bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(k, &p);
@ -765,8 +763,8 @@ static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
/*
* Returns pointer to the next entry after the one being dropped:
*/
static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
struct bch_extent_ptr *ptr)
union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
@ -809,7 +807,7 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
{
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
union bch_extent_entry *ret =
__bch2_bkey_drop_ptr(k, ptr);
bch2_bkey_drop_ptr_noerror(k, ptr);
/*
* If we deleted all the dirty pointers and there's still cached
@ -840,14 +838,13 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
{
struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev);
struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev);
if (ptr)
__bch2_bkey_drop_ptr(k, ptr);
bch2_bkey_drop_ptr_noerror(k, ptr);
}
const struct bch_extent_ptr *
bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
@ -922,11 +919,11 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
}
}
bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
struct bkey_s_c k2)
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2)
{
struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
const union bch_extent_entry *entry2;
struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2);
union bch_extent_entry *entry2;
struct extent_ptr_decoded p2;
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
@ -934,9 +931,9 @@ bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
p1.ptr.gen == p2.ptr.gen &&
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
return true;
return &entry2->ptr;
return false;
return NULL;
}
void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
@ -992,6 +989,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca;
bool first = true;
if (c)
prt_printf(out, "durability: %u ", bch2_bkey_durability(c, k));
bkey_extent_entry_for_each(ptrs, entry) {
if (!first)
prt_printf(out, " ");

View File

@ -613,14 +613,21 @@ unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
void bch2_bkey_drop_device(struct bkey_s, unsigned);
void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned);
static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev)
{
return (void *) bch2_bkey_has_device_c(k.s_c, dev);
}
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr)
{
EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
EBUG_ON(bch2_bkey_has_device(bkey_i_to_s(k), ptr.dev));
switch (k->k.type) {
case KEY_TYPE_btree_ptr:
@ -642,6 +649,8 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
struct bch_extent_ptr *);
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
struct bch_extent_ptr *);
@ -665,7 +674,8 @@ do { \
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c);
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);

View File

@ -954,11 +954,11 @@ static int check_inode(struct btree_trans *trans,
iter->pos.snapshot),
POS(u.bi_inum, U64_MAX),
0, NULL);
if (ret) {
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error in fsck: error truncating inode: %s",
bch2_err_str(ret));
if (ret)
return ret;
}
/*
* We truncated without our normal sector accounting hook, just

View File

@ -218,7 +218,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
bch2_trans_copy_iter(&iter, extent_iter);
for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
for_each_btree_key_upto_continue_norestart(iter,
new->k.p, BTREE_ITER_SLOTS, old, ret) {
s64 sectors = min(new->k.p.offset, old.k->p.offset) -
max(bkey_start_offset(&new->k),
bkey_start_offset(old.k));
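bch2_sum_sector_overwrites() now stops iterating at new->k.p, but the per-key overlap it sums is unchanged: min of the ends minus max of the starts. For example:

#include <stdio.h>

/* Overlap in sectors between extents [a_start, a_end) and [b_start, b_end). */
static long long overlap(long long a_start, long long a_end,
			 long long b_start, long long b_end)
{
	long long lo = a_start > b_start ? a_start : b_start;
	long long hi = a_end   < b_end   ? a_end   : b_end;

	return hi > lo ? hi - lo : 0;
}

int main(void)
{
	/* new extent covers sectors [8, 24), old key covers [16, 32): */
	printf("%lld\n", overlap(8, 24, 16, 32));	/* 8 overlapping sectors */
	return 0;
}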
@ -705,7 +706,8 @@ static void bch2_write_done(struct closure *cl)
struct bch_fs *c = op->c;
bch2_disk_reservation_put(c, &op->res);
bch2_write_ref_put(c, BCH_WRITE_REF_write);
if (!(op->flags & BCH_WRITE_MOVE))
bch2_write_ref_put(c, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
@ -834,36 +836,30 @@ static void bch2_write_index(struct closure *cl)
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct write_point *wp = op->wp;
struct workqueue_struct *wq = index_update_wq(op);
unsigned long flags;
if ((op->flags & BCH_WRITE_DONE) &&
(op->flags & BCH_WRITE_MOVE))
bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
barrier();
/*
* We're not using wp->writes_lock here, so this is racey: that's ok,
* because this is just for diagnostic purposes, and we're running out
* of interrupt context here so if we were to take the lock we'd have to
* switch to spin_lock_irq()/irqsave(), which is not free:
*/
spin_lock_irqsave(&wp->writes_lock, flags);
if (wp->state == WRITE_POINT_waiting_io)
__wp_update_state(wp, WRITE_POINT_waiting_work);
list_add_tail(&op->wp_list, &wp->writes);
spin_unlock_irqrestore(&wp->writes_lock, flags);
op->btree_update_ready = true;
queue_work(wq, &wp->index_update_work);
}
static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
{
op->btree_update_ready = false;
op->wp = wp;
spin_lock(&wp->writes_lock);
list_add_tail(&op->wp_list, &wp->writes);
if (wp->state == WRITE_POINT_stopped)
if (wp->state == WRITE_POINT_stopped) {
spin_lock_irq(&wp->writes_lock);
__wp_update_state(wp, WRITE_POINT_waiting_io);
spin_unlock(&wp->writes_lock);
spin_unlock_irq(&wp->writes_lock);
}
}
void bch2_write_point_do_index_updates(struct work_struct *work)
@ -873,16 +869,12 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
struct bch_write_op *op;
while (1) {
spin_lock(&wp->writes_lock);
list_for_each_entry(op, &wp->writes, wp_list)
if (op->btree_update_ready) {
list_del(&op->wp_list);
goto unlock;
}
op = NULL;
unlock:
spin_lock_irq(&wp->writes_lock);
op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
if (op)
list_del(&op->wp_list);
wp_update_state(wp, op != NULL);
spin_unlock(&wp->writes_lock);
spin_unlock_irq(&wp->writes_lock);
if (!op)
break;
@ -1673,7 +1665,6 @@ static void __bch2_write(struct bch_write_op *op)
}
again:
memset(&op->failed, 0, sizeof(op->failed));
op->btree_update_ready = false;
do {
struct bkey_i *key_to_write;
@ -1853,7 +1844,12 @@ void bch2_write(struct closure *cl)
goto err;
}
if (c->opts.nochanges ||
if (c->opts.nochanges) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
}
if (!(op->flags & BCH_WRITE_MOVE) &&
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
@ -1881,6 +1877,28 @@ err:
op->end_io(op);
}
const char * const bch2_write_flags[] = {
#define x(f) #f,
BCH_WRITE_FLAGS()
#undef x
NULL
};
void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
{
prt_str(out, "pos: ");
bch2_bpos_to_text(out, op->pos);
prt_newline(out);
prt_str(out, "started: ");
bch2_pr_time_units(out, local_clock() - op->start_time);
prt_newline(out);
prt_str(out, "flags: ");
prt_bitflags(out, bch2_write_flags, op->flags);
prt_newline(out);
}
/* Cache promotion on read */
struct promote_op {

View File

@ -28,41 +28,34 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
const char *bch2_blk_status_to_str(blk_status_t);
enum bch_write_flags {
__BCH_WRITE_ALLOC_NOWAIT,
__BCH_WRITE_CACHED,
__BCH_WRITE_DATA_ENCODED,
__BCH_WRITE_PAGES_STABLE,
__BCH_WRITE_PAGES_OWNED,
__BCH_WRITE_ONLY_SPECIFIED_DEVS,
__BCH_WRITE_WROTE_DATA_INLINE,
__BCH_WRITE_FROM_INTERNAL,
__BCH_WRITE_CHECK_ENOSPC,
__BCH_WRITE_SYNC,
__BCH_WRITE_MOVE,
__BCH_WRITE_IN_WORKER,
__BCH_WRITE_DONE,
__BCH_WRITE_IO_ERROR,
__BCH_WRITE_CONVERT_UNWRITTEN,
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
x(DATA_ENCODED) \
x(PAGES_STABLE) \
x(PAGES_OWNED) \
x(ONLY_SPECIFIED_DEVS) \
x(WROTE_DATA_INLINE) \
x(FROM_INTERNAL) \
x(CHECK_ENOSPC) \
x(SYNC) \
x(MOVE) \
x(IN_WORKER) \
x(DONE) \
x(IO_ERROR) \
x(CONVERT_UNWRITTEN)
enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
#define BCH_WRITE_ALLOC_NOWAIT (1U << __BCH_WRITE_ALLOC_NOWAIT)
#define BCH_WRITE_CACHED (1U << __BCH_WRITE_CACHED)
#define BCH_WRITE_DATA_ENCODED (1U << __BCH_WRITE_DATA_ENCODED)
#define BCH_WRITE_PAGES_STABLE (1U << __BCH_WRITE_PAGES_STABLE)
#define BCH_WRITE_PAGES_OWNED (1U << __BCH_WRITE_PAGES_OWNED)
#define BCH_WRITE_ONLY_SPECIFIED_DEVS (1U << __BCH_WRITE_ONLY_SPECIFIED_DEVS)
#define BCH_WRITE_WROTE_DATA_INLINE (1U << __BCH_WRITE_WROTE_DATA_INLINE)
#define BCH_WRITE_FROM_INTERNAL (1U << __BCH_WRITE_FROM_INTERNAL)
#define BCH_WRITE_CHECK_ENOSPC (1U << __BCH_WRITE_CHECK_ENOSPC)
#define BCH_WRITE_SYNC (1U << __BCH_WRITE_SYNC)
#define BCH_WRITE_MOVE (1U << __BCH_WRITE_MOVE)
/* Internal: */
#define BCH_WRITE_IN_WORKER (1U << __BCH_WRITE_IN_WORKER)
#define BCH_WRITE_DONE (1U << __BCH_WRITE_DONE)
#define BCH_WRITE_IO_ERROR (1U << __BCH_WRITE_IO_ERROR)
#define BCH_WRITE_CONVERT_UNWRITTEN (1U << __BCH_WRITE_CONVERT_UNWRITTEN)
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = 1U << __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
@ -124,6 +117,8 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio)
return wbio;
}
void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;
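The write flags are now generated from one BCH_WRITE_FLAGS() x-macro, which keeps the bit-index enum, the flag masks, and the name table consumed by prt_bitflags() in bch2_write_op_to_text() in sync from a single list. The pattern in miniature (the flag names below are invented for the example):

#include <stdio.h>

/* Single source of truth: each x(...) becomes an enum entry, a mask, a name. */
#define EXAMPLE_FLAGS()	\
	x(SYNC)		\
	x(CACHED)	\
	x(DONE)

/* Bit indices: __FLAG_SYNC = 0, __FLAG_CACHED = 1, ... */
enum __example_flags {
#define x(f)	__FLAG_##f,
	EXAMPLE_FLAGS()
#undef x
	__FLAG_NR
};

/* Masks: FLAG_SYNC = 1 << 0, ... */
enum example_flags {
#define x(f)	FLAG_##f = 1U << __FLAG_##f,
	EXAMPLE_FLAGS()
#undef x
};

/* Name table, in bit order, for printing set flags. */
static const char * const example_flag_names[] = {
#define x(f)	#f,
	EXAMPLE_FLAGS()
#undef x
	NULL
};

static void print_flags(unsigned flags)
{
	for (unsigned i = 0; i < __FLAG_NR; i++)
		if (flags & (1U << i))
			printf("%s ", example_flag_names[i]);
	printf("\n");
}

int main(void)
{
	print_flags(FLAG_SYNC | FLAG_DONE);	/* SYNC DONE */
	return 0;
}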

View File

@ -119,7 +119,7 @@ struct bch_write_op {
unsigned nr_replicas_required:4;
unsigned alloc_reserve:3;
unsigned incompressible:1;
unsigned btree_update_ready:1;
unsigned stripe_waited:1;
struct bch_devs_list devs_have;
u16 target;

View File

@ -68,8 +68,9 @@ journal_seq_to_buf(struct journal *j, u64 seq)
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->key_cache_list);
unsigned i;
for (i = 0; i < ARRAY_SIZE(p->list); i++)
INIT_LIST_HEAD(&p->list[i]);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, count);
p->devs.nr = 0;
@ -758,19 +759,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
u64 *new_bucket_seq = NULL, *new_buckets = NULL;
struct open_bucket **ob = NULL;
long *bu = NULL;
unsigned i, nr_got = 0, nr_want = nr - ja->nr;
unsigned old_nr = ja->nr;
unsigned old_discard_idx = ja->discard_idx;
unsigned old_dirty_idx_ondisk = ja->dirty_idx_ondisk;
unsigned old_dirty_idx = ja->dirty_idx;
unsigned old_cur_idx = ja->cur_idx;
unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr;
int ret = 0;
if (c) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
mutex_lock(&c->sb_lock);
}
BUG_ON(nr <= ja->nr);
bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
@ -778,7 +770,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
ret = -ENOMEM;
goto err_unblock;
goto err_free;
}
for (nr_got = 0; nr_got < nr_want; nr_got++) {
@ -794,87 +786,92 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (ret)
break;
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_open_bucket_put(c, ob[nr_got]);
bch_err(c, "error marking new journal buckets: %s", bch2_err_str(ret));
break;
}
bu[nr_got] = ob[nr_got]->bucket;
}
}
if (!nr_got)
goto err_unblock;
goto err_free;
/*
* We may be called from the device add path, before the new device has
* actually been added to the running filesystem:
*/
if (!new_fs)
spin_lock(&c->journal.lock);
/* Don't return an error if we successfully allocated some buckets: */
ret = 0;
if (c) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
mutex_lock(&c->sb_lock);
}
memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
BUG_ON(ja->discard_idx > ja->nr);
pos = ja->discard_idx ?: ja->nr;
memmove(new_buckets + pos + nr_got,
new_buckets + pos,
sizeof(new_buckets[0]) * (ja->nr - pos));
memmove(new_bucket_seq + pos + nr_got,
new_bucket_seq + pos,
sizeof(new_bucket_seq[0]) * (ja->nr - pos));
for (i = 0; i < nr_got; i++) {
unsigned pos = ja->discard_idx ?: ja->nr;
long b = bu[i];
__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
ja->nr++;
ja->buckets[pos] = b;
ja->bucket_seq[pos] = 0;
if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
if (pos <= ja->dirty_idx_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
if (pos <= ja->dirty_idx)
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
new_buckets[pos + i] = bu[i];
new_bucket_seq[pos + i] = 0;
}
ret = bch2_journal_buckets_to_sb(c, ca);
if (ret) {
/* Revert: */
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
ja->nr = old_nr;
ja->discard_idx = old_discard_idx;
ja->dirty_idx_ondisk = old_dirty_idx_ondisk;
ja->dirty_idx = old_dirty_idx;
ja->cur_idx = old_cur_idx;
}
nr = ja->nr + nr_got;
ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr);
if (ret)
goto err_unblock;
if (!new_fs)
spin_unlock(&c->journal.lock);
if (ja->nr != old_nr && !new_fs)
bch2_write_super(c);
/* Commit: */
if (c)
spin_lock(&c->journal.lock);
swap(new_buckets, ja->buckets);
swap(new_bucket_seq, ja->bucket_seq);
ja->nr = nr;
if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr;
if (pos <= ja->dirty_idx_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr;
if (pos <= ja->dirty_idx)
ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr;
if (c)
spin_unlock(&c->journal.lock);
err_unblock:
if (c) {
bch2_journal_unblock(&c->journal);
if (ret)
goto err;
if (!new_fs) {
for (i = 0; i < nr_got; i++) {
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bu[i], BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_fs_inconsistent(c, "error marking new journal buckets: %i", ret);
goto err;
}
}
}
err:
if (c)
mutex_unlock(&c->sb_lock);
}
if (ob && !new_fs)
if (ret && !new_fs)
for (i = 0; i < nr_got; i++)
bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bu[i], BCH_DATA_free, 0));
err_free:
if (!new_fs)
for (i = 0; i < nr_got; i++)
bch2_open_bucket_put(c, ob[i]);
@ -882,12 +879,7 @@ err:
kfree(new_buckets);
kfree(ob);
kfree(bu);
return ret;
err_unblock:
if (c)
bch2_journal_unblock(&c->journal);
goto err;
}
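When the newly allocated journal buckets are spliced in at pos (just after the discard cursor), every ring index at or past pos has to advance by the number of inserted buckets, modulo the new size; that is what the block of cursor updates under the journal lock does. A small standalone model of that splice and index adjustment (array contents and cursor values are arbitrary):

#include <stdio.h>
#include <string.h>

#define MAX_BUCKETS 16

static unsigned long long buckets[MAX_BUCKETS] = { 10, 11, 12, 13 };
static unsigned nr = 4;

/* Ring cursors into buckets[]; think discard_idx / dirty_idx / cur_idx. */
static unsigned cursors[3] = { 1, 2, 3 };

static void insert_buckets(unsigned pos, const unsigned long long *new_buckets,
			   unsigned nr_new)
{
	/* Make room at pos, like the memmove()s in the hunk above. */
	memmove(&buckets[pos + nr_new], &buckets[pos],
		sizeof(buckets[0]) * (nr - pos));
	memcpy(&buckets[pos], new_buckets, sizeof(buckets[0]) * nr_new);
	nr += nr_new;

	/* Any cursor at or past the insertion point moves with its bucket. */
	for (unsigned i = 0; i < 3; i++)
		if (pos <= cursors[i])
			cursors[i] = (cursors[i] + nr_new) % nr;
}

int main(void)
{
	unsigned long long new_buckets[] = { 20, 21 };

	insert_buckets(1, new_buckets, 2);

	for (unsigned i = 0; i < nr; i++)
		printf("%llu ", buckets[i]);
	printf("\ncursors: %u %u %u\n", cursors[0], cursors[1], cursors[2]);
	/* buckets: 10 20 21 11 12 13, cursors: 3 4 5 */
	return 0;
}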
/*
@ -901,13 +893,15 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
struct closure cl;
int ret = 0;
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
return 0;
closure_init_stack(&cl);
while (ja->nr != nr) {
down_write(&c->state_lock);
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
goto unlock;
while (ja->nr < nr) {
struct disk_reservation disk_res = { 0, 0 };
/*
@ -938,7 +932,8 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
if (ret)
bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));
unlock:
up_write(&c->state_lock);
return ret;
}
@ -977,7 +972,7 @@ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
seq++) {
struct journal_buf *buf = journal_seq_to_buf(j, seq);
if (bch2_bkey_has_device(bkey_i_to_s_c(&buf->key), dev_idx))
if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx))
ret = true;
}
spin_unlock(&j->lock);
@ -1353,6 +1348,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
unsigned i;
spin_lock(&j->lock);
*seq = max(*seq, j->pin.front);
@ -1370,15 +1366,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
prt_newline(out);
printbuf_indent_add(out, 2);
list_for_each_entry(pin, &pin_list->list, list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
list_for_each_entry(pin, &pin_list->key_cache_list, list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
list_for_each_entry(pin, &pin_list->list[i], list) {
prt_printf(out, "\t%px %ps", pin, pin->flush);
prt_newline(out);
}
if (!list_empty(&pin_list->flushed)) {
prt_printf(out, "flushed:");

View File

@ -1339,8 +1339,7 @@ static void __journal_write_alloc(struct journal *j,
if (!ca->mi.durability ||
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
ca->dev_idx) ||
bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
sectors > ja->sectors_free)
continue;

View File

@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
@ -318,9 +319,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
*/
while (!fifo_empty(&j->pin) &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).flushed));
BUG_ON(!fifo_pop(&j->pin, temp));
fifo_pop(&j->pin, temp);
popped = true;
}
@ -379,6 +378,17 @@ void bch2_journal_pin_drop(struct journal *j,
spin_unlock(&j->lock);
}
enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
fn == bch2_btree_node_flush1)
return JOURNAL_PIN_btree;
else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_key_cache;
else
return JOURNAL_PIN_other;
}
void bch2_journal_pin_set(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
@ -407,10 +417,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin->seq = seq;
pin->flush = flush_fn;
if (flush_fn == bch2_btree_key_cache_journal_flush)
list_add(&pin->list, &pin_list->key_cache_list);
else if (flush_fn)
list_add(&pin->list, &pin_list->list);
if (flush_fn)
list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
else
list_add(&pin->list, &pin_list->flushed);
@ -446,37 +454,37 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
static struct journal_entry_pin *
journal_get_next_pin(struct journal *j,
bool get_any,
bool get_key_cache,
u64 max_seq, u64 *seq)
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
unsigned i;
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > max_seq && !get_any && !get_key_cache)
if (*seq > seq_to_flush && !allowed_above_seq)
break;
if (*seq <= max_seq || get_any) {
ret = list_first_entry_or_null(&pin_list->list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
if (*seq <= max_seq || get_any || get_key_cache) {
ret = list_first_entry_or_null(&pin_list->key_cache_list,
struct journal_entry_pin, list);
if (ret)
return ret;
}
for (i = 0; i < JOURNAL_PIN_NR; i++)
if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
((1U << i) & allowed_above_seq)) {
ret = list_first_entry_or_null(&pin_list->list[i],
struct journal_entry_pin, list);
if (ret)
return ret;
}
}
return NULL;
}
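journal_get_next_pin() replaces the get_any/get_key_cache booleans with two bitmasks over the pin types: allowed_below_seq types may be flushed when the entry is at or below seq_to_flush, allowed_above_seq types may be flushed at any sequence number. journal_flush_pins() widens both masks when min_any or min_key_cache demand work, and journal_flush_done() uses them to flush key-cache and other pins in one call and btree pins in a second. A simplified sketch of the selection rule over per-type counts (illustrative types, not the kernel FIFO/list machinery):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { PIN_BTREE, PIN_KEY_CACHE, PIN_OTHER, PIN_NR };

struct entry {
	uint64_t seq;
	int      queued[PIN_NR];	/* pins of each type at this seq */
};

/*
 * Return the type of the first flushable pin, or -1.  A type is eligible
 * below seq_to_flush if its bit is set in allowed_below, and at any seq
 * if its bit is set in allowed_above.
 */
static int next_pin(const struct entry *e, unsigned n,
		    uint64_t seq_to_flush,
		    unsigned allowed_below, unsigned allowed_above,
		    uint64_t *seq)
{
	for (unsigned i = 0; i < n; i++) {
		if (e[i].seq > seq_to_flush && !allowed_above)
			break;

		for (int t = 0; t < PIN_NR; t++) {
			bool ok = ((allowed_below & (1U << t)) && e[i].seq <= seq_to_flush) ||
				  (allowed_above & (1U << t));

			if (ok && e[i].queued[t]) {
				*seq = e[i].seq;
				return t;
			}
		}
	}
	return -1;
}

int main(void)
{
	struct entry pins[] = {
		{ 10, { 1, 0, 0 } },	/* btree pin at seq 10 */
		{ 11, { 0, 1, 0 } },	/* key-cache pin at seq 11 */
	};
	uint64_t seq;

	/* Flush key-cache/other pins up to seq 12, leave btree pins alone: */
	int t = next_pin(pins, 2, 12,
			 (1U << PIN_KEY_CACHE) | (1U << PIN_OTHER), 0, &seq);

	printf("type %d at seq %llu\n", t, (unsigned long long) seq);
	return 0;
}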
/* returns true if we did work */
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
static size_t journal_flush_pins(struct journal *j,
u64 seq_to_flush,
unsigned allowed_below_seq,
unsigned allowed_above_seq,
unsigned min_any,
unsigned min_key_cache)
{
@ -489,15 +497,25 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
lockdep_assert_held(&j->reclaim_lock);
while (1) {
unsigned allowed_above = allowed_above_seq;
unsigned allowed_below = allowed_below_seq;
if (min_any) {
allowed_above |= ~0;
allowed_below |= ~0;
}
if (min_key_cache) {
allowed_above |= 1U << JOURNAL_PIN_key_cache;
allowed_below |= 1U << JOURNAL_PIN_key_cache;
}
cond_resched();
j->last_flushed = jiffies;
spin_lock(&j->lock);
pin = journal_get_next_pin(j,
min_any != 0,
min_key_cache != 0,
seq_to_flush, &seq);
pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
@ -656,6 +674,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
atomic_long_read(&c->btree_key_cache.nr_keys));
nr_flushed = journal_flush_pins(j, seq_to_flush,
~0, 0,
min_nr, min_key_cache);
if (direct)
@ -776,7 +795,11 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
if (journal_flush_pins(j, seq_to_flush, 0, 0))
if (journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_key_cache)|
(1U << JOURNAL_PIN_other), 0, 0, 0) ||
journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_btree), 0, 0, 0))
*did_work = true;
spin_lock(&j->lock);

View File

@ -175,46 +175,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
.to_text = bch2_sb_journal_v2_to_text,
};
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
u64 *buckets, unsigned nr)
{
struct journal_device *ja = &ca->journal;
struct bch_sb_field_journal_v2 *j;
unsigned i, dst = 0, nr = 1;
unsigned i, dst = 0, nr_compacted = 1;
if (c)
lockdep_assert_held(&c->sb_lock);
if (!ja->nr) {
if (!nr) {
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
return 0;
}
for (i = 0; i + 1 < ja->nr; i++)
if (ja->buckets[i] + 1 != ja->buckets[i + 1])
nr++;
for (i = 0; i + 1 < nr; i++)
if (buckets[i] + 1 != buckets[i + 1])
nr_compacted++;
j = bch2_sb_resize_journal_v2(&ca->disk_sb,
(sizeof(*j) + sizeof(j->d[0]) * nr) / sizeof(u64));
(sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
if (!j)
return -BCH_ERR_ENOSPC_sb_journal;
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
j->d[dst].start = le64_to_cpu(ja->buckets[0]);
j->d[dst].start = le64_to_cpu(buckets[0]);
j->d[dst].nr = le64_to_cpu(1);
for (i = 1; i < ja->nr; i++) {
if (ja->buckets[i] == ja->buckets[i - 1] + 1) {
for (i = 1; i < nr; i++) {
if (buckets[i] == buckets[i - 1] + 1) {
le64_add_cpu(&j->d[dst].nr, 1);
} else {
dst++;
j->d[dst].start = le64_to_cpu(ja->buckets[i]);
j->d[dst].start = le64_to_cpu(buckets[i]);
j->d[dst].nr = le64_to_cpu(1);
}
}
BUG_ON(dst + 1 != nr);
BUG_ON(dst + 1 != nr_compacted);
return 0;
}
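bch2_journal_buckets_to_sb() now takes the bucket array and count explicitly instead of reading ja->buckets/ja->nr, presumably so callers can write a prospective bucket set to the superblock that differs from the one currently attached to the journal_device. The body run-length-compacts consecutive bucket numbers into (start, nr) pairs: one pass counts the runs so the superblock field can be sized, a second pass fills them in. A self-contained sketch of that compaction, without the superblock types:

#include <stdint.h>
#include <stdio.h>

struct run { uint64_t start; uint64_t nr; };

/* Compact a sorted array of bucket numbers into runs of consecutive buckets. */
static unsigned compact_buckets(const uint64_t *buckets, unsigned nr,
				struct run *out)
{
	if (!nr)
		return 0;

	unsigned dst = 0;
	out[0].start = buckets[0];
	out[0].nr    = 1;

	for (unsigned i = 1; i < nr; i++) {
		if (buckets[i] == buckets[i - 1] + 1) {
			out[dst].nr++;
		} else {
			dst++;
			out[dst].start = buckets[i];
			out[dst].nr    = 1;
		}
	}
	return dst + 1;
}

int main(void)
{
	uint64_t buckets[] = { 4, 5, 6, 9, 10, 20 };
	struct run runs[6];
	unsigned n = compact_buckets(buckets, 6, runs);

	for (unsigned i = 0; i < n; i++)
		printf("start %llu nr %llu\n",
		       (unsigned long long) runs[i].start,
		       (unsigned long long) runs[i].nr);
	return 0;
}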

View File

@ -21,4 +21,4 @@ static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_j
extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *);
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);

View File

@ -43,9 +43,15 @@ struct journal_buf {
* flushed:
*/
enum journal_pin_type {
JOURNAL_PIN_btree,
JOURNAL_PIN_key_cache,
JOURNAL_PIN_other,
JOURNAL_PIN_NR,
};
struct journal_entry_pin_list {
struct list_head list;
struct list_head key_cache_list;
struct list_head list[JOURNAL_PIN_NR];
struct list_head flushed;
atomic_t count;
struct bch_devs_list devs;

View File

@ -46,7 +46,7 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
struct bkey_i *n;
int ret;
if (!bch2_bkey_has_device(k, dev_idx))
if (!bch2_bkey_has_device_c(k, dev_idx))
return 0;
n = bch2_bkey_make_mut(trans, k);
@ -130,8 +130,7 @@ retry:
while (bch2_trans_begin(&trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
dev_idx))
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
goto next;
bch2_bkey_buf_copy(&k, c, &b->key);

View File

@ -41,7 +41,8 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
}
struct moving_io {
struct list_head list;
struct list_head read_list;
struct list_head io_list;
struct move_bucket_in_flight *b;
struct closure cl;
bool read_completed;
@ -65,8 +66,12 @@ static void move_free(struct moving_io *io)
atomic_dec(&io->b->count);
bch2_data_update_exit(&io->write);
mutex_lock(&ctxt->lock);
list_del(&io->io_list);
wake_up(&ctxt->wait);
bch2_write_ref_put(c, BCH_WRITE_REF_move);
mutex_unlock(&ctxt->lock);
kfree(io);
}
@ -101,7 +106,7 @@ static void move_write(struct moving_io *io)
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
struct moving_io *io =
list_first_entry_or_null(&ctxt->reads, struct moving_io, list);
list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
return io && io->read_completed ? io : NULL;
}
@ -128,7 +133,7 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
bch2_trans_unlock(trans);
while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
list_del(&io->list);
list_del(&io->read_list);
move_write(io);
}
}
@ -145,6 +150,8 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->c;
move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);
@ -154,12 +161,15 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt)
EBUG_ON(atomic_read(&ctxt->read_ios));
if (ctxt->stats) {
progress_list_del(ctxt->c, ctxt->stats);
trace_move_data(ctxt->c,
progress_list_del(c, ctxt->stats);
trace_move_data(c,
atomic64_read(&ctxt->stats->sectors_moved),
atomic64_read(&ctxt->stats->keys_moved));
}
mutex_lock(&c->moving_context_lock);
list_del(&ctxt->list);
mutex_unlock(&c->moving_context_lock);
}
void bch2_moving_ctxt_init(struct moving_context *ctxt,
@ -172,15 +182,23 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
memset(ctxt, 0, sizeof(*ctxt));
ctxt->c = c;
ctxt->fn = (void *) _RET_IP_;
ctxt->rate = rate;
ctxt->stats = stats;
ctxt->wp = wp;
ctxt->wait_on_copygc = wait_on_copygc;
closure_init_stack(&ctxt->cl);
mutex_init(&ctxt->lock);
INIT_LIST_HEAD(&ctxt->reads);
INIT_LIST_HEAD(&ctxt->ios);
init_waitqueue_head(&ctxt->wait);
mutex_lock(&c->moving_context_lock);
list_add(&ctxt->list, &c->moving_context_list);
mutex_unlock(&c->moving_context_lock);
if (stats) {
progress_list_add(c, stats);
stats->data_type = BCH_DATA_user;
@ -262,9 +280,6 @@ static int bch2_move_extent(struct btree_trans *trans,
return 0;
}
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_move))
return -BCH_ERR_erofs_no_writes;
/*
* Before memory allocations & taking nocow locks in
* bch2_data_update_init():
@ -334,9 +349,14 @@ static int bch2_move_extent(struct btree_trans *trans,
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
trace_move_extent_read(k.k);
mutex_lock(&ctxt->lock);
atomic_add(io->read_sectors, &ctxt->read_sectors);
atomic_inc(&ctxt->read_ios);
list_add_tail(&io->list, &ctxt->reads);
list_add_tail(&io->read_list, &ctxt->reads);
list_add_tail(&io->io_list, &ctxt->ios);
mutex_unlock(&ctxt->lock);
/*
* dropped by move_read_endio() - guards against use after free of
@ -354,7 +374,6 @@ err_free_pages:
err_free:
kfree(io);
err:
bch2_write_ref_put(c, BCH_WRITE_REF_move);
trace_and_count(c, move_extent_alloc_mem_fail, k.k);
return ret;
}
@ -759,8 +778,13 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
data_opts.rewrite_ptrs = 0;
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
if (ptr->dev == bucket.inode)
if (ptr->dev == bucket.inode) {
data_opts.rewrite_ptrs |= 1U << i;
if (ptr->cached) {
bch2_trans_iter_exit(trans, &iter);
goto next;
}
}
i++;
}
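When building the rewrite mask for an extent in the bucket being evacuated, the loop now bails out of the whole extent if the pointer on that device is cached; my reading (not stated in the diff) is that a cached copy does not need to be migrated and can simply be invalidated, so rewriting it would be wasted work. A simplified sketch of the pointer scan with hypothetical types:

#include <stdbool.h>
#include <stdio.h>

struct extent_ptr { unsigned dev; bool cached; };

/*
 * Build a bitmask of pointers to rewrite for an extent during bucket
 * evacuation; return false to skip the extent entirely when the copy on
 * the evacuating device is only a cached copy (assumption: cached data
 * can be dropped rather than moved).
 */
static bool evacuate_ptrs(const struct extent_ptr *ptrs, unsigned nr,
			  unsigned evacuating_dev, unsigned *rewrite_mask)
{
	*rewrite_mask = 0;

	for (unsigned i = 0; i < nr; i++) {
		if (ptrs[i].dev == evacuating_dev) {
			*rewrite_mask |= 1U << i;
			if (ptrs[i].cached)
				return false;	/* skip this extent */
		}
	}
	return true;
}

int main(void)
{
	struct extent_ptr ptrs[] = { { 0, false }, { 1, true } };
	unsigned mask;

	printf("evacuate dev 0: %d\n", evacuate_ptrs(ptrs, 2, 0, &mask));
	printf("evacuate dev 1: %d\n", evacuate_ptrs(ptrs, 2, 1, &mask));
	return 0;
}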
@ -819,14 +843,6 @@ next:
}
trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
bch2_trans_unlock(trans);
move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);
if (!ctxt->write_error)
bch2_verify_bucket_evacuated(trans, bucket, gen);
}
err:
bch2_bkey_buf_exit(&sk, c);
return ret;
@ -1111,3 +1127,67 @@ int bch2_data_job(struct bch_fs *c,
return ret;
}
void bch2_data_jobs_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_move_stats *stats;
mutex_lock(&c->data_progress_lock);
list_for_each_entry(stats, &c->data_progress_list, list) {
prt_printf(out, "%s: data type %s btree_id %s position: ",
stats->name,
bch2_data_types[stats->data_type],
bch2_btree_ids[stats->btree_id]);
bch2_bpos_to_text(out, stats->pos);
prt_printf(out, "%s", "\n");
}
mutex_unlock(&c->data_progress_lock);
}
static void bch2_moving_ctxt_to_text(struct printbuf *out, struct moving_context *ctxt)
{
struct moving_io *io;
prt_printf(out, "%ps:", ctxt->fn);
prt_newline(out);
printbuf_indent_add(out, 2);
prt_printf(out, "reads: %u sectors %u",
atomic_read(&ctxt->read_ios),
atomic_read(&ctxt->read_sectors));
prt_newline(out);
prt_printf(out, "writes: %u sectors %u",
atomic_read(&ctxt->write_ios),
atomic_read(&ctxt->write_sectors));
prt_newline(out);
printbuf_indent_add(out, 2);
mutex_lock(&ctxt->lock);
list_for_each_entry(io, &ctxt->ios, io_list) {
bch2_write_op_to_text(out, &io->write.op);
}
mutex_unlock(&ctxt->lock);
printbuf_indent_sub(out, 4);
}
void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
{
struct moving_context *ctxt;
mutex_lock(&c->moving_context_lock);
list_for_each_entry(ctxt, &c->moving_context_list, list)
bch2_moving_ctxt_to_text(out, ctxt);
mutex_unlock(&c->moving_context_lock);
}
void bch2_fs_move_init(struct bch_fs *c)
{
INIT_LIST_HEAD(&c->moving_context_list);
mutex_init(&c->moving_context_lock);
INIT_LIST_HEAD(&c->data_progress_list);
mutex_init(&c->data_progress_lock);
}
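struct moving_io is now on two lists: read_list still drives in-order completion of reads, while the new io_list, protected by the context's mutex, keeps every IO visible for the lifetime of the write so bch2_moving_ctxt_to_text() can dump in-flight write ops. Moving contexts themselves register on c->moving_context_list in bch2_moving_ctxt_init() and drop off in bch2_moving_ctxt_exit(), which is what the new moving_ctxts sysfs attribute walks. A minimal userspace sketch of the register-on-init / unregister-on-exit pattern (a toy intrusive list, not the kernel list_head API; locking omitted):

#include <stdio.h>

/* Tiny stand-in for an intrusive doubly linked list. */
struct node { struct node *prev, *next; };

static void list_init(struct node *head) { head->prev = head->next = head; }

static void list_add(struct node *n, struct node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

struct fs { struct node moving_contexts; };
struct moving_ctxt { struct node list; const char *name; };	/* list must stay first */

/* Register at init so introspection code can walk all live contexts: */
static void ctxt_init(struct moving_ctxt *ctxt, struct fs *fs, const char *name)
{
	ctxt->name = name;
	list_add(&ctxt->list, &fs->moving_contexts);
}

static void ctxt_exit(struct moving_ctxt *ctxt)
{
	list_del(&ctxt->list);
}

static void ctxts_to_text(struct fs *fs)
{
	for (struct node *n = fs->moving_contexts.next;
	     n != &fs->moving_contexts; n = n->next)
		printf("%s\n", ((struct moving_ctxt *) n)->name);	/* valid: list is first member */
}

int main(void)
{
	struct fs fs;
	struct moving_ctxt copygc, rebalance;

	list_init(&fs.moving_contexts);
	ctxt_init(&copygc, &fs, "copygc");
	ctxt_init(&rebalance, &fs, "rebalance");
	ctxts_to_text(&fs);
	ctxt_exit(&rebalance);
	ctxt_exit(&copygc);
	return 0;
}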

View File

@ -11,6 +11,9 @@ struct bch_read_bio;
struct moving_context {
struct bch_fs *c;
struct list_head list;
void *fn;
struct bch_ratelimit *rate;
struct bch_move_stats *stats;
struct write_point_specifier wp;
@ -19,7 +22,10 @@ struct moving_context {
/* For waiting on outstanding reads and writes: */
struct closure cl;
struct mutex lock;
struct list_head reads;
struct list_head ios;
/* in flight sectors: */
atomic_t read_sectors;
@ -84,6 +90,9 @@ int bch2_data_job(struct bch_fs *,
struct bch_ioctl_data);
void bch2_move_stats_init(struct bch_move_stats *stats, char *name);
void bch2_data_jobs_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_move_init(struct bch_fs *);
#endif /* _BCACHEFS_MOVE_H */

View File

@ -46,7 +46,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
return 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
bch2_trans_iter_exit(trans, &iter);
@ -85,7 +85,7 @@ static int move_bucket_cmp(const void *_l, const void *_r)
const struct move_bucket *l = _l;
const struct move_bucket *r = _r;
return bpos_cmp(l->bucket, r->bucket) ?: cmp_int(l->gen, r->gen);
return bkey_cmp(l->bucket, r->bucket);
}
static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)
@ -178,13 +178,13 @@ static int bch2_copygc(struct btree_trans *trans,
move_buckets_in_flight *buckets_in_flight)
{
struct bch_fs *c = trans->c;
struct bch_move_stats move_stats;
struct data_update_opts data_opts = {
.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
};
move_buckets buckets = { 0 };
struct move_bucket_in_flight *f;
struct move_bucket *i;
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
ret = bch2_btree_write_buffer_flush(trans);
@ -192,9 +192,6 @@ static int bch2_copygc(struct btree_trans *trans,
__func__, bch2_err_str(ret)))
return ret;
bch2_move_stats_init(&move_stats, "copygc");
ctxt->stats = &move_stats;
ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
if (ret)
goto err;
@ -222,8 +219,8 @@ err:
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
ctxt->stats = NULL;
moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
trace_and_count(c, copygc, c, moved, 0, 0, 0);
return ret;
}
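Copygc no longer sets up a fresh bch_move_stats per pass: the stats now belong to the long-running moving_context created in bch2_copygc_thread(), and each pass reports the sectors it moved as the difference of the cumulative counter before and after the pass. (bch2_bucket_is_movable() also switches to reading the alloc key through the key cache via BTREE_ITER_CACHED.) A small sketch of the snapshot-the-counter pattern:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Cumulative counter shared by all passes (lives with the context). */
static atomic_uint_fast64_t sectors_moved;

static void do_one_pass(uint64_t work)
{
	/* the pass accumulates into the shared counter... */
	atomic_fetch_add(&sectors_moved, work);
}

int main(void)
{
	for (int pass = 0; pass < 3; pass++) {
		uint64_t before = atomic_load(&sectors_moved);

		do_one_pass(100 + 10 * pass);

		/* ...and per-pass throughput is the delta across the pass: */
		uint64_t moved = atomic_load(&sectors_moved) - before;

		printf("pass %d moved %llu sectors\n",
		       pass, (unsigned long long) moved);
	}
	return 0;
}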
@ -282,6 +279,7 @@ static int bch2_copygc_thread(void *arg)
struct bch_fs *c = arg;
struct btree_trans trans;
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
move_buckets_in_flight move_buckets;
u64 last, wait;
@ -294,7 +292,9 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, NULL, NULL,
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
false);
@ -334,8 +334,8 @@ static int bch2_copygc_thread(void *arg)
wake_up(&c->copygc_running_wq);
}
bch2_moving_ctxt_exit(&ctxt);
bch2_trans_exit(&trans);
bch2_moving_ctxt_exit(&ctxt);
free_fifo(&move_buckets);
return 0;

View File

@ -92,6 +92,12 @@ enum opt_type {
#define RATELIMIT_ERRORS_DEFAULT false
#endif
#ifdef CONFIG_BCACHEFS_DEBUG
#define BCACHEFS_VERBOSE_DEFAULT true
#else
#define BCACHEFS_VERBOSE_DEFAULT false
#endif
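The verbose option's default now tracks the kernel config: true on CONFIG_BCACHEFS_DEBUG builds, false otherwise, via the BCACHEFS_VERBOSE_DEFAULT macro referenced by the option table. A minimal standalone illustration of that compile-time-default pattern (MYFS_* names are placeholders); compiling with -DMYFS_DEBUG flips the default:

#include <stdbool.h>
#include <stdio.h>

/* Pick the default at compile time based on a config macro. */
#ifdef MYFS_DEBUG
#define MYFS_VERBOSE_DEFAULT	true
#else
#define MYFS_VERBOSE_DEFAULT	false
#endif

struct opts { bool verbose; };

int main(void)
{
	struct opts opts = { .verbose = MYFS_VERBOSE_DEFAULT };

	printf("verbose default: %d\n", opts.verbose);
	return 0;
}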
#define BCH_OPTS() \
x(block_size, u16, \
OPT_FS|OPT_FORMAT| \
@ -276,7 +282,7 @@ enum opt_type {
x(verbose, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
BCH2_NO_SB_OPT, BCACHEFS_VERBOSE_DEFAULT, \
NULL, "Extra debugging information during mount/recovery")\
x(journal_flush_delay, u32, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \

View File

@ -189,7 +189,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
for_each_btree_key_norestart(trans, reflink_iter, BTREE_ID_reflink,
POS(0, c->reflink_hint),
BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
BTREE_ITER_SLOTS, k, ret) {
if (reflink_iter.pos.inode) {
bch2_btree_iter_set_pos(&reflink_iter, POS_MIN);
continue;

View File

@ -513,7 +513,9 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
n->v.pad = 0;
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
if (ret)
goto err;
@ -540,7 +542,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
n->v.children[1] = cpu_to_le32(new_snapids[1]);
n->v.subvol = 0;
SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
if (ret)
goto err;
}
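bch2_snapshot_node_create() now marks the new snapshot key right after queueing the update, chaining the two fallible calls with GCC's binary conditional operator, an idiom used throughout this codebase: with 0 meaning success, "a() ?: b()" runs b() only when a() succeeded and keeps the first error code in ret. A standalone sketch of the idiom (requires GNU C):

#include <stdio.h>

static int step_one(void) { return 0; }		/* success */
static int step_two(void) { return -5; }	/* fails   */

int main(void)
{
	/*
	 * GNU C extension: "x ?: y" yields x if x is nonzero, else y,
	 * without evaluating x twice.  Since 0 means success here,
	 * step_two() runs only if step_one() succeeded, and ret holds
	 * the first error encountered.
	 */
	int ret = step_one() ?: step_two();

	printf("ret = %d\n", ret);
	return 0;
}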

View File

@ -206,11 +206,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned i, clean_passes = 0;
u64 seq = 0;
bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true);
bch2_rebalance_stop(c);
bch2_copygc_stop(c);
bch2_gc_thread_stop(c);
bch2_fs_ec_flush(c);
bch_verbose(c, "flushing journal and stopping allocators");
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal));
do {
clean_passes++;
@ -224,7 +228,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
} while (clean_passes < 2);
bch_verbose(c, "flushing journal and stopping allocators complete");
bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu",
journal_cur_seq(&c->journal));
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
@ -679,6 +684,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_rebalance_init(c);
bch2_fs_quota_init(c);
bch2_fs_ec_init_early(c);
bch2_fs_move_init(c);
INIT_LIST_HEAD(&c->list);
@ -697,17 +703,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);
INIT_LIST_HEAD(&c->data_progress_list);
mutex_init(&c->data_progress_lock);
mutex_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock);
seqcount_init(&c->usage_lock);

View File

@ -248,6 +248,7 @@ read_attribute(io_timers_read);
read_attribute(io_timers_write);
read_attribute(data_jobs);
read_attribute(moving_ctxts);
#ifdef CONFIG_BCACHEFS_TESTS
write_attribute(perf_test);
@ -277,25 +278,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
return ret;
}
static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
{
long ret = 0;
struct bch_move_stats *stats;
mutex_lock(&c->data_progress_lock);
list_for_each_entry(stats, &c->data_progress_list, list) {
prt_printf(out, "%s: data type %s btree_id %s position: ",
stats->name,
bch2_data_types[stats->data_type],
bch2_btree_ids[stats->btree_id]);
bch2_bpos_to_text(out, stats->pos);
prt_printf(out, "%s", "\n");
}
mutex_unlock(&c->data_progress_lock);
return ret;
}
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans trans;
@ -476,7 +458,10 @@ SHOW(bch2_fs)
bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
if (attr == &sysfs_data_jobs)
data_progress_to_text(out, c);
bch2_data_jobs_to_text(out, c);
if (attr == &sysfs_moving_ctxts)
bch2_fs_moving_ctxts_to_text(out, c);
#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)
@ -693,6 +678,7 @@ struct attribute *bch2_fs_internal_files[] = {
sysfs_pd_controller_files(rebalance),
&sysfs_data_jobs,
&sysfs_moving_ctxts,
&sysfs_internal_uuid,
NULL

View File

@ -143,8 +143,17 @@ static int __do_six_trylock_type(struct six_lock *lock,
* lock, issue a wakeup because we might have caused a
* spurious trylock failure:
*/
#if 0
/*
* This code should be sufficient, but we're seeing unexplained
* lost wakeups:
*/
if (old.write_locking)
ret = -1 - SIX_LOCK_write;
#else
if (!ret)
ret = -1 - SIX_LOCK_write;
#endif
} else if (type == SIX_LOCK_write && lock->readers) {
if (try) {
atomic64_add(__SIX_VAL(write_locking, 1),
@ -320,11 +329,10 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
* Similar to the lock path, we may have caused a spurious write
* lock fail and need to issue a wakeup:
*/
if (old.write_locking)
six_lock_wakeup(lock, old, SIX_LOCK_write);
if (ret)
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
else
six_lock_wakeup(lock, old, SIX_LOCK_write);
return ret;
}