mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-02 00:00:03 +03:00

Update bcachefs sources to c9b4a210f9 fixup! bcachefs: Fixes for going RO

parent 90d54b3886
commit 1952c0790c
@@ -1 +1 @@
-e1f6739c4a9fee1db7d94a5087a253041542cb62
+c9b4a210f946889f56654dda24dd8ced3b1aac24
@@ -208,29 +208,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
			       get_alloc_field(a.v, &d, i));
}

int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
			      unsigned level, struct bkey_s_c k)
{
	struct btree_trans trans;
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	struct bch_dev *ca;
	unsigned i;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
					 BTREE_ID_ALLOC, POS_MIN);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		if (!level)
			bch2_mark_key(c, k, 0, 0, NULL, 0,
				      BTREE_TRIGGER_ALLOC_READ|
				      BTREE_TRIGGER_NOATOMIC);

		bch2_btree_and_journal_iter_advance(&iter);
	}
	return 0;
}

	ret = bch2_trans_exit(&trans) ?: ret;
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
	struct bch_dev *ca;
	unsigned i;
	int ret = 0;

	ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
					  NULL, bch2_alloc_read_fn);
	if (ret) {
		bch_err(c, "error reading alloc info: %i", ret);
		return ret;
@@ -847,7 +843,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
	struct bkey_s_c k;
	bool invalidating_cached_data;
	size_t b;
	int ret;
	int ret = 0;

	BUG_ON(!ca->alloc_heap.used ||
	       !ca->alloc_heap.data[0].nr);
@@ -861,11 +857,27 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,

	BUG_ON(!fifo_push(&ca->free_inc, b));

	g = bucket(ca, b);
	m = READ_ONCE(g->mark);

	bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);

	spin_unlock(&c->freelist_lock);
	percpu_up_read(&c->mark_lock);

	invalidating_cached_data = m.cached_sectors != 0;
	if (!invalidating_cached_data)
		goto out;

	/*
	 * If the read-only path is trying to shut down, we can't be generating
	 * new btree updates:
	 */
	if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
		ret = 1;
		goto out;
	}

	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);

	bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
@@ -919,7 +931,7 @@ retry:
				flags);
	if (ret == -EINTR)
		goto retry;

out:
	if (!ret) {
		/* remove from alloc_heap: */
		struct alloc_heap_entry e, *top = ca->alloc_heap.data;
@@ -953,7 +965,7 @@ retry:
		percpu_up_read(&c->mark_lock);
	}

	return ret;
	return ret < 0 ? ret : 0;
}

static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
@@ -1465,11 +1477,6 @@ again:
	}
	rcu_read_unlock();

	if (c->btree_roots_dirty) {
		bch2_journal_meta(&c->journal);
		goto again;
	}

	return !nodes_unwritten &&
		!bch2_btree_interior_updates_nr_pending(c);
}

@@ -477,8 +477,10 @@ struct bch_dev {
enum {
	/* startup: */
	BCH_FS_ALLOC_READ_DONE,
	BCH_FS_ALLOC_CLEAN,
	BCH_FS_ALLOCATOR_STARTED,
	BCH_FS_ALLOCATOR_RUNNING,
	BCH_FS_ALLOCATOR_STOPPING,
	BCH_FS_INITIAL_GC_DONE,
	BCH_FS_FSCK_DONE,
	BCH_FS_STARTED,
@@ -600,13 +602,10 @@ struct bch_fs {
	struct bio_set		btree_bio;

	struct btree_root	btree_roots[BTREE_ID_NR];
	bool			btree_roots_dirty;
	struct mutex		btree_root_lock;

	struct btree_cache	btree_cache;

	mempool_t		btree_reserve_pool;

	/*
	 * Cache of allocated btree nodes - if we allocate a btree node and
	 * don't use it, if we free it that space can't be reused until going
@@ -624,6 +623,12 @@ struct bch_fs {
	struct mutex		btree_interior_update_lock;
	struct closure_waitlist	btree_interior_update_wait;

	struct workqueue_struct	*btree_interior_update_worker;
	struct work_struct	btree_interior_update_work;

	/* btree_iter.c: */
	struct mutex		btree_trans_lock;
	struct list_head	btree_trans_list;
	mempool_t		btree_iters_pool;

	struct workqueue_struct	*wq;

@@ -1262,6 +1262,8 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);

LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);

LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62);

/* 61-64 unused */

LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);

@@ -176,13 +176,17 @@ void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)

void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
{
	pr_buf(out, "u64s %u type %s ", k->u64s,
	       bch2_bkey_types[k->type]);
	if (k) {
		pr_buf(out, "u64s %u type %s ", k->u64s,
		       bch2_bkey_types[k->type]);

	bch2_bpos_to_text(out, k->p);
		bch2_bpos_to_text(out, k->p);

	pr_buf(out, " snap %u len %u ver %llu",
	       k->p.snapshot, k->size, k->version.lo);
		pr_buf(out, " snap %u len %u ver %llu",
		       k->p.snapshot, k->size, k->version.lo);
	} else {
		pr_buf(out, "(null)");
	}
}

void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
@@ -198,8 +202,11 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
			   struct bkey_s_c k)
{
	bch2_bkey_to_text(out, k.k);
	pr_buf(out, ": ");
	bch2_val_to_text(out, c, k);

	if (k.k) {
		pr_buf(out, ": ");
		bch2_val_to_text(out, c, k);
	}
}

void bch2_bkey_swab_val(struct bkey_s k)

@@ -553,7 +553,6 @@ out_unlock:

	list_del_init(&b->list);
	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);
out:
	b->flags = 0;
	b->written = 0;
@@ -566,6 +565,7 @@ out:
	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
			       start_time);

	memalloc_nofs_restore(flags);
	return b;
err:
	/* Try to cannibalize another cached btree node: */
@@ -581,6 +581,7 @@ err:
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);
	return ERR_PTR(-ENOMEM);
}

@@ -849,6 +850,18 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
	if (!parent)
		return NULL;

	/*
	 * There's a corner case where a btree_iter might have a node locked
	 * that is just outside its current pos - when
	 * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
	 *
	 * But the lock ordering checks in __bch2_btree_node_lock() go off of
	 * iter->pos, not the node's key: so if the iterator is marked as
	 * needing to be traversed, we risk deadlock if we don't bail out here:
	 */
	if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
		return ERR_PTR(-EINTR);

	if (!bch2_btree_node_relock(iter, level + 1)) {
		ret = ERR_PTR(-EINTR);
		goto out;

@@ -464,6 +464,7 @@ static void bch2_mark_superblocks(struct bch_fs *c)
	mutex_unlock(&c->sb_lock);
}

#if 0
/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
@@ -481,6 +482,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)

	mutex_unlock(&c->btree_interior_update_lock);
}
#endif

static void bch2_mark_allocator_buckets(struct bch_fs *c)
{
@@ -579,8 +581,10 @@ static int bch2_gc_done(struct bch_fs *c,
#define copy_bucket_field(_f)						\
	if (dst->b[b].mark._f != src->b[b].mark._f) {			\
		if (verify)						\
			fsck_err(c, "dev %u bucket %zu has wrong " #_f	\
			fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f	\
				": got %u, should be %u", i, b,		\
				dst->b[b].mark.gen,			\
				bch2_data_types[dst->b[b].mark.data_type],\
				dst->b[b].mark._f, src->b[b].mark._f);	\
		dst->b[b]._mark._f = src->b[b].mark._f;			\
	}
@@ -797,6 +801,10 @@ int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
	trace_gc_start(c);

	down_write(&c->gc_lock);

	/* flush interior btree updates: */
	closure_wait_event(&c->btree_interior_update_wait,
			   !bch2_btree_interior_updates_nr_pending(c));
again:
	ret = bch2_gc_start(c, metadata_only);
	if (ret)
@@ -808,7 +816,9 @@ again:
	if (ret)
		goto out;

#if 0
	bch2_mark_pending_btree_node_frees(c);
#endif
	bch2_mark_allocator_buckets(c);

	c->gc_count++;
@@ -1033,6 +1043,8 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
		btree_node_reset_sib_u64s(n);

		bch2_btree_build_aux_trees(n);

		bch2_btree_update_add_new_node(as, n);
		six_unlock_write(&n->lock);

		bch2_btree_node_write(c, n, SIX_LOCK_intent);
@@ -1081,7 +1093,7 @@ next:
	bch2_btree_iter_node_replace(iter, new_nodes[0]);

	for (i = 0; i < nr_new_nodes; i++)
		bch2_open_buckets_put(c, &new_nodes[i]->ob);
		bch2_btree_update_get_open_buckets(as, new_nodes[i]);

	/* Free the old nodes and update our sliding window */
	for (i = 0; i < nr_old_nodes; i++) {

@@ -631,14 +631,14 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
			  struct btree *b, struct bset *i,
			  unsigned offset, int write)
{
	pr_buf(out, "error validating btree node %s"
	       "at btree %u level %u/%u\n"
	       "pos %llu:%llu node offset %u",
	pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n"
	       "pos ",
	       write ? "before write " : "",
	       b->btree_id, b->level,
	       c->btree_roots[b->btree_id].level,
	       b->key.k.p.inode, b->key.k.p.offset,
	       b->written);
	       c->btree_roots[b->btree_id].level);
	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));

	pr_buf(out, " node offset %u", b->written);
	if (i)
		pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s));
}
@@ -944,7 +944,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry

		btree_err_on(b->data->keys.seq != bp->seq,
			     BTREE_ERR_MUST_RETRY, c, b, NULL,
			     "got wrong btree node");
			     "got wrong btree node (seq %llx want %llx)",
			     b->data->keys.seq, bp->seq);
	}

	while (b->written < c->opts.btree_node_size) {

@@ -205,8 +205,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
		if (!linked->nodes_locked)
			continue;

		/* * Must lock btree nodes in key order: */
		if (__btree_iter_cmp(iter->btree_id, pos, linked) < 0)
		/* Must lock btree nodes in key order: */
		if ((cmp_int(iter->btree_id, linked->btree_id) ?:
		     bkey_cmp(pos, linked->pos)) < 0)
			ret = false;

		/*
@@ -1320,6 +1321,16 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_

	btree_iter_advance_to_pos(iter, l, -1);

	/*
	 * XXX:
	 * keeping a node locked that's outside (even just outside) iter->pos
	 * breaks __bch2_btree_node_lock(). This seems to only affect
	 * bch2_btree_node_get_sibling so for now it's fixed there, but we
	 * should try to get rid of this corner case.
	 *
	 * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
	 */

	if (bch2_btree_node_iter_end(&l->iter) &&
	    btree_iter_pos_after_node(iter, l->b))
		btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
@@ -1912,7 +1923,7 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
	struct btree_iter *iter;

	trans_for_each_iter(trans, iter) {
		pr_err("iter: btree %s pos %llu:%llu%s%s%s %pf",
		pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
		       bch2_btree_ids[iter->btree_id],
		       iter->pos.inode,
		       iter->pos.offset,
@@ -2153,6 +2164,9 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
	trans->nr_updates2	= 0;
	trans->mem_top		= 0;

	trans->extra_journal_entries	= NULL;
	trans->extra_journal_entry_u64s	= 0;

	if (trans->fs_usage_deltas) {
		trans->fs_usage_deltas->used = 0;
		memset(&trans->fs_usage_deltas->memset_start, 0,
@@ -2189,12 +2203,25 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,

	if (expected_mem_bytes)
		bch2_trans_preload_mem(trans, expected_mem_bytes);

#ifdef CONFIG_BCACHEFS_DEBUG
	trans->pid = current->pid;
	mutex_lock(&c->btree_trans_lock);
	list_add(&trans->list, &c->btree_trans_list);
	mutex_unlock(&c->btree_trans_lock);
#endif
}

int bch2_trans_exit(struct btree_trans *trans)
{
	bch2_trans_unlock(trans);

#ifdef CONFIG_BCACHEFS_DEBUG
	mutex_lock(&trans->c->btree_trans_lock);
	list_del(&trans->list);
	mutex_unlock(&trans->c->btree_trans_lock);
#endif

	kfree(trans->fs_usage_deltas);
	kfree(trans->mem);
	if (trans->used_mempool)
@@ -2207,6 +2234,51 @@ int bch2_trans_exit(struct btree_trans *trans)
	return trans->error ? -EIO : 0;
}

void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree_trans *trans;
	struct btree_iter *iter;
	struct btree *b;
	unsigned l;

	mutex_lock(&c->btree_trans_lock);
	list_for_each_entry(trans, &c->btree_trans_list, list) {
		pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);

		trans_for_each_iter(trans, iter) {
			if (!iter->nodes_locked)
				continue;

			pr_buf(out, "  iter %s:", bch2_btree_ids[iter->btree_id]);
			bch2_bpos_to_text(out, iter->pos);
			pr_buf(out, "\n");

			for (l = 0; l < BTREE_MAX_DEPTH; l++) {
				if (btree_node_locked(iter, l)) {
					b = iter->l[l].b;

					pr_buf(out, "    %p l=%u %s ",
					       b, l, btree_node_intent_locked(iter, l) ? "i" : "r");
					bch2_bpos_to_text(out, b->key.k.p);
					pr_buf(out, "\n");
				}
			}
		}

		b = READ_ONCE(trans->locking);
		if (b) {
			pr_buf(out, "  locking %px l=%u %s:",
			       b, b->level,
			       bch2_btree_ids[b->btree_id]);
			bch2_bpos_to_text(out, b->key.k.p);
			pr_buf(out, "\n");
		}
	}
	mutex_unlock(&c->btree_trans_lock);
#endif
}

void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
	mempool_exit(&c->btree_iters_pool);
@@ -2216,6 +2288,9 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
{
	unsigned nr = BTREE_ITER_MAX;

	INIT_LIST_HEAD(&c->btree_trans_list);
	mutex_init(&c->btree_trans_lock);

	return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
			sizeof(struct btree_iter) * nr +
			sizeof(struct btree_insert_entry) * nr +

@@ -172,17 +172,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);

static inline int __btree_iter_cmp(enum btree_id id,
				   struct bpos pos,
				   const struct btree_iter *r)
{
	return cmp_int(id, r->btree_id) ?: bkey_cmp(pos, r->pos);
}

static inline int btree_iter_cmp(const struct btree_iter *l,
				 const struct btree_iter *r)
{
	return __btree_iter_cmp(l->btree_id, l->pos, r);
	return cmp_int(l->btree_id, r->btree_id) ?: bkey_cmp(l->pos, r->pos);
}

/*
@@ -303,6 +296,8 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
int bch2_trans_exit(struct btree_trans *);

void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *);

void bch2_fs_btree_iter_exit(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);

@@ -182,11 +182,21 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
				   struct btree_iter *iter,
				   enum six_lock_type type)
{
	EBUG_ON(level >= BTREE_MAX_DEPTH);
	bool ret;

	return likely(six_trylock_type(&b->lock, type)) ||
	EBUG_ON(level >= BTREE_MAX_DEPTH);
#ifdef CONFIG_BCACHEFS_DEBUG
	iter->trans->locking = b;
#endif

	ret = likely(six_trylock_type(&b->lock, type)) ||
		btree_node_lock_increment(iter, b, level, type) ||
		__bch2_btree_node_lock(b, pos, level, iter, type);

#ifdef CONFIG_BCACHEFS_DEBUG
	iter->trans->locking = NULL;
#endif
	return ret;
}

bool __bch2_btree_node_relock(struct btree_iter *, unsigned);

@@ -281,6 +281,11 @@ struct btree_insert_entry {

struct btree_trans {
	struct bch_fs		*c;
#ifdef CONFIG_BCACHEFS_DEBUG
	struct list_head	list;
	struct btree		*locking;
	pid_t			pid;
#endif
	unsigned long		ip;

	u64			iters_linked;
@@ -305,6 +310,10 @@ struct btree_trans {
	struct btree_insert_entry *updates2;

	/* update path: */
	struct jset_entry	*extra_journal_entries;
	unsigned		extra_journal_entry_u64s;
	struct journal_entry_pin *journal_pin;

	struct journal_res	journal_res;
	struct journal_preres	journal_preres;
	u64			*journal_seq;

(File diff suppressed because it is too large)
@ -6,34 +6,13 @@
|
||||
#include "btree_locking.h"
|
||||
#include "btree_update.h"
|
||||
|
||||
struct btree_reserve {
|
||||
struct disk_reservation disk_res;
|
||||
unsigned nr;
|
||||
struct btree *b[BTREE_RESERVE_MAX];
|
||||
};
|
||||
|
||||
void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
|
||||
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
|
||||
struct bkey_format *);
|
||||
|
||||
/* Btree node freeing/allocation: */
|
||||
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
|
||||
|
||||
/*
|
||||
* Tracks a btree node that has been (or is about to be) freed in memory, but
|
||||
* has _not_ yet been freed on disk (because the write that makes the new
|
||||
* node(s) visible and frees the old hasn't completed yet)
|
||||
*/
|
||||
struct pending_btree_node_free {
|
||||
bool index_update_done;
|
||||
|
||||
__le64 seq;
|
||||
enum btree_id btree_id;
|
||||
unsigned level;
|
||||
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
|
||||
};
|
||||
|
||||
#define BTREE_UPDATE_JOURNAL_RES \
|
||||
((BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2)
|
||||
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
|
||||
|
||||
/*
|
||||
* Tracks an in progress split/rewrite of a btree node and the update to the
|
||||
@ -72,9 +51,8 @@ struct btree_update {
|
||||
unsigned nodes_written:1;
|
||||
|
||||
enum btree_id btree_id;
|
||||
u8 level;
|
||||
|
||||
struct btree_reserve *reserve;
|
||||
struct disk_reservation disk_res;
|
||||
struct journal_preres journal_preres;
|
||||
|
||||
/*
|
||||
@ -96,17 +74,28 @@ struct btree_update {
|
||||
*/
|
||||
struct journal_entry_pin journal;
|
||||
|
||||
/*
|
||||
* Nodes being freed:
|
||||
* Protected by c->btree_node_pending_free_lock
|
||||
*/
|
||||
struct pending_btree_node_free pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
|
||||
unsigned nr_pending;
|
||||
/* Preallocated nodes we reserve when we start the update: */
|
||||
struct btree *prealloc_nodes[BTREE_UPDATE_NODES_MAX];
|
||||
unsigned nr_prealloc_nodes;
|
||||
|
||||
/* Nodes being freed: */
|
||||
struct keylist old_keys;
|
||||
u64 _old_keys[BTREE_UPDATE_NODES_MAX *
|
||||
BKEY_BTREE_PTR_VAL_U64s_MAX];
|
||||
|
||||
/* Nodes being added: */
|
||||
struct keylist new_keys;
|
||||
u64 _new_keys[BTREE_UPDATE_NODES_MAX *
|
||||
BKEY_BTREE_PTR_VAL_U64s_MAX];
|
||||
|
||||
/* New nodes, that will be made reachable by this update: */
|
||||
struct btree *new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
|
||||
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
|
||||
unsigned nr_new_nodes;
|
||||
|
||||
u8 open_buckets[BTREE_UPDATE_NODES_MAX *
|
||||
BCH_REPLICAS_MAX];
|
||||
u8 nr_open_buckets;
|
||||
|
||||
unsigned journal_u64s;
|
||||
u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
|
||||
|
||||
@ -120,14 +109,12 @@ struct btree_update {
|
||||
u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
|
||||
};
|
||||
|
||||
#define for_each_pending_btree_node_free(c, as, p) \
|
||||
list_for_each_entry(as, &c->btree_interior_update_list, list) \
|
||||
for (p = as->pending; p < as->pending + as->nr_pending; p++)
|
||||
|
||||
void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
|
||||
struct btree_iter *);
|
||||
void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
|
||||
|
||||
void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *);
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
|
||||
struct btree *,
|
||||
struct bkey_format);
|
||||
@ -139,6 +126,7 @@ bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
|
||||
|
||||
void bch2_btree_interior_update_will_free_node(struct btree_update *,
|
||||
struct btree *);
|
||||
void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
|
||||
|
||||
void bch2_btree_insert_node(struct btree_update *, struct btree *,
|
||||
struct btree_iter *, struct keylist *,
|
||||
@ -333,4 +321,11 @@ ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
|
||||
|
||||
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
|
||||
|
||||
void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
|
||||
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
|
||||
struct jset_entry *, struct jset_entry *);
|
||||
|
||||
void bch2_fs_btree_interior_update_exit(struct bch_fs *);
|
||||
int bch2_fs_btree_interior_update_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
|
||||
|
@ -413,6 +413,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (unlikely(trans->extra_journal_entry_u64s)) {
|
||||
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
|
||||
trans->extra_journal_entries,
|
||||
trans->extra_journal_entry_u64s);
|
||||
|
||||
trans->journal_res.offset += trans->extra_journal_entry_u64s;
|
||||
trans->journal_res.u64s -= trans->extra_journal_entry_u64s;
|
||||
}
|
||||
|
||||
/*
|
||||
* Not allowed to fail after we've gotten our journal reservation - we
|
||||
* have to use it:
|
||||
@ -511,6 +520,10 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
|
||||
i->iter);
|
||||
|
||||
if (!ret && trans->journal_pin)
|
||||
bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
|
||||
trans->journal_pin, NULL);
|
||||
|
||||
/*
|
||||
* Drop journal reservation after dropping write locks, since dropping
|
||||
* the journal reservation may kick off a journal write:
|
||||
@ -800,7 +813,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
|
||||
|
||||
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
|
||||
|
||||
trans->journal_u64s = 0;
|
||||
trans->journal_u64s = trans->extra_journal_entry_u64s;
|
||||
trans->journal_preres_u64s = 0;
|
||||
|
||||
if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
|
||||
|
@ -778,29 +778,31 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
})
|
||||
|
||||
static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, enum bch_data_type type,
|
||||
size_t b, enum bch_data_type data_type,
|
||||
unsigned sectors, bool gc)
|
||||
{
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
struct bucket_mark old, new;
|
||||
bool overflow;
|
||||
|
||||
BUG_ON(type != BCH_DATA_SB &&
|
||||
type != BCH_DATA_JOURNAL);
|
||||
BUG_ON(data_type != BCH_DATA_SB &&
|
||||
data_type != BCH_DATA_JOURNAL);
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.data_type = type;
|
||||
new.data_type = data_type;
|
||||
overflow = checked_add(new.dirty_sectors, sectors);
|
||||
}));
|
||||
|
||||
bch2_fs_inconsistent_on(old.data_type &&
|
||||
old.data_type != type, c,
|
||||
old.data_type != data_type, c,
|
||||
"different types of data in same bucket: %s, %s",
|
||||
bch2_data_types[old.data_type],
|
||||
bch2_data_types[type]);
|
||||
bch2_data_types[data_type]);
|
||||
|
||||
bch2_fs_inconsistent_on(overflow, c,
|
||||
"bucket sector count overflow: %u + %u > U16_MAX",
|
||||
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
|
||||
ca->dev_idx, b, new.gen,
|
||||
bch2_data_types[old.data_type ?: data_type],
|
||||
old.dirty_sectors, sectors);
|
||||
|
||||
if (c)
|
||||
@ -916,58 +918,117 @@ static void bucket_set_stripe(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
static bool bch2_mark_pointer(struct bch_fs *c,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type ptr_data_type,
|
||||
u8 bucket_gen, u8 *bucket_data_type,
|
||||
u16 *dirty_sectors, u16 *cached_sectors)
|
||||
{
|
||||
u16 *dst_sectors = !p.ptr.cached
|
||||
? dirty_sectors
|
||||
: cached_sectors;
|
||||
u16 orig_sectors = *dst_sectors;
|
||||
char buf[200];
|
||||
|
||||
if (gen_after(p.ptr.gen, bucket_gen)) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
|
||||
bucket_gen,
|
||||
bch2_data_types[*bucket_data_type ?: ptr_data_type],
|
||||
p.ptr.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
|
||||
bucket_gen,
|
||||
bch2_data_types[*bucket_data_type ?: ptr_data_type],
|
||||
p.ptr.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (bucket_gen != p.ptr.gen && !p.ptr.cached) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
|
||||
bucket_gen,
|
||||
bch2_data_types[*bucket_data_type ?: ptr_data_type],
|
||||
p.ptr.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (bucket_gen != p.ptr.gen)
|
||||
return 1;
|
||||
|
||||
if (*bucket_data_type && *bucket_data_type != ptr_data_type) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
|
||||
bucket_gen,
|
||||
bch2_data_types[*bucket_data_type],
|
||||
bch2_data_types[ptr_data_type],
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (checked_add(*dst_sectors, sectors)) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
|
||||
bucket_gen,
|
||||
bch2_data_types[*bucket_data_type ?: ptr_data_type],
|
||||
orig_sectors, sectors,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
*bucket_data_type = *dirty_sectors || *cached_sectors
|
||||
? ptr_data_type : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
bool gc = flags & BTREE_TRIGGER_GC;
|
||||
struct bucket_mark old, new;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
|
||||
bool overflow;
|
||||
u8 bucket_data_type;
|
||||
u64 v;
|
||||
int ret;
|
||||
|
||||
v = atomic64_read(&g->_mark.v);
|
||||
do {
|
||||
new.v.counter = old.v.counter = v;
|
||||
bucket_data_type = new.data_type;
|
||||
|
||||
/*
|
||||
* Check this after reading bucket mark to guard against
|
||||
* the allocator invalidating a bucket after we've already
|
||||
* checked the gen
|
||||
*/
|
||||
if (gen_after(p.ptr.gen, new.gen)) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"pointer gen in the future");
|
||||
return true;
|
||||
}
|
||||
ret = __mark_pointer(c, k, p, sectors, data_type, new.gen,
|
||||
&bucket_data_type,
|
||||
&new.dirty_sectors,
|
||||
&new.cached_sectors);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (new.gen != p.ptr.gen) {
|
||||
/* XXX write repair code for this */
|
||||
if (!p.ptr.cached &&
|
||||
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"stale dirty pointer");
|
||||
return true;
|
||||
}
|
||||
new.data_type = bucket_data_type;
|
||||
|
||||
if (!p.ptr.cached)
|
||||
overflow = checked_add(new.dirty_sectors, sectors);
|
||||
else
|
||||
overflow = checked_add(new.cached_sectors, sectors);
|
||||
|
||||
if (!new.dirty_sectors &&
|
||||
!new.cached_sectors) {
|
||||
new.data_type = 0;
|
||||
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
} else {
|
||||
new.data_type = data_type;
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_NOATOMIC) {
|
||||
@ -978,25 +1039,11 @@ static bool bch2_mark_pointer(struct bch_fs *c,
|
||||
old.v.counter,
|
||||
new.v.counter)) != old.v.counter);
|
||||
|
||||
if (old.data_type && old.data_type != data_type)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
new.gen,
|
||||
bch2_data_types[old.data_type],
|
||||
bch2_data_types[data_type]);
|
||||
|
||||
bch2_fs_inconsistent_on(overflow, c,
|
||||
"bucket sector count overflow: %u + %lli > U16_MAX",
|
||||
!p.ptr.cached
|
||||
? old.dirty_sectors
|
||||
: old.cached_sectors, sectors);
|
||||
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
|
||||
|
||||
BUG_ON(!gc && bucket_became_unavailable(old, new));
|
||||
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_stripe_ptr(struct bch_fs *c,
|
||||
@ -1060,6 +1107,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct extent_ptr_decoded p;
|
||||
struct bch_replicas_padded r;
|
||||
s64 dirty_sectors = 0;
|
||||
bool stale;
|
||||
int ret;
|
||||
|
||||
r.e.data_type = data_type;
|
||||
@ -1072,8 +1120,13 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
s64 disk_sectors = data_type == BCH_DATA_BTREE
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, offset, sectors, flags);
|
||||
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
|
||||
fs_usage, journal_seq, flags);
|
||||
|
||||
ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type,
|
||||
fs_usage, journal_seq, flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
stale = ret > 0;
|
||||
|
||||
if (p.ptr.cached) {
|
||||
if (!stale)
|
||||
@ -1175,7 +1228,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_key_locked(struct bch_fs *c,
|
||||
static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
struct bkey_s_c k,
|
||||
unsigned offset, s64 sectors,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
@ -1434,29 +1487,30 @@ static int trans_get_key(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
struct extent_ptr_decoded p,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c k_a;
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bkey_i_alloc *a;
|
||||
u16 *dst_sectors, orig_sectors;
|
||||
int ret;
|
||||
|
||||
ret = trans_get_key(trans, BTREE_ID_ALLOC,
|
||||
POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
|
||||
&iter, &k);
|
||||
&iter, &k_a);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (!ret && unlikely(!test_bit(BCH_FS_ALLOC_WRITTEN, &c->flags))) {
|
||||
if (k_a.k->type != KEY_TYPE_alloc ||
|
||||
(!ret && unlikely(!test_bit(BCH_FS_ALLOC_WRITTEN, &c->flags)))) {
|
||||
/*
|
||||
* During journal replay, and if gc repairs alloc info at
|
||||
* runtime, the alloc info in the btree might not be up to date
|
||||
* yet - so, trust the in memory mark:
|
||||
* yet - so, trust the in memory mark - unless we're already
|
||||
* updating that key:
|
||||
*/
|
||||
struct bucket *g;
|
||||
struct bucket_mark m;
|
||||
@ -1467,52 +1521,13 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
u = alloc_mem_to_key(g, m);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
} else {
|
||||
/*
|
||||
* Unless we're already updating that key:
|
||||
*/
|
||||
if (k.k->type != KEY_TYPE_alloc) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"pointer to nonexistent bucket %llu:%llu",
|
||||
iter->pos.inode, iter->pos.offset);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
u = bch2_alloc_unpack(k);
|
||||
u = bch2_alloc_unpack(k_a);
|
||||
}
|
||||
|
||||
if (gen_after(u.gen, p.ptr.gen)) {
|
||||
ret = 1;
|
||||
ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type,
|
||||
&u.dirty_sectors, &u.cached_sectors);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (u.data_type && u.data_type != data_type) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s",
|
||||
iter->pos.inode, iter->pos.offset,
|
||||
u.gen,
|
||||
bch2_data_types[u.data_type],
|
||||
bch2_data_types[data_type]);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dst_sectors = !p.ptr.cached
|
||||
? &u.dirty_sectors
|
||||
: &u.cached_sectors;
|
||||
orig_sectors = *dst_sectors;
|
||||
|
||||
if (checked_add(*dst_sectors, sectors)) {
|
||||
bch2_fs_inconsistent(c,
|
||||
"bucket sector count overflow: %u + %lli > U16_MAX",
|
||||
orig_sectors, sectors);
|
||||
/* return an error indicating that we need full fsck */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
u.data_type = u.dirty_sectors || u.cached_sectors
|
||||
? data_type : 0;
|
||||
|
||||
a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
@ -1597,7 +1612,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, offset, sectors, flags);
|
||||
|
||||
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
|
||||
ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors,
|
||||
data_type);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
@@ -259,8 +259,6 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
			       size_t, enum bch_data_type, unsigned,
			       struct gc_pos, unsigned);

int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, unsigned, s64,
			 struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64,
		  struct bch_fs_usage *, u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
|
@@ -162,7 +162,7 @@ ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf)
	now = atomic_long_read(&clock->now);

	for (i = 0; i < clock->timers.used; i++)
		pr_buf(&out, "%pf:\t%li\n",
		pr_buf(&out, "%ps:\t%li\n",
		       clock->timers.data[i]->fn,
		       clock->timers.data[i]->expire - now);
	spin_unlock(&clock->timer_lock);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "super-io.h"
|
||||
|
||||
#include <linux/lz4.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/zlib.h>
|
||||
#include <linux/zstd.h>
|
||||
|
||||
@ -63,7 +64,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
|
||||
struct bbuf ret;
|
||||
struct bio_vec bv;
|
||||
struct bvec_iter iter;
|
||||
unsigned nr_pages = 0;
|
||||
unsigned nr_pages = 0, flags;
|
||||
struct page *stack_pages[16];
|
||||
struct page **pages = NULL;
|
||||
void *data;
|
||||
@ -103,7 +104,10 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
|
||||
__bio_for_each_segment(bv, bio, iter, start)
|
||||
pages[nr_pages++] = bv.bv_page;
|
||||
|
||||
flags = memalloc_nofs_save();
|
||||
data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
|
||||
memalloc_nofs_restore(flags);
|
||||
|
||||
if (pages != stack_pages)
|
||||
kfree(pages);
|
||||
|
||||
@ -603,7 +607,7 @@ have_compressed:
|
||||
}
|
||||
|
||||
if (!mempool_initialized(&c->decompress_workspace)) {
|
||||
ret = mempool_init_kmalloc_pool(
|
||||
ret = mempool_init_kvpmalloc_pool(
|
||||
&c->decompress_workspace,
|
||||
1, decompress_workspace_size);
|
||||
if (ret)
|
||||
|
@@ -104,7 +104,7 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,

	bch_scnmemcpy(out, d.v->d_name,
		      bch2_dirent_name_bytes(d));
	pr_buf(out, " -> %llu", d.v->d_inum);
	pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type);
}

static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
|
@ -1273,38 +1273,28 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
|
||||
unsigned level, struct bkey_s_c k)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (k.k->type == KEY_TYPE_stripe)
|
||||
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
|
||||
bch2_mark_key(c, k, 0, 0, NULL, 0,
|
||||
BTREE_TRIGGER_ALLOC_READ|
|
||||
BTREE_TRIGGER_NOATOMIC);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_and_journal_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
ret = bch2_fs_ec_start(c);
|
||||
int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC,
|
||||
NULL, bch2_stripes_read_fn);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
|
||||
BTREE_ID_EC, POS_MIN);
|
||||
|
||||
|
||||
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
||||
bch2_mark_key(c, k, 0, 0, NULL, 0,
|
||||
BTREE_TRIGGER_ALLOC_READ|
|
||||
BTREE_TRIGGER_NOATOMIC);
|
||||
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
}
|
||||
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
if (ret) {
|
||||
bch_err(c, "error reading stripes: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
|
||||
@ -1343,11 +1333,6 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_fs_ec_start(struct bch_fs *c)
|
||||
{
|
||||
return bch2_ec_mem_alloc(c, false);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
{
|
||||
struct ec_stripe_head *h;
|
||||
|
@ -157,8 +157,6 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
|
||||
|
||||
int bch2_ec_mem_alloc(struct bch_fs *, bool);
|
||||
|
||||
int bch2_fs_ec_start(struct bch_fs *);
|
||||
|
||||
void bch2_fs_ec_exit(struct bch_fs *);
|
||||
int bch2_fs_ec_init(struct bch_fs *);
|
||||
|
||||
|
@ -85,7 +85,7 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
|
||||
if (s->fmt == fmt)
|
||||
goto found;
|
||||
|
||||
s = kzalloc(sizeof(*s), GFP_KERNEL);
|
||||
s = kzalloc(sizeof(*s), GFP_NOFS);
|
||||
if (!s) {
|
||||
if (!c->fsck_alloc_err)
|
||||
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
|
||||
|
@ -102,6 +102,7 @@ struct fsck_err_state {
|
||||
#define FSCK_CAN_IGNORE (1 << 1)
|
||||
#define FSCK_NEED_FSCK (1 << 2)
|
||||
|
||||
__printf(3, 4) __cold
|
||||
enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
|
||||
unsigned, const char *, ...);
|
||||
void bch2_flush_fsck_errs(struct bch_fs *);
|
||||
|
@@ -220,7 +220,7 @@ void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
{
	struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);

	pr_buf(out, "seq %llu sectors %u written %u min_key ",
	pr_buf(out, "seq %llx sectors %u written %u min_key ",
	       le64_to_cpu(bp.v->seq),
	       le16_to_cpu(bp.v->sectors),
	       le16_to_cpu(bp.v->sectors_written));
|
||||
|
@ -845,7 +845,7 @@ retry:
|
||||
sectors = k.k->size - offset_into_extent;
|
||||
|
||||
ret = bch2_read_indirect_extent(trans,
|
||||
&offset_into_extent, sk.k);
|
||||
&offset_into_extent, &sk);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -2844,6 +2844,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
|
||||
u64 aligned_len;
|
||||
loff_t ret = 0;
|
||||
|
||||
if (!c->opts.reflink)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -889,7 +889,7 @@ retry:
|
||||
sectors = k.k->size - offset_into_extent;
|
||||
|
||||
ret = bch2_read_indirect_extent(&trans,
|
||||
&offset_into_extent, cur.k);
|
||||
&offset_into_extent, &cur);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
|
@@ -1169,7 +1169,7 @@ static int check_inode_nlink(struct bch_fs *c,
	}

	if (!S_ISDIR(u->bi_mode) && link->dir_count) {
		need_fsck_err(c, "non directory with subdirectories",
		need_fsck_err(c, "non directory with subdirectories (inum %llu)",
			      u->bi_inum);
		return 0;
	}
||||
|
@ -1641,7 +1641,7 @@ retry:
|
||||
sectors = k.k->size - offset_into_extent;
|
||||
|
||||
ret = bch2_read_indirect_extent(&trans,
|
||||
&offset_into_extent, sk.k);
|
||||
&offset_into_extent, &sk);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -1943,14 +1943,14 @@ static void bch2_read_endio(struct bio *bio)
|
||||
|
||||
int __bch2_read_indirect_extent(struct btree_trans *trans,
|
||||
unsigned *offset_into_extent,
|
||||
struct bkey_i *orig_k)
|
||||
struct bkey_on_stack *orig_k)
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
u64 reflink_offset;
|
||||
int ret;
|
||||
|
||||
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
|
||||
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
|
||||
*offset_into_extent;
|
||||
|
||||
iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
|
||||
@ -1973,7 +1973,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
*offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
|
||||
bkey_reassemble(orig_k, k);
|
||||
bkey_on_stack_reassemble(orig_k, trans->c, k);
|
||||
err:
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ret;
|
||||
@ -2273,7 +2273,7 @@ retry:
|
||||
k = bkey_i_to_s_c(sk.k);
|
||||
|
||||
ret = bch2_read_indirect_extent(&trans,
|
||||
&offset_into_extent, sk.k);
|
||||
&offset_into_extent, &sk);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _BCACHEFS_IO_H
|
||||
|
||||
#include "checksum.h"
|
||||
#include "bkey_on_stack.h"
|
||||
#include "io_types.h"
|
||||
|
||||
#define to_wbio(_bio) \
|
||||
@ -110,13 +111,13 @@ struct cache_promote_op;
|
||||
struct extent_ptr_decoded;
|
||||
|
||||
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
|
||||
struct bkey_i *);
|
||||
struct bkey_on_stack *);
|
||||
|
||||
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
|
||||
unsigned *offset_into_extent,
|
||||
struct bkey_i *k)
|
||||
struct bkey_on_stack *k)
|
||||
{
|
||||
return k->k.type == KEY_TYPE_reflink_p
|
||||
return k->k->k.type == KEY_TYPE_reflink_p
|
||||
? __bch2_read_indirect_extent(trans, offset_into_extent, k)
|
||||
: 0;
|
||||
}
|
||||
|
@ -959,15 +959,12 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
|
||||
|
||||
void bch2_fs_journal_stop(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
bch2_journal_flush_all_pins(j);
|
||||
|
||||
wait_event(j->wait, journal_entry_close(j));
|
||||
|
||||
/* do we need to write another journal entry? */
|
||||
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
|
||||
c->btree_roots_dirty)
|
||||
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags))
|
||||
bch2_journal_meta(j);
|
||||
|
||||
journal_quiesce(j);
|
||||
@ -1238,14 +1235,14 @@ ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
|
||||
i, atomic_read(&pin_list->count));
|
||||
|
||||
list_for_each_entry(pin, &pin_list->list, list)
|
||||
pr_buf(&out, "\t%p %pf\n",
|
||||
pr_buf(&out, "\t%px %ps\n",
|
||||
pin, pin->flush);
|
||||
|
||||
if (!list_empty(&pin_list->flushed))
|
||||
pr_buf(&out, "flushed:\n");
|
||||
|
||||
list_for_each_entry(pin, &pin_list->flushed, list)
|
||||
pr_buf(&out, "\t%p %pf\n",
|
||||
pr_buf(&out, "\t%px %ps\n",
|
||||
pin, pin->flush);
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
@ -199,27 +199,39 @@ bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
|
||||
return entry;
|
||||
}
|
||||
|
||||
static inline struct jset_entry *
|
||||
journal_res_entry(struct journal *j, struct journal_res *res)
|
||||
{
|
||||
return vstruct_idx(j->buf[res->idx].data, res->offset);
|
||||
}
|
||||
|
||||
static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
|
||||
enum btree_id id, unsigned level,
|
||||
const void *data, unsigned u64s)
|
||||
{
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->u64s = cpu_to_le16(u64s);
|
||||
entry->type = type;
|
||||
entry->btree_id = id;
|
||||
entry->level = level;
|
||||
memcpy_u64s_small(entry->_data, data, u64s);
|
||||
|
||||
return jset_u64s(u64s);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
|
||||
unsigned type, enum btree_id id,
|
||||
unsigned level,
|
||||
const void *data, unsigned u64s)
|
||||
{
|
||||
struct journal_buf *buf = &j->buf[res->idx];
|
||||
struct jset_entry *entry = vstruct_idx(buf->data, res->offset);
|
||||
unsigned actual = jset_u64s(u64s);
|
||||
unsigned actual = journal_entry_set(journal_res_entry(j, res),
|
||||
type, id, level, data, u64s);
|
||||
|
||||
EBUG_ON(!res->ref);
|
||||
EBUG_ON(actual > res->u64s);
|
||||
|
||||
res->offset += actual;
|
||||
res->u64s -= actual;
|
||||
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->u64s = cpu_to_le16(u64s);
|
||||
entry->type = type;
|
||||
entry->btree_id = id;
|
||||
entry->level = level;
|
||||
memcpy_u64s(entry->_data, data, u64s);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "bcachefs.h"
|
||||
#include "alloc_foreground.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "buckets.h"
|
||||
#include "checksum.h"
|
||||
#include "error.h"
|
||||
@ -993,8 +994,23 @@ void bch2_journal_write(struct closure *cl)
|
||||
|
||||
j->write_start_time = local_clock();
|
||||
|
||||
start = vstruct_last(jset);
|
||||
end = bch2_journal_super_entries_add_common(c, start,
|
||||
/*
|
||||
* New btree roots are set by journalling them; when the journal entry
|
||||
* gets written we have to propagate them to c->btree_roots
|
||||
*
|
||||
* But, every journal entry we write has to contain all the btree roots
|
||||
* (at least for now); so after we copy btree roots to c->btree_roots we
|
||||
* have to get any missing btree roots and add them to this journal
|
||||
* entry:
|
||||
*/
|
||||
|
||||
bch2_journal_entries_to_btree_roots(c, jset);
|
||||
|
||||
start = end = vstruct_last(jset);
|
||||
|
||||
end = bch2_btree_roots_to_journal_entries(c, jset->start, end);
|
||||
|
||||
end = bch2_journal_super_entries_add_common(c, end,
|
||||
le64_to_cpu(jset->seq));
|
||||
u64s = (u64 *) end - (u64 *) start;
|
||||
BUG_ON(u64s > j->entry_u64s_reserved);
|
||||
|
@ -330,7 +330,7 @@ static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
|
||||
|
||||
__journal_pin_drop(j, pin);
|
||||
|
||||
BUG_ON(!atomic_read(&pin_list->count));
|
||||
BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
|
||||
|
||||
atomic_inc(&pin_list->count);
|
||||
pin->seq = seq;
|
||||
@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
/* returns true if we did work */
|
||||
static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_nr)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
bool ret = false;
|
||||
u64 seq;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
j->flush_in_progress = NULL;
|
||||
wake_up(&j->pin_flush_wait);
|
||||
ret = true;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work)
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
}
|
||||
|
||||
static int journal_flush_done(struct journal *j, u64 seq_to_flush)
|
||||
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
bool *did_work)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
journal_flush_pins(j, seq_to_flush, 0);
|
||||
*did_work = journal_flush_pins(j, seq_to_flush, 0);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/*
|
||||
@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return;
|
||||
bool did_work = false;
|
||||
|
||||
closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush));
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return false;
|
||||
|
||||
closure_wait_event(&j->async_wait,
|
||||
journal_flush_done(j, seq_to_flush, &did_work));
|
||||
|
||||
return did_work;
|
||||
}
|
||||
|
||||
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
|
||||
|
@ -38,7 +38,7 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
if (unlikely(!journal_pin_active(pin)))
|
||||
if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
|
||||
__bch2_journal_pin_add(j, seq, pin, flush_fn);
|
||||
}
|
||||
|
||||
@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *);
|
||||
void bch2_journal_reclaim(struct journal *);
|
||||
void bch2_journal_reclaim_work(struct work_struct *);
|
||||
|
||||
void bch2_journal_flush_pins(struct journal *, u64);
|
||||
bool bch2_journal_flush_pins(struct journal *, u64);
|
||||
|
||||
static inline void bch2_journal_flush_all_pins(struct journal *j)
|
||||
static inline bool bch2_journal_flush_all_pins(struct journal *j)
|
||||
{
|
||||
bch2_journal_flush_pins(j, U64_MAX);
|
||||
return bch2_journal_flush_pins(j, U64_MAX);
|
||||
}
|
||||
|
||||
int bch2_journal_flush_device_pins(struct journal *, int);
|
||||
|
@ -6,7 +6,7 @@
|
||||
int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
|
||||
size_t nr_inline_u64s, size_t new_u64s)
|
||||
{
|
||||
size_t oldsize = bch_keylist_u64s(l);
|
||||
size_t oldsize = bch2_keylist_u64s(l);
|
||||
size_t newsize = oldsize + new_u64s;
|
||||
u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
|
||||
u64 *new_keys;
|
||||
@ -52,7 +52,7 @@ void bch2_keylist_pop_front(struct keylist *l)
|
||||
|
||||
memmove_u64s_down(l->keys,
|
||||
bkey_next(l->keys),
|
||||
bch_keylist_u64s(l));
|
||||
bch2_keylist_u64s(l));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
|
@ -36,14 +36,14 @@ static inline bool bch2_keylist_empty(struct keylist *l)
|
||||
return l->top == l->keys;
|
||||
}
|
||||
|
||||
static inline size_t bch_keylist_u64s(struct keylist *l)
|
||||
static inline size_t bch2_keylist_u64s(struct keylist *l)
|
||||
{
|
||||
return l->top_p - l->keys_p;
|
||||
}
|
||||
|
||||
static inline size_t bch2_keylist_bytes(struct keylist *l)
|
||||
{
|
||||
return bch_keylist_u64s(l) * sizeof(u64);
|
||||
return bch2_keylist_u64s(l) * sizeof(u64);
|
||||
}
|
||||
|
||||
static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
|
||||
|
@ -151,15 +151,8 @@ retry:
|
||||
}
|
||||
|
||||
/* flush relevant btree updates */
|
||||
while (1) {
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c) ||
|
||||
c->btree_roots_dirty);
|
||||
if (c->btree_roots_dirty)
|
||||
bch2_journal_meta(&c->journal);
|
||||
if (!bch2_btree_interior_updates_nr_pending(c))
|
||||
break;
|
||||
}
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
|
@ -775,14 +775,8 @@ int bch2_data_job(struct bch_fs *c,
|
||||
|
||||
ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
|
||||
|
||||
while (1) {
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c) ||
|
||||
c->btree_roots_dirty);
|
||||
if (!bch2_btree_interior_updates_nr_pending(c))
|
||||
break;
|
||||
bch2_journal_meta(&c->journal);
|
||||
}
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
|
||||
|
@@ -207,6 +207,11 @@ enum opt_type {
	  OPT_BOOL(),						\
	  BCH_SB_PRJQUOTA,	false,				\
	  NULL,		"Enable project quotas")		\
	x(reflink,		u8,				\
	  OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
	  OPT_BOOL(),						\
	  BCH_SB_REFLINK,	true,				\
	  NULL,		"Enable reflink support")		\
	x(degraded,		u8,				\
	  OPT_MOUNT,						\
	  OPT_BOOL(),						\
@ -191,6 +191,78 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
					b->btree_id, b->level, b->data->min_key);
}

/* Walk btree, overlaying keys from the journal: */

static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
				struct journal_keys *journal_keys,
				enum btree_id btree_id,
				btree_walk_node_fn node_fn,
				btree_walk_key_fn key_fn)
{
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		ret = key_fn(c, btree_id, b->level, k);
		if (ret)
			break;

		if (b->level) {
			struct btree *child;
			BKEY_PADDED(k) tmp;

			bkey_reassemble(&tmp.k, k);
			k = bkey_i_to_s_c(&tmp.k);

			bch2_btree_and_journal_iter_advance(&iter);

			if (b->level > 0) {
				child = bch2_btree_node_get_noiter(c, &tmp.k,
						b->btree_id, b->level - 1);
				ret = PTR_ERR_OR_ZERO(child);
				if (ret)
					break;

				ret = (node_fn ? node_fn(c, b) : 0) ?:
					bch2_btree_and_journal_walk_recurse(c, child,
						journal_keys, btree_id, node_fn, key_fn);
				six_unlock_read(&child->lock);

				if (ret)
					break;
			}
		} else {
			bch2_btree_and_journal_iter_advance(&iter);
		}
	}

	return ret;
}

int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
				enum btree_id btree_id,
				btree_walk_node_fn node_fn,
				btree_walk_key_fn key_fn)
{
	struct btree *b = c->btree_roots[btree_id].b;
	int ret = 0;

	if (btree_node_fake(b))
		return 0;

	six_lock_read(&b->lock);
	ret = (node_fn ? node_fn(c, b) : 0) ?:
		bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
						    node_fn, key_fn) ?:
		key_fn(c, btree_id, b->level + 1, bkey_i_to_s_c(&b->key));
	six_unlock_read(&b->lock);

	return ret;
}
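
Note (not part of the diff): a minimal sketch of how the new walker might be called. count_keys_fn is a hypothetical callback; the walker invokes key_fn for every key it finds, with journal keys overlaid on top of the btree contents:

	static int count_keys_fn(struct bch_fs *c, enum btree_id id,
				 unsigned level, struct bkey_s_c k)
	{
		/* a real callback would mark or validate the key here */
		return 0;
	}

	ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
					  NULL, count_keys_fn);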

/* sort and dedup all keys in the journal: */

void bch2_journal_entries_free(struct list_head *list)
@ -691,6 +763,7 @@ static int verify_superblock_clean(struct bch_fs *c,
			"superblock read clock doesn't match journal after clean shutdown");

	for (i = 0; i < BTREE_ID_NR; i++) {
		char buf1[200], buf2[200];
		struct bkey_i *k1, *k2;
		unsigned l1 = 0, l2 = 0;

@ -706,7 +779,11 @@ static int verify_superblock_clean(struct bch_fs *c,
			k1->k.u64s != k2->k.u64s ||
			memcmp(k1, k2, bkey_bytes(k1)) ||
			l1 != l2, c,
			"superblock btree root doesn't match journal after clean shutdown");
			"superblock btree root %u doesn't match journal after clean shutdown\n"
			"sb: l=%u %s\n"
			"journal: l=%u %s\n", i,
			l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1),
			l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2));
	}
fsck_err:
	return ret;

@ -1077,6 +1154,15 @@ int bch2_fs_initialize(struct bch_fs *c)
		bch2_mark_dev_superblock(c, ca, 0);
	mutex_unlock(&c->sb_lock);

	mutex_lock(&c->sb_lock);
	c->disk_sb.sb->version = c->disk_sb.sb->version_min =
		le16_to_cpu(bcachefs_metadata_version_current);
	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
	c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);

@ -1135,11 +1221,6 @@ int bch2_fs_initialize(struct bch_fs *c)
		goto err;

	mutex_lock(&c->sb_lock);
	c->disk_sb.sb->version = c->disk_sb.sb->version_min =
		le16_to_cpu(bcachefs_metadata_version_current);
	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
	c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;

	SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
@ -44,6 +44,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
						struct journal_keys *,
						struct btree *);

typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
				 unsigned level, struct bkey_s_c k);

int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
				btree_walk_node_fn, btree_walk_key_fn);

void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);
@ -167,6 +167,9 @@ s64 bch2_remap_range(struct bch_fs *c,
	u64 src_done, dst_done;
	int ret = 0, ret2 = 0;

	if (!c->opts.reflink)
		return -EOPNOTSUPP;

	if (!percpu_ref_tryget(&c->writes))
		return -EROFS;
@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"

@ -955,7 +956,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)

	mutex_lock(&c->sb_lock);
	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
	c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;

@ -989,27 +989,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
					struct jset_entry *entry,
					u64 journal_seq)
{
	struct btree_root *r;
	unsigned i;

	mutex_lock(&c->btree_root_lock);

	for (r = c->btree_roots;
	     r < c->btree_roots + BTREE_ID_NR;
	     r++)
		if (r->alive) {
			entry_init_u64s(entry, r->key.u64s + 1);
			entry->btree_id = r - c->btree_roots;
			entry->level = r->level;
			entry->type = BCH_JSET_ENTRY_btree_root;
			bkey_copy(&entry->start[0], &r->key);

			entry = vstruct_next(entry);
		}
	c->btree_roots_dirty = false;

	mutex_unlock(&c->btree_root_lock);

	percpu_down_write(&c->mark_lock);

	if (!journal_seq) {

@ -1110,6 +1091,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)

	entry = sb_clean->start;
	entry = bch2_journal_super_entries_add_common(c, entry, 0);
	entry = bch2_btree_roots_to_journal_entries(c, entry, entry);
	BUG_ON((void *) entry > vstruct_end(&sb_clean->field));

	memset(entry, 0,
@ -207,7 +207,7 @@ int bch2_congested(void *data, int bdi_bits)
static void __bch2_fs_read_only(struct bch_fs *c)
{
	struct bch_dev *ca;
	bool wrote;
	bool wrote = false;
	unsigned i, clean_passes = 0;
	int ret;
@ -224,48 +224,68 @@ static void __bch2_fs_read_only(struct bch_fs *c)
	 */
	bch2_journal_flush_all_pins(&c->journal);

	/*
	 * If the allocator threads didn't all start up, the btree updates to
	 * write out alloc info aren't going to work:
	 */
	if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
		goto allocator_not_running;
		goto nowrote_alloc;

	bch_verbose(c, "writing alloc info");
	/*
	 * This should normally just be writing the bucket read/write clocks:
	 */
	ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
		bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
	bch_verbose(c, "writing alloc info complete");

	if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
		bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);

	if (ret)
		goto nowrote_alloc;

	bch_verbose(c, "flushing journal and stopping allocators");

	bch2_journal_flush_all_pins(&c->journal);
	set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);

	do {
		wrote = false;
		clean_passes++;

		ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
			bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);

		if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
			bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);

		if (ret)
			break;

		for_each_member_device(ca, c, i)
			bch2_dev_allocator_quiesce(c, ca);

		bch2_journal_flush_all_pins(&c->journal);
		if (bch2_journal_flush_all_pins(&c->journal))
			clean_passes = 0;

		/*
		 * We need to explicitly wait on btree interior updates to complete
		 * before stopping the journal, flushing all journal pins isn't
		 * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
		 * interior updates have to drop their journal pin before they're
		 * fully complete:
		 * In flight interior btree updates will generate more journal
		 * updates and btree updates (alloc btree):
		 */
		closure_wait_event(&c->btree_interior_update_wait,
				   !bch2_btree_interior_updates_nr_pending(c));
		if (bch2_btree_interior_updates_nr_pending(c)) {
			closure_wait_event(&c->btree_interior_update_wait,
					   !bch2_btree_interior_updates_nr_pending(c));
			clean_passes = 0;
		}
		flush_work(&c->btree_interior_update_work);

		clean_passes = wrote ? 0 : clean_passes + 1;
		if (bch2_journal_flush_all_pins(&c->journal))
			clean_passes = 0;
	} while (clean_passes < 2);
allocator_not_running:
	bch_verbose(c, "flushing journal and stopping allocators complete");

	set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
nowrote_alloc:
	closure_wait_event(&c->btree_interior_update_wait,
			   !bch2_btree_interior_updates_nr_pending(c));
	flush_work(&c->btree_interior_update_work);

	for_each_member_device(ca, c, i)
		bch2_dev_allocator_stop(ca);

	clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
	clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);

	bch2_fs_journal_stop(&c->journal);

	/* XXX: mark super that alloc info is persistent */

	/*
	 * the journal kicks off btree writes via reclaim - wait for in flight
	 * writes after stopping journal:
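
Note (not part of the diff): the shape of the new shutdown sequence in outline - keep writing alloc info and flushing until two consecutive passes generate no new work:

	/*
	 *	do {
	 *		clean_passes++;
	 *		write alloc + stripe info;	// may dirty the journal
	 *		quiesce allocator threads;
	 *		if (flushing journal pins or waiting on
	 *		    interior btree updates did anything)
	 *			clean_passes = 0;
	 *	} while (clean_passes < 2);
	 */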
@ -338,8 +358,11 @@ void bch2_fs_read_only(struct bch_fs *c)
	    !test_bit(BCH_FS_ERROR, &c->flags) &&
	    !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
	    test_bit(BCH_FS_STARTED, &c->flags) &&
	    !c->opts.norecovery)
	    test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
	    !c->opts.norecovery) {
		bch_verbose(c, "marking filesystem clean");
		bch2_fs_mark_clean(c);
	}

	clear_bit(BCH_FS_RW, &c->flags);
}

@ -426,6 +449,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
	if (ret)
		goto err;

	clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);

	for_each_rw_member(ca, c, i)
		bch2_dev_allocator_add(c, ca);
	bch2_recalc_capacity(c);
@ -494,6 +519,7 @@ static void bch2_fs_free(struct bch_fs *c)
	bch2_fs_ec_exit(c);
	bch2_fs_encryption_exit(c);
	bch2_fs_io_exit(c);
	bch2_fs_btree_interior_update_exit(c);
	bch2_fs_btree_iter_exit(c);
	bch2_fs_btree_cache_exit(c);
	bch2_fs_journal_exit(&c->journal);

@ -511,8 +537,6 @@ static void bch2_fs_free(struct bch_fs *c)
	mempool_exit(&c->large_bkey_pool);
	mempool_exit(&c->btree_bounce_pool);
	bioset_exit(&c->btree_bio);
	mempool_exit(&c->btree_interior_update_pool);
	mempool_exit(&c->btree_reserve_pool);
	mempool_exit(&c->fill_iter);
	percpu_ref_exit(&c->writes);
	kfree(c->replicas.entries);

@ -675,11 +699,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)

	INIT_LIST_HEAD(&c->list);

	INIT_LIST_HEAD(&c->btree_interior_update_list);
	INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
	mutex_init(&c->btree_reserve_cache_lock);
	mutex_init(&c->btree_interior_update_lock);

	mutex_init(&c->usage_scratch_lock);

	mutex_init(&c->bio_bounce_pages_lock);

@ -752,10 +771,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
			WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
	    percpu_ref_init(&c->writes, bch2_writes_disabled,
			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
	    mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
				      sizeof(struct btree_reserve)) ||
	    mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
				      sizeof(struct btree_update)) ||
	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
	    bioset_init(&c->btree_bio, 1,
			max(offsetof(struct btree_read_bio, bio),

@ -771,6 +786,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	    bch2_fs_replicas_init(c) ||
	    bch2_fs_btree_cache_init(c) ||
	    bch2_fs_btree_iter_init(c) ||
	    bch2_fs_btree_interior_update_init(c) ||
	    bch2_fs_io_init(c) ||
	    bch2_fs_encryption_init(c) ||
	    bch2_fs_compress_init(c) ||
@ -166,6 +166,7 @@ read_attribute(journal_debug);
read_attribute(journal_pins);
read_attribute(btree_updates);
read_attribute(dirty_btree_nodes);
read_attribute(btree_transactions);

read_attribute(internal_uuid);

@ -401,6 +402,12 @@ SHOW(bch2_fs)

	if (attr == &sysfs_dirty_btree_nodes)
		return bch2_dirty_btree_nodes_print(c, buf);
	if (attr == &sysfs_btree_transactions) {
		struct printbuf out = _PBUF(buf, PAGE_SIZE);

		bch2_btree_trans_to_text(&out, c);
		return out.pos - buf;
	}

	if (attr == &sysfs_compression_stats)
		return bch2_compression_stats(c, buf);

@ -571,6 +578,7 @@ struct attribute *bch2_fs_internal_files[] = {
	&sysfs_journal_pins,
	&sysfs_btree_updates,
	&sysfs_dirty_btree_nodes,
	&sysfs_btree_transactions,

	&sysfs_read_realloc_races,
	&sysfs_extent_migrate_done,
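
Note (not part of the diff): assuming the usual sysfs layout for bch2_fs_internal_files, the new attribute should appear as /sys/fs/bcachefs/<uuid>/internal/btree_transactions and can simply be read to dump the currently outstanding btree transactions.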
linux/six.c
@ -108,7 +108,8 @@ static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
	if (!do_six_trylock_type(lock, type))
		return false;

	six_acquire(&lock->dep_map, 1);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}

@ -130,7 +131,8 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old);
	six_acquire(&lock->dep_map, 1);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1);
	return true;
}

@ -323,7 +325,8 @@ static void __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type t
__always_inline
static void __six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
	six_acquire(&lock->dep_map, 0);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0);

	if (!do_six_trylock_type(lock, type))
		__six_lock_type_slowpath(lock, type);

@ -382,7 +385,8 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));

	six_release(&lock->dep_map);
	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map);

	if (type == SIX_LOCK_intent) {
		EBUG_ON(lock->owner != current);
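
Note (not part of the diff): why write locks skip the lockdep annotations. A six write lock is only taken while the intent lock on the same six_lock is already held (intent is briefly upgraded to write), so recording it as a second acquisition of the same dep_map would look like recursive locking to lockdep. A sketch, assuming the usual per-type wrappers:

	six_lock_intent(&b->lock);	/* tracked by lockdep */
	six_lock_write(&b->lock);	/* no longer tracked after this change */
	/* ... modify the node ... */
	six_unlock_write(&b->lock);
	six_unlock_intent(&b->lock);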
@ -5,6 +5,7 @@
#include <linux/workqueue.h>

static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t work_finished = PTHREAD_COND_INITIALIZER;
static LIST_HEAD(wq_list);

struct workqueue_struct {

@ -13,8 +14,6 @@ struct workqueue_struct {
	struct work_struct *current_work;
	struct list_head pending_work;

	pthread_cond_t work_finished;

	struct task_struct *worker;
	char name[24];
};

@ -23,6 +22,11 @@ enum {
	WORK_PENDING_BIT,
};

static bool work_pending(struct work_struct *work)
{
	return test_bit(WORK_PENDING_BIT, work_data_bits(work));
}

static void clear_work_pending(struct work_struct *work)
{
	clear_bit(WORK_PENDING_BIT, work_data_bits(work));

@ -36,7 +40,7 @@ static bool set_work_pending(struct work_struct *work)
static void __queue_work(struct workqueue_struct *wq,
			 struct work_struct *work)
{
	BUG_ON(!test_bit(WORK_PENDING_BIT, work_data_bits(work)));
	BUG_ON(!work_pending(work));
	BUG_ON(!list_empty(&work->entry));

	list_add_tail(&work->entry, &wq->pending_work);

@ -130,17 +134,39 @@ retry:
	goto retry;
}

static bool __flush_work(struct work_struct *work)
static bool work_running(struct work_struct *work)
{
	struct workqueue_struct *wq;
	bool ret = false;
retry:

	list_for_each_entry(wq, &wq_list, list)
		if (wq->current_work == work) {
			pthread_cond_wait(&wq->work_finished, &wq_lock);
			ret = true;
			goto retry;
		}
		if (wq->current_work == work)
			return true;

	return false;
}

bool flush_work(struct work_struct *work)
{
	bool ret = false;

	pthread_mutex_lock(&wq_lock);
	while (work_pending(work) || work_running(work)) {
		pthread_cond_wait(&work_finished, &wq_lock);
		ret = true;
	}
	pthread_mutex_unlock(&wq_lock);

	return ret;
}

static bool __flush_work(struct work_struct *work)
{
	bool ret = false;

	while (work_running(work)) {
		pthread_cond_wait(&work_finished, &wq_lock);
		ret = true;
	}

	return ret;
}
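
Note (not part of the diff): a sketch of the semantics the rework preserves - flush_work() must not return while the item is still pending on a queue or running on any worker, and the single global work_finished condvar is broadcast after every completed item. Hypothetical usage:

	static void example_fn(struct work_struct *w)
	{
		/* ... do the deferred work ... */
	}

	void example(struct workqueue_struct *wq)
	{
		struct work_struct work;

		INIT_WORK(&work, example_fn);
		queue_work(wq, &work);
		flush_work(&work);	/* returns only after example_fn() has run */
	}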
@ -228,7 +254,7 @@ static int worker_thread(void *arg)
			continue;
		}

		BUG_ON(!test_bit(WORK_PENDING_BIT, work_data_bits(work)));
		BUG_ON(!work_pending(work));
		list_del_init(&work->entry);
		clear_work_pending(work);

@ -236,7 +262,7 @@ static int worker_thread(void *arg)
		work->func(work);
		pthread_mutex_lock(&wq_lock);

		pthread_cond_broadcast(&wq->work_finished);
		pthread_cond_broadcast(&work_finished);
	}
	pthread_mutex_unlock(&wq_lock);

@ -269,8 +295,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
	INIT_LIST_HEAD(&wq->list);
	INIT_LIST_HEAD(&wq->pending_work);

	pthread_cond_init(&wq->work_finished, NULL);

	va_start(args, max_active);
	vsnprintf(wq->name, sizeof(wq->name), fmt, args);
	va_end(args);