Update bcachefs sources to c9b4a210f9 fixup! bcachefs: Fixes for going RO

Kent Overstreet 2020-06-03 16:21:35 -04:00
parent 90d54b3886
commit 1952c0790c
48 changed files with 1017 additions and 908 deletions

View File

@ -1 +1 @@
e1f6739c4a9fee1db7d94a5087a253041542cb62
c9b4a210f946889f56654dda24dd8ced3b1aac24

View File

@ -208,29 +208,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
get_alloc_field(a.v, &d, i));
}
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
{
struct btree_trans trans;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
BTREE_ID_ALLOC, POS_MIN);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (!level)
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
bch2_btree_and_journal_iter_advance(&iter);
}
return 0;
}
ret = bch2_trans_exit(&trans) ?: ret;
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
struct bch_dev *ca;
unsigned i;
int ret = 0;
ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
NULL, bch2_alloc_read_fn);
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
@ -847,7 +843,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
struct bkey_s_c k;
bool invalidating_cached_data;
size_t b;
int ret;
int ret = 0;
BUG_ON(!ca->alloc_heap.used ||
!ca->alloc_heap.data[0].nr);
@ -861,11 +857,27 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
BUG_ON(!fifo_push(&ca->free_inc, b));
g = bucket(ca, b);
m = READ_ONCE(g->mark);
bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
spin_unlock(&c->freelist_lock);
percpu_up_read(&c->mark_lock);
invalidating_cached_data = m.cached_sectors != 0;
if (!invalidating_cached_data)
goto out;
/*
* If the read-only path is trying to shut down, we can't be generating
* new btree updates:
*/
if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
ret = 1;
goto out;
}
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
@ -919,7 +931,7 @@ retry:
flags);
if (ret == -EINTR)
goto retry;
out:
if (!ret) {
/* remove from alloc_heap: */
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
@ -953,7 +965,7 @@ retry:
percpu_up_read(&c->mark_lock);
}
return ret;
return ret < 0 ? ret : 0;
}
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
@ -1465,11 +1477,6 @@ again:
}
rcu_read_unlock();
if (c->btree_roots_dirty) {
bch2_journal_meta(&c->journal);
goto again;
}
return !nodes_unwritten &&
!bch2_btree_interior_updates_nr_pending(c);
}
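
The hunks above convert bch2_alloc_read() to the new bch2_btree_and_journal_walk() helper plus a per-key callback (bch2_alloc_read_fn); the walk itself is added in the recovery.c hunks further down. As a rough userspace sketch of that callback-driven traversal pattern, with an invented binary-tree type standing in for the real btree plus journal overlay:

/* Sketch of the callback-driven walk behind bch2_btree_and_journal_walk();
 * the node type and callback signature here are illustrative stand-ins,
 * not the real bcachefs types. */
#include <stdio.h>

struct node {
	int key;
	struct node *child[2];
};

typedef int (*walk_key_fn)(void *ctx, int key);

/* Visit keys in order, recursing into children and stopping early on a
 * non-zero return, the way bch2_btree_and_journal_walk_recurse()
 * propagates errors from key_fn. */
static int walk_recurse(struct node *n, void *ctx, walk_key_fn key_fn)
{
	if (!n)
		return 0;

	return walk_recurse(n->child[0], ctx, key_fn) ?:
	       key_fn(ctx, n->key) ?:
	       walk_recurse(n->child[1], ctx, key_fn);
}

/* Analogue of bch2_alloc_read_fn(): handle one key, return 0 to keep
 * walking. */
static int print_key(void *ctx, int key)
{
	printf("key %d\n", key);
	return 0;
}

int main(void)
{
	struct node leaves[2] = { { 1 }, { 3 } };
	struct node root = { 2, { &leaves[0], &leaves[1] } };

	return walk_recurse(&root, NULL, print_key);
}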

View File

@ -477,8 +477,10 @@ struct bch_dev {
enum {
/* startup: */
BCH_FS_ALLOC_READ_DONE,
BCH_FS_ALLOC_CLEAN,
BCH_FS_ALLOCATOR_STARTED,
BCH_FS_ALLOCATOR_RUNNING,
BCH_FS_ALLOCATOR_STOPPING,
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
@ -600,13 +602,10 @@ struct bch_fs {
struct bio_set btree_bio;
struct btree_root btree_roots[BTREE_ID_NR];
bool btree_roots_dirty;
struct mutex btree_root_lock;
struct btree_cache btree_cache;
mempool_t btree_reserve_pool;
/*
* Cache of allocated btree nodes - if we allocate a btree node and
* don't use it, if we free it that space can't be reused until going
@ -624,6 +623,12 @@ struct bch_fs {
struct mutex btree_interior_update_lock;
struct closure_waitlist btree_interior_update_wait;
struct workqueue_struct *btree_interior_update_worker;
struct work_struct btree_interior_update_work;
/* btree_iter.c: */
struct mutex btree_trans_lock;
struct list_head btree_trans_list;
mempool_t btree_iters_pool;
struct workqueue_struct *wq;

View File

@ -1262,6 +1262,8 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62);
/* 61-64 unused */
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);

View File

@ -176,13 +176,17 @@ void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
{
pr_buf(out, "u64s %u type %s ", k->u64s,
bch2_bkey_types[k->type]);
if (k) {
pr_buf(out, "u64s %u type %s ", k->u64s,
bch2_bkey_types[k->type]);
bch2_bpos_to_text(out, k->p);
bch2_bpos_to_text(out, k->p);
pr_buf(out, " snap %u len %u ver %llu",
k->p.snapshot, k->size, k->version.lo);
pr_buf(out, " snap %u len %u ver %llu",
k->p.snapshot, k->size, k->version.lo);
} else {
pr_buf(out, "(null)");
}
}
void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
@ -198,8 +202,11 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
bch2_bkey_to_text(out, k.k);
pr_buf(out, ": ");
bch2_val_to_text(out, c, k);
if (k.k) {
pr_buf(out, ": ");
bch2_val_to_text(out, c, k);
}
}
void bch2_bkey_swab_val(struct bkey_s k)

View File

@ -553,7 +553,6 @@ out_unlock:
list_del_init(&b->list);
mutex_unlock(&bc->lock);
memalloc_nofs_restore(flags);
out:
b->flags = 0;
b->written = 0;
@ -566,6 +565,7 @@ out:
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
start_time);
memalloc_nofs_restore(flags);
return b;
err:
/* Try to cannibalize another cached btree node: */
@ -581,6 +581,7 @@ err:
}
mutex_unlock(&bc->lock);
memalloc_nofs_restore(flags);
return ERR_PTR(-ENOMEM);
}
@ -849,6 +850,18 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (!parent)
return NULL;
/*
* There's a corner case where a btree_iter might have a node locked
* that is just outside its current pos - when
* bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
*
* But the lock ordering checks in __bch2_btree_node_lock() go off of
* iter->pos, not the node's key: so if the iterator is marked as
* needing to be traversed, we risk deadlock if we don't bail out here:
*/
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
return ERR_PTR(-EINTR);
if (!bch2_btree_node_relock(iter, level + 1)) {
ret = ERR_PTR(-EINTR);
goto out;

View File

@ -464,6 +464,7 @@ static void bch2_mark_superblocks(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
}
#if 0
/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
@ -481,6 +482,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
mutex_unlock(&c->btree_interior_update_lock);
}
#endif
static void bch2_mark_allocator_buckets(struct bch_fs *c)
{
@ -579,8 +581,10 @@ static int bch2_gc_done(struct bch_fs *c,
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
if (verify) \
fsck_err(c, "dev %u bucket %zu has wrong " #_f \
fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \
": got %u, should be %u", i, b, \
dst->b[b].mark.gen, \
bch2_data_types[dst->b[b].mark.data_type],\
dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
}
@ -797,6 +801,10 @@ int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
trace_gc_start(c);
down_write(&c->gc_lock);
/* flush interior btree updates: */
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
again:
ret = bch2_gc_start(c, metadata_only);
if (ret)
@ -808,7 +816,9 @@ again:
if (ret)
goto out;
#if 0
bch2_mark_pending_btree_node_frees(c);
#endif
bch2_mark_allocator_buckets(c);
c->gc_count++;
@ -1033,6 +1043,8 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
btree_node_reset_sib_u64s(n);
bch2_btree_build_aux_trees(n);
bch2_btree_update_add_new_node(as, n);
six_unlock_write(&n->lock);
bch2_btree_node_write(c, n, SIX_LOCK_intent);
@ -1081,7 +1093,7 @@ next:
bch2_btree_iter_node_replace(iter, new_nodes[0]);
for (i = 0; i < nr_new_nodes; i++)
bch2_open_buckets_put(c, &new_nodes[i]->ob);
bch2_btree_update_get_open_buckets(as, new_nodes[i]);
/* Free the old nodes and update our sliding window */
for (i = 0; i < nr_old_nodes; i++) {

View File

@ -631,14 +631,14 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct btree *b, struct bset *i,
unsigned offset, int write)
{
pr_buf(out, "error validating btree node %s"
"at btree %u level %u/%u\n"
"pos %llu:%llu node offset %u",
pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n"
"pos ",
write ? "before write " : "",
b->btree_id, b->level,
c->btree_roots[b->btree_id].level,
b->key.k.p.inode, b->key.k.p.offset,
b->written);
c->btree_roots[b->btree_id].level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
pr_buf(out, " node offset %u", b->written);
if (i)
pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s));
}
@ -944,7 +944,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
btree_err_on(b->data->keys.seq != bp->seq,
BTREE_ERR_MUST_RETRY, c, b, NULL,
"got wrong btree node");
"got wrong btree node (seq %llx want %llx)",
b->data->keys.seq, bp->seq);
}
while (b->written < c->opts.btree_node_size) {

View File

@ -205,8 +205,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
if (!linked->nodes_locked)
continue;
/* * Must lock btree nodes in key order: */
if (__btree_iter_cmp(iter->btree_id, pos, linked) < 0)
/* Must lock btree nodes in key order: */
if ((cmp_int(iter->btree_id, linked->btree_id) ?:
bkey_cmp(pos, linked->pos)) < 0)
ret = false;
/*
@ -1320,6 +1321,16 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
btree_iter_advance_to_pos(iter, l, -1);
/*
* XXX:
* keeping a node locked that's outside (even just outside) iter->pos
* breaks __bch2_btree_node_lock(). This seems to only affect
* bch2_btree_node_get_sibling so for now it's fixed there, but we
* should try to get rid of this corner case.
*
* (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
*/
if (bch2_btree_node_iter_end(&l->iter) &&
btree_iter_pos_after_node(iter, l->b))
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
@ -1912,7 +1923,7 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
struct btree_iter *iter;
trans_for_each_iter(trans, iter) {
pr_err("iter: btree %s pos %llu:%llu%s%s%s %pf",
pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
bch2_btree_ids[iter->btree_id],
iter->pos.inode,
iter->pos.offset,
@ -2153,6 +2164,9 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
trans->nr_updates2 = 0;
trans->mem_top = 0;
trans->extra_journal_entries = NULL;
trans->extra_journal_entry_u64s = 0;
if (trans->fs_usage_deltas) {
trans->fs_usage_deltas->used = 0;
memset(&trans->fs_usage_deltas->memset_start, 0,
@ -2189,12 +2203,25 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
if (expected_mem_bytes)
bch2_trans_preload_mem(trans, expected_mem_bytes);
#ifdef CONFIG_BCACHEFS_DEBUG
trans->pid = current->pid;
mutex_lock(&c->btree_trans_lock);
list_add(&trans->list, &c->btree_trans_list);
mutex_unlock(&c->btree_trans_lock);
#endif
}
int bch2_trans_exit(struct btree_trans *trans)
{
bch2_trans_unlock(trans);
#ifdef CONFIG_BCACHEFS_DEBUG
mutex_lock(&trans->c->btree_trans_lock);
list_del(&trans->list);
mutex_unlock(&trans->c->btree_trans_lock);
#endif
kfree(trans->fs_usage_deltas);
kfree(trans->mem);
if (trans->used_mempool)
@ -2207,6 +2234,51 @@ int bch2_trans_exit(struct btree_trans *trans)
return trans->error ? -EIO : 0;
}
void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct btree_trans *trans;
struct btree_iter *iter;
struct btree *b;
unsigned l;
mutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
trans_for_each_iter(trans, iter) {
if (!iter->nodes_locked)
continue;
pr_buf(out, " iter %s:", bch2_btree_ids[iter->btree_id]);
bch2_bpos_to_text(out, iter->pos);
pr_buf(out, "\n");
for (l = 0; l < BTREE_MAX_DEPTH; l++) {
if (btree_node_locked(iter, l)) {
b = iter->l[l].b;
pr_buf(out, " %p l=%u %s ",
b, l, btree_node_intent_locked(iter, l) ? "i" : "r");
bch2_bpos_to_text(out, b->key.k.p);
pr_buf(out, "\n");
}
}
}
b = READ_ONCE(trans->locking);
if (b) {
pr_buf(out, " locking %px l=%u %s:",
b, b->level,
bch2_btree_ids[b->btree_id]);
bch2_bpos_to_text(out, b->key.k.p);
pr_buf(out, "\n");
}
}
mutex_unlock(&c->btree_trans_lock);
#endif
}
void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
mempool_exit(&c->btree_iters_pool);
@ -2216,6 +2288,9 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
{
unsigned nr = BTREE_ITER_MAX;
INIT_LIST_HEAD(&c->btree_trans_list);
mutex_init(&c->btree_trans_lock);
return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * nr +
sizeof(struct btree_insert_entry) * nr +

View File

@ -172,17 +172,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
static inline int __btree_iter_cmp(enum btree_id id,
struct bpos pos,
const struct btree_iter *r)
{
return cmp_int(id, r->btree_id) ?: bkey_cmp(pos, r->pos);
}
static inline int btree_iter_cmp(const struct btree_iter *l,
const struct btree_iter *r)
{
return __btree_iter_cmp(l->btree_id, l->pos, r);
return cmp_int(l->btree_id, r->btree_id) ?: bkey_cmp(l->pos, r->pos);
}
/*
@ -303,6 +296,8 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
int bch2_trans_exit(struct btree_trans *);
void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_btree_iter_exit(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);

View File

@ -182,11 +182,21 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
struct btree_iter *iter,
enum six_lock_type type)
{
EBUG_ON(level >= BTREE_MAX_DEPTH);
bool ret;
return likely(six_trylock_type(&b->lock, type)) ||
EBUG_ON(level >= BTREE_MAX_DEPTH);
#ifdef CONFIG_BCACHEFS_DEBUG
iter->trans->locking = b;
#endif
ret = likely(six_trylock_type(&b->lock, type)) ||
btree_node_lock_increment(iter, b, level, type) ||
__bch2_btree_node_lock(b, pos, level, iter, type);
#ifdef CONFIG_BCACHEFS_DEBUG
iter->trans->locking = NULL;
#endif
return ret;
}
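
Under CONFIG_BCACHEFS_DEBUG, btree_node_lock() above now records the node it may block on in trans->locking before acquiring and clears it once the lock is held; that is the field the new bch2_btree_trans_to_text() dump reads out. A rough sketch of the same breadcrumb idea, with a hypothetical trans struct and an ordinary pthread mutex standing in for the six lock:

/* Sketch of the "what am I blocked on" breadcrumb; types and names here
 * are invented for illustration. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct trans {
	const char * _Atomic blocking_on;	/* NULL when not blocked */
};

static pthread_mutex_t resource_lock = PTHREAD_MUTEX_INITIALIZER;

static void trans_lock_resource(struct trans *trans, const char *name)
{
	/* publish the breadcrumb before we can block... */
	trans->blocking_on = name;
	pthread_mutex_lock(&resource_lock);
	/* ...and clear it once the lock is held */
	trans->blocking_on = NULL;
}

/* Analogue of bch2_btree_trans_to_text(): a debug dump can read the
 * breadcrumb to see what a stuck transaction is waiting on. */
static void trans_dump(struct trans *trans)
{
	const char *b = trans->blocking_on;

	printf("trans %p %s%s\n", (void *) trans,
	       b ? "blocked on " : "running", b ?: "");
}

int main(void)
{
	struct trans t = { NULL };

	trans_lock_resource(&t, "resource A");
	trans_dump(&t);
	pthread_mutex_unlock(&resource_lock);
	return 0;
}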
bool __bch2_btree_node_relock(struct btree_iter *, unsigned);

View File

@ -281,6 +281,11 @@ struct btree_insert_entry {
struct btree_trans {
struct bch_fs *c;
#ifdef CONFIG_BCACHEFS_DEBUG
struct list_head list;
struct btree *locking;
pid_t pid;
#endif
unsigned long ip;
u64 iters_linked;
@ -305,6 +310,10 @@ struct btree_trans {
struct btree_insert_entry *updates2;
/* update path: */
struct jset_entry *extra_journal_entries;
unsigned extra_journal_entry_u64s;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
struct journal_preres journal_preres;
u64 *journal_seq;

File diff suppressed because it is too large

View File

@ -6,34 +6,13 @@
#include "btree_locking.h"
#include "btree_update.h"
struct btree_reserve {
struct disk_reservation disk_res;
unsigned nr;
struct btree *b[BTREE_RESERVE_MAX];
};
void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
struct bkey_format *);
/* Btree node freeing/allocation: */
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
/*
* Tracks a btree node that has been (or is about to be) freed in memory, but
* has _not_ yet been freed on disk (because the write that makes the new
* node(s) visible and frees the old hasn't completed yet)
*/
struct pending_btree_node_free {
bool index_update_done;
__le64 seq;
enum btree_id btree_id;
unsigned level;
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
#define BTREE_UPDATE_JOURNAL_RES \
((BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2)
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
@ -72,9 +51,8 @@ struct btree_update {
unsigned nodes_written:1;
enum btree_id btree_id;
u8 level;
struct btree_reserve *reserve;
struct disk_reservation disk_res;
struct journal_preres journal_preres;
/*
@ -96,17 +74,28 @@ struct btree_update {
*/
struct journal_entry_pin journal;
/*
* Nodes being freed:
* Protected by c->btree_node_pending_free_lock
*/
struct pending_btree_node_free pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
unsigned nr_pending;
/* Preallocated nodes we reserve when we start the update: */
struct btree *prealloc_nodes[BTREE_UPDATE_NODES_MAX];
unsigned nr_prealloc_nodes;
/* Nodes being freed: */
struct keylist old_keys;
u64 _old_keys[BTREE_UPDATE_NODES_MAX *
BKEY_BTREE_PTR_VAL_U64s_MAX];
/* Nodes being added: */
struct keylist new_keys;
u64 _new_keys[BTREE_UPDATE_NODES_MAX *
BKEY_BTREE_PTR_VAL_U64s_MAX];
/* New nodes, that will be made reachable by this update: */
struct btree *new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
unsigned nr_new_nodes;
u8 open_buckets[BTREE_UPDATE_NODES_MAX *
BCH_REPLICAS_MAX];
u8 nr_open_buckets;
unsigned journal_u64s;
u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
@ -120,14 +109,12 @@ struct btree_update {
u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
};
#define for_each_pending_btree_node_free(c, as, p) \
list_for_each_entry(as, &c->btree_interior_update_list, list) \
for (p = as->pending; p < as->pending + as->nr_pending; p++)
void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
struct btree_iter *);
void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *);
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
struct btree *,
struct bkey_format);
@ -139,6 +126,7 @@ bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
void bch2_btree_interior_update_will_free_node(struct btree_update *,
struct btree *);
void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
void bch2_btree_insert_node(struct btree_update *, struct btree *,
struct btree_iter *, struct keylist *,
@ -333,4 +321,11 @@ ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
struct jset_entry *, struct jset_entry *);
void bch2_fs_btree_interior_update_exit(struct bch_fs *);
int bch2_fs_btree_interior_update_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */

View File

@ -413,6 +413,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
goto err;
}
if (unlikely(trans->extra_journal_entry_u64s)) {
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
trans->extra_journal_entries,
trans->extra_journal_entry_u64s);
trans->journal_res.offset += trans->extra_journal_entry_u64s;
trans->journal_res.u64s -= trans->extra_journal_entry_u64s;
}
/*
* Not allowed to fail after we've gotten our journal reservation - we
* have to use it:
@ -511,6 +520,10 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
i->iter);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
trans->journal_pin, NULL);
/*
* Drop journal reservation after dropping write locks, since dropping
* the journal reservation may kick off a journal write:
@ -800,7 +813,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
trans->journal_u64s = 0;
trans->journal_u64s = trans->extra_journal_entry_u64s;
trans->journal_preres_u64s = 0;
if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&

View File

@ -778,29 +778,31 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
})
static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type type,
size_t b, enum bch_data_type data_type,
unsigned sectors, bool gc)
{
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
bool overflow;
BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL);
BUG_ON(data_type != BCH_DATA_SB &&
data_type != BCH_DATA_JOURNAL);
old = bucket_cmpxchg(g, new, ({
new.data_type = type;
new.data_type = data_type;
overflow = checked_add(new.dirty_sectors, sectors);
}));
bch2_fs_inconsistent_on(old.data_type &&
old.data_type != type, c,
old.data_type != data_type, c,
"different types of data in same bucket: %s, %s",
bch2_data_types[old.data_type],
bch2_data_types[type]);
bch2_data_types[data_type]);
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %u > U16_MAX",
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
ca->dev_idx, b, new.gen,
bch2_data_types[old.data_type ?: data_type],
old.dirty_sectors, sectors);
if (c)
@ -916,58 +918,117 @@ static void bucket_set_stripe(struct bch_fs *c,
}
}
static bool bch2_mark_pointer(struct bch_fs *c,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type ptr_data_type,
u8 bucket_gen, u8 *bucket_data_type,
u16 *dirty_sectors, u16 *cached_sectors)
{
u16 *dst_sectors = !p.ptr.cached
? dirty_sectors
: cached_sectors;
u16 orig_sectors = *dst_sectors;
char buf[200];
if (gen_after(p.ptr.gen, bucket_gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
bucket_gen,
bch2_data_types[*bucket_data_type ?: ptr_data_type],
p.ptr.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
bucket_gen,
bch2_data_types[*bucket_data_type ?: ptr_data_type],
p.ptr.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (bucket_gen != p.ptr.gen && !p.ptr.cached) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
bucket_gen,
bch2_data_types[*bucket_data_type ?: ptr_data_type],
p.ptr.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (bucket_gen != p.ptr.gen)
return 1;
if (*bucket_data_type && *bucket_data_type != ptr_data_type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
bucket_gen,
bch2_data_types[*bucket_data_type],
bch2_data_types[ptr_data_type],
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (checked_add(*dst_sectors, sectors)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
bucket_gen,
bch2_data_types[*bucket_data_type ?: ptr_data_type],
orig_sectors, sectors,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
*bucket_data_type = *dirty_sectors || *cached_sectors
? ptr_data_type : 0;
return 0;
}
static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
bool gc = flags & BTREE_TRIGGER_GC;
struct bucket_mark old, new;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
bool overflow;
u8 bucket_data_type;
u64 v;
int ret;
v = atomic64_read(&g->_mark.v);
do {
new.v.counter = old.v.counter = v;
bucket_data_type = new.data_type;
/*
* Check this after reading bucket mark to guard against
* the allocator invalidating a bucket after we've already
* checked the gen
*/
if (gen_after(p.ptr.gen, new.gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"pointer gen in the future");
return true;
}
ret = __mark_pointer(c, k, p, sectors, data_type, new.gen,
&bucket_data_type,
&new.dirty_sectors,
&new.cached_sectors);
if (ret)
return ret;
if (new.gen != p.ptr.gen) {
/* XXX write repair code for this */
if (!p.ptr.cached &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"stale dirty pointer");
return true;
}
new.data_type = bucket_data_type;
if (!p.ptr.cached)
overflow = checked_add(new.dirty_sectors, sectors);
else
overflow = checked_add(new.cached_sectors, sectors);
if (!new.dirty_sectors &&
!new.cached_sectors) {
new.data_type = 0;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
} else {
new.data_type = data_type;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
if (flags & BTREE_TRIGGER_NOATOMIC) {
@ -978,25 +1039,11 @@ static bool bch2_mark_pointer(struct bch_fs *c,
old.v.counter,
new.v.counter)) != old.v.counter);
if (old.data_type && old.data_type != data_type)
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
new.gen,
bch2_data_types[old.data_type],
bch2_data_types[data_type]);
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
!p.ptr.cached
? old.dirty_sectors
: old.cached_sectors, sectors);
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
BUG_ON(!gc && bucket_became_unavailable(old, new));
return false;
return 0;
}
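
bch2_mark_pointer() above now delegates the gen and data-type checks to __mark_pointer(), but keeps its compare-and-exchange retry loop: read the packed bucket mark, compute the new mark from it, and retry if another CPU changed the mark in the meantime. A small standalone sketch of that loop, with an invented mark layout:

/* Sketch of the cmpxchg retry loop used to update a packed bucket mark;
 * the field layout here is illustrative only. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

union mark {
	struct {
		uint16_t dirty_sectors;
		uint16_t cached_sectors;
		uint8_t  gen;
		uint8_t  data_type;
	};
	uint64_t v;
};

static _Atomic uint64_t bucket_mark;

/* Simplified analogue of __mark_pointer(): bump the sector count, fail if
 * it would overflow the 16-bit field. */
static int mark_pointer(union mark *m, unsigned sectors)
{
	if (m->dirty_sectors + sectors > UINT16_MAX)
		return -1;
	m->dirty_sectors += sectors;
	return 0;
}

static int mark_bucket(unsigned sectors)
{
	union mark old, new;
	int ret;

	old.v = atomic_load(&bucket_mark);
	do {
		new.v = old.v;
		ret = mark_pointer(&new, sectors);
		if (ret)
			return ret;
		/* on failure, old.v is reloaded with the current value */
	} while (!atomic_compare_exchange_weak(&bucket_mark, &old.v, new.v));

	return 0;
}

int main(void)
{
	union mark m;

	mark_bucket(8);
	m.v = atomic_load(&bucket_mark);
	printf("dirty_sectors %u\n", m.dirty_sectors);
	return 0;
}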
static int bch2_mark_stripe_ptr(struct bch_fs *c,
@ -1060,6 +1107,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
struct extent_ptr_decoded p;
struct bch_replicas_padded r;
s64 dirty_sectors = 0;
bool stale;
int ret;
r.e.data_type = data_type;
@ -1072,8 +1120,13 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 disk_sectors = data_type == BCH_DATA_BTREE
? sectors
: ptr_disk_sectors_delta(p, offset, sectors, flags);
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
fs_usage, journal_seq, flags);
ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type,
fs_usage, journal_seq, flags);
if (ret < 0)
return ret;
stale = ret > 0;
if (p.ptr.cached) {
if (!stale)
@ -1175,7 +1228,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
return 0;
}
int bch2_mark_key_locked(struct bch_fs *c,
static int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k,
unsigned offset, s64 sectors,
struct bch_fs_usage *fs_usage,
@ -1434,29 +1487,30 @@ static int trans_get_key(struct btree_trans *trans,
}
static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct extent_ptr_decoded p,
struct bkey_s_c k, struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_c k_a;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc *a;
u16 *dst_sectors, orig_sectors;
int ret;
ret = trans_get_key(trans, BTREE_ID_ALLOC,
POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
&iter, &k);
&iter, &k_a);
if (ret < 0)
return ret;
if (!ret && unlikely(!test_bit(BCH_FS_ALLOC_WRITTEN, &c->flags))) {
if (k_a.k->type != KEY_TYPE_alloc ||
(!ret && unlikely(!test_bit(BCH_FS_ALLOC_WRITTEN, &c->flags)))) {
/*
* During journal replay, and if gc repairs alloc info at
* runtime, the alloc info in the btree might not be up to date
* yet - so, trust the in memory mark:
* yet - so, trust the in memory mark - unless we're already
* updating that key:
*/
struct bucket *g;
struct bucket_mark m;
@ -1467,52 +1521,13 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
u = alloc_mem_to_key(g, m);
percpu_up_read(&c->mark_lock);
} else {
/*
* Unless we're already updating that key:
*/
if (k.k->type != KEY_TYPE_alloc) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"pointer to nonexistent bucket %llu:%llu",
iter->pos.inode, iter->pos.offset);
ret = -1;
goto out;
}
u = bch2_alloc_unpack(k);
u = bch2_alloc_unpack(k_a);
}
if (gen_after(u.gen, p.ptr.gen)) {
ret = 1;
ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type,
&u.dirty_sectors, &u.cached_sectors);
if (ret)
goto out;
}
if (u.data_type && u.data_type != data_type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s",
iter->pos.inode, iter->pos.offset,
u.gen,
bch2_data_types[u.data_type],
bch2_data_types[data_type]);
ret = -1;
goto out;
}
dst_sectors = !p.ptr.cached
? &u.dirty_sectors
: &u.cached_sectors;
orig_sectors = *dst_sectors;
if (checked_add(*dst_sectors, sectors)) {
bch2_fs_inconsistent(c,
"bucket sector count overflow: %u + %lli > U16_MAX",
orig_sectors, sectors);
/* return an error indicating that we need full fsck */
ret = -EIO;
goto out;
}
u.data_type = u.dirty_sectors || u.cached_sectors
? data_type : 0;
a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
ret = PTR_ERR_OR_ZERO(a);
@ -1597,7 +1612,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
? sectors
: ptr_disk_sectors_delta(p, offset, sectors, flags);
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors,
data_type);
if (ret < 0)
return ret;

View File

@ -259,8 +259,6 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, unsigned, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,

View File

@ -162,7 +162,7 @@ ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf)
now = atomic_long_read(&clock->now);
for (i = 0; i < clock->timers.used; i++)
pr_buf(&out, "%pf:\t%li\n",
pr_buf(&out, "%ps:\t%li\n",
clock->timers.data[i]->fn,
clock->timers.data[i]->expire - now);
spin_unlock(&clock->timer_lock);

View File

@ -7,6 +7,7 @@
#include "super-io.h"
#include <linux/lz4.h>
#include <linux/sched/mm.h>
#include <linux/zlib.h>
#include <linux/zstd.h>
@ -63,7 +64,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
struct bbuf ret;
struct bio_vec bv;
struct bvec_iter iter;
unsigned nr_pages = 0;
unsigned nr_pages = 0, flags;
struct page *stack_pages[16];
struct page **pages = NULL;
void *data;
@ -103,7 +104,10 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
__bio_for_each_segment(bv, bio, iter, start)
pages[nr_pages++] = bv.bv_page;
flags = memalloc_nofs_save();
data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
memalloc_nofs_restore(flags);
if (pages != stack_pages)
kfree(pages);
@ -603,7 +607,7 @@ have_compressed:
}
if (!mempool_initialized(&c->decompress_workspace)) {
ret = mempool_init_kmalloc_pool(
ret = mempool_init_kvpmalloc_pool(
&c->decompress_workspace,
1, decompress_workspace_size);
if (ret)

View File

@ -104,7 +104,7 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
bch_scnmemcpy(out, d.v->d_name,
bch2_dirent_name_bytes(d));
pr_buf(out, " -> %llu", d.v->d_inum);
pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type);
}
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,

View File

@ -1273,38 +1273,28 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
return ret;
}
static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
{
int ret = 0;
if (k.k->type == KEY_TYPE_stripe)
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
return ret;
}
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
struct btree_trans trans;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
int ret;
ret = bch2_fs_ec_start(c);
int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC,
NULL, bch2_stripes_read_fn);
if (ret)
return ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
BTREE_ID_EC, POS_MIN);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
bch2_btree_and_journal_iter_advance(&iter);
}
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
bch_err(c, "error reading stripes: %i", ret);
return ret;
}
return 0;
return ret;
}
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
@ -1343,11 +1333,6 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
return 0;
}
int bch2_fs_ec_start(struct bch_fs *c)
{
return bch2_ec_mem_alloc(c, false);
}
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;

View File

@ -157,8 +157,6 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
int bch2_fs_ec_start(struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);

View File

@ -85,7 +85,7 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
if (s->fmt == fmt)
goto found;
s = kzalloc(sizeof(*s), GFP_KERNEL);
s = kzalloc(sizeof(*s), GFP_NOFS);
if (!s) {
if (!c->fsck_alloc_err)
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");

View File

@ -102,6 +102,7 @@ struct fsck_err_state {
#define FSCK_CAN_IGNORE (1 << 1)
#define FSCK_NEED_FSCK (1 << 2)
__printf(3, 4) __cold
enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
unsigned, const char *, ...);
void bch2_flush_fsck_errs(struct bch_fs *);

View File

@ -220,7 +220,7 @@ void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
{
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
pr_buf(out, "seq %llu sectors %u written %u min_key ",
pr_buf(out, "seq %llx sectors %u written %u min_key ",
le64_to_cpu(bp.v->seq),
le16_to_cpu(bp.v->sectors),
le16_to_cpu(bp.v->sectors_written));

View File

@ -845,7 +845,7 @@ retry:
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(trans,
&offset_into_extent, sk.k);
&offset_into_extent, &sk);
if (ret)
break;
@ -2844,6 +2844,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
u64 aligned_len;
loff_t ret = 0;
if (!c->opts.reflink)
return -EOPNOTSUPP;
if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
return -EINVAL;

View File

@ -889,7 +889,7 @@ retry:
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, cur.k);
&offset_into_extent, &cur);
if (ret)
break;

View File

@ -1169,7 +1169,7 @@ static int check_inode_nlink(struct bch_fs *c,
}
if (!S_ISDIR(u->bi_mode) && link->dir_count) {
need_fsck_err(c, "non directory with subdirectories",
need_fsck_err(c, "non directory with subdirectories (inum %llu)",
u->bi_inum);
return 0;
}

View File

@ -1641,7 +1641,7 @@ retry:
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, sk.k);
&offset_into_extent, &sk);
if (ret)
break;
@ -1943,14 +1943,14 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
struct bkey_i *orig_k)
struct bkey_on_stack *orig_k)
{
struct btree_iter *iter;
struct bkey_s_c k;
u64 reflink_offset;
int ret;
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
*offset_into_extent;
iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
@ -1973,7 +1973,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
}
*offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
bkey_reassemble(orig_k, k);
bkey_on_stack_reassemble(orig_k, trans->c, k);
err:
bch2_trans_iter_put(trans, iter);
return ret;
@ -2273,7 +2273,7 @@ retry:
k = bkey_i_to_s_c(sk.k);
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, sk.k);
&offset_into_extent, &sk);
if (ret)
goto err;

View File

@ -3,6 +3,7 @@
#define _BCACHEFS_IO_H
#include "checksum.h"
#include "bkey_on_stack.h"
#include "io_types.h"
#define to_wbio(_bio) \
@ -110,13 +111,13 @@ struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
struct bkey_i *);
struct bkey_on_stack *);
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
struct bkey_i *k)
struct bkey_on_stack *k)
{
return k->k.type == KEY_TYPE_reflink_p
return k->k->k.type == KEY_TYPE_reflink_p
? __bch2_read_indirect_extent(trans, offset_into_extent, k)
: 0;
}
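
The indirect-extent helpers above now take a struct bkey_on_stack * rather than a bare struct bkey_i *, which lets __bch2_read_indirect_extent() grow the caller's buffer (bkey_on_stack_reassemble) when the key it looks up is larger than what the caller had room for. A hedged sketch of that grow-on-demand buffer pattern, with invented types and sizes rather than the real bkey_on_stack API:

/* Sketch of a small-buffer key holder that falls back to the heap when a
 * larger key must be copied in; purely illustrative. */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct key_on_stack {
	char  *k;		/* points at onstack[] or a heap allocation */
	size_t size;
	char   onstack[16];
};

static void key_on_stack_init(struct key_on_stack *s)
{
	s->k = s->onstack;
	s->size = sizeof(s->onstack);
}

/* Analogue of bkey_on_stack_reassemble(): make sure the buffer is large
 * enough, then copy the (possibly bigger) key into it. */
static void key_on_stack_reassemble(struct key_on_stack *s,
				    const char *src, size_t len)
{
	if (len > s->size) {
		char *n = malloc(len);	/* error handling omitted in sketch */

		if (s->k != s->onstack)
			free(s->k);
		s->k = n;
		s->size = len;
	}
	memcpy(s->k, src, len);
}

static void key_on_stack_exit(struct key_on_stack *s)
{
	if (s->k != s->onstack)
		free(s->k);
}

int main(void)
{
	struct key_on_stack sk;
	const char big_key[] = "an indirect extent larger than the caller's buffer";

	key_on_stack_init(&sk);
	key_on_stack_reassemble(&sk, big_key, sizeof(big_key));
	printf("%s\n", sk.k);
	key_on_stack_exit(&sk);
	return 0;
}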

View File

@ -959,15 +959,12 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
void bch2_fs_journal_stop(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bch2_journal_flush_all_pins(j);
wait_event(j->wait, journal_entry_close(j));
/* do we need to write another journal entry? */
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
c->btree_roots_dirty)
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags))
bch2_journal_meta(j);
journal_quiesce(j);
@ -1238,14 +1235,14 @@ ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
i, atomic_read(&pin_list->count));
list_for_each_entry(pin, &pin_list->list, list)
pr_buf(&out, "\t%p %pf\n",
pr_buf(&out, "\t%px %ps\n",
pin, pin->flush);
if (!list_empty(&pin_list->flushed))
pr_buf(&out, "flushed:\n");
list_for_each_entry(pin, &pin_list->flushed, list)
pr_buf(&out, "\t%p %pf\n",
pr_buf(&out, "\t%px %ps\n",
pin, pin->flush);
}
spin_unlock(&j->lock);

View File

@ -199,27 +199,39 @@ bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
return entry;
}
static inline struct jset_entry *
journal_res_entry(struct journal *j, struct journal_res *res)
{
return vstruct_idx(j->buf[res->idx].data, res->offset);
}
static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
enum btree_id id, unsigned level,
const void *data, unsigned u64s)
{
memset(entry, 0, sizeof(*entry));
entry->u64s = cpu_to_le16(u64s);
entry->type = type;
entry->btree_id = id;
entry->level = level;
memcpy_u64s_small(entry->_data, data, u64s);
return jset_u64s(u64s);
}
static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
unsigned type, enum btree_id id,
unsigned level,
const void *data, unsigned u64s)
{
struct journal_buf *buf = &j->buf[res->idx];
struct jset_entry *entry = vstruct_idx(buf->data, res->offset);
unsigned actual = jset_u64s(u64s);
unsigned actual = journal_entry_set(journal_res_entry(j, res),
type, id, level, data, u64s);
EBUG_ON(!res->ref);
EBUG_ON(actual > res->u64s);
res->offset += actual;
res->u64s -= actual;
memset(entry, 0, sizeof(*entry));
entry->u64s = cpu_to_le16(u64s);
entry->type = type;
entry->btree_id = id;
entry->level = level;
memcpy_u64s(entry->_data, data, u64s);
}
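
journal_entry_set() above factors out filling a variable-length jset_entry and returning the number of u64s consumed, so bch2_journal_add_entry() and the transaction-commit path can share it. A simplified standalone sketch of that fill-and-return-size pattern, with a cut-down entry layout (the real jset_entry carries more fields and is little-endian):

/* Sketch of filling a variable-length entry and reporting how much of the
 * reservation it used; the layout is invented for illustration. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct entry {
	uint16_t u64s;		/* payload size, in 64-bit words */
	uint8_t  type;
	uint8_t  btree_id;
	uint8_t  level;
	uint64_t data[];	/* payload follows the header */
};

/* Returns the total u64s consumed, header included, the way
 * journal_entry_set() returns jset_u64s(u64s). */
static unsigned entry_set(struct entry *e, unsigned type,
			  unsigned btree_id, unsigned level,
			  const void *data, unsigned u64s)
{
	memset(e, 0, sizeof(*e));
	e->u64s     = u64s;
	e->type     = type;
	e->btree_id = btree_id;
	e->level    = level;
	memcpy(e->data, data, u64s * sizeof(uint64_t));

	return 1 + u64s;	/* header is one u64 in this sketch */
}

int main(void)
{
	uint64_t buf[8];
	uint64_t payload[2] = { 1, 2 };
	unsigned used = entry_set((struct entry *) buf, 0, 0, 0, payload, 2);

	printf("entry used %u u64s\n", used);
	return 0;
}

The caller then advances its reservation by the returned size, which is exactly what the rewritten bch2_journal_add_entry() does with res->offset and res->u64s.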
static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,

View File

@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "btree_io.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "error.h"
@ -993,8 +994,23 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock();
start = vstruct_last(jset);
end = bch2_journal_super_entries_add_common(c, start,
/*
* New btree roots are set by journalling them; when the journal entry
* gets written we have to propagate them to c->btree_roots
*
* But, every journal entry we write has to contain all the btree roots
* (at least for now); so after we copy btree roots to c->btree_roots we
* have to get any missing btree roots and add them to this journal
* entry:
*/
bch2_journal_entries_to_btree_roots(c, jset);
start = end = vstruct_last(jset);
end = bch2_btree_roots_to_journal_entries(c, jset->start, end);
end = bch2_journal_super_entries_add_common(c, end,
le64_to_cpu(jset->seq));
u64s = (u64 *) end - (u64 *) start;
BUG_ON(u64s > j->entry_u64s_reserved);

View File

@ -330,7 +330,7 @@ static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
__journal_pin_drop(j, pin);
BUG_ON(!atomic_read(&pin_list->count));
BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
atomic_inc(&pin_list->count);
pin->seq = seq;
@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
return ret;
}
static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
/* returns true if we did work */
static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
unsigned min_nr)
{
struct journal_entry_pin *pin;
bool ret = false;
u64 seq;
lockdep_assert_held(&j->reclaim_lock);
@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
BUG_ON(j->flush_in_progress != pin);
j->flush_in_progress = NULL;
wake_up(&j->pin_flush_wait);
ret = true;
}
return ret;
}
/**
@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work)
mutex_unlock(&j->reclaim_lock);
}
static int journal_flush_done(struct journal *j, u64 seq_to_flush)
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
bool *did_work)
{
int ret;
@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
mutex_lock(&j->reclaim_lock);
journal_flush_pins(j, seq_to_flush, 0);
*did_work = journal_flush_pins(j, seq_to_flush, 0);
spin_lock(&j->lock);
/*
@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
return ret;
}
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
{
if (!test_bit(JOURNAL_STARTED, &j->flags))
return;
bool did_work = false;
closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush));
if (!test_bit(JOURNAL_STARTED, &j->flags))
return false;
closure_wait_event(&j->async_wait,
journal_flush_done(j, seq_to_flush, &did_work));
return did_work;
}
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)

View File

@ -38,7 +38,7 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
{
if (unlikely(!journal_pin_active(pin)))
if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
__bch2_journal_pin_add(j, seq, pin, flush_fn);
}
@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *);
void bch2_journal_reclaim(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);
void bch2_journal_flush_pins(struct journal *, u64);
bool bch2_journal_flush_pins(struct journal *, u64);
static inline void bch2_journal_flush_all_pins(struct journal *j)
static inline bool bch2_journal_flush_all_pins(struct journal *j)
{
bch2_journal_flush_pins(j, U64_MAX);
return bch2_journal_flush_pins(j, U64_MAX);
}
int bch2_journal_flush_device_pins(struct journal *, int);

View File

@ -6,7 +6,7 @@
int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
size_t nr_inline_u64s, size_t new_u64s)
{
size_t oldsize = bch_keylist_u64s(l);
size_t oldsize = bch2_keylist_u64s(l);
size_t newsize = oldsize + new_u64s;
u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
u64 *new_keys;
@ -52,7 +52,7 @@ void bch2_keylist_pop_front(struct keylist *l)
memmove_u64s_down(l->keys,
bkey_next(l->keys),
bch_keylist_u64s(l));
bch2_keylist_u64s(l));
}
#ifdef CONFIG_BCACHEFS_DEBUG

View File

@ -36,14 +36,14 @@ static inline bool bch2_keylist_empty(struct keylist *l)
return l->top == l->keys;
}
static inline size_t bch_keylist_u64s(struct keylist *l)
static inline size_t bch2_keylist_u64s(struct keylist *l)
{
return l->top_p - l->keys_p;
}
static inline size_t bch2_keylist_bytes(struct keylist *l)
{
return bch_keylist_u64s(l) * sizeof(u64);
return bch2_keylist_u64s(l) * sizeof(u64);
}
static inline struct bkey_i *bch2_keylist_front(struct keylist *l)

View File

@ -151,15 +151,8 @@ retry:
}
/* flush relevant btree updates */
while (1) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c) ||
c->btree_roots_dirty);
if (c->btree_roots_dirty)
bch2_journal_meta(&c->journal);
if (!bch2_btree_interior_updates_nr_pending(c))
break;
}
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = 0;
err:

View File

@ -775,14 +775,8 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
while (1) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c) ||
c->btree_roots_dirty);
if (!bch2_btree_interior_updates_nr_pending(c))
break;
bch2_journal_meta(&c->journal);
}
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret;

View File

@ -207,6 +207,11 @@ enum opt_type {
OPT_BOOL(), \
BCH_SB_PRJQUOTA, false, \
NULL, "Enable project quotas") \
x(reflink, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_REFLINK, true, \
NULL, "Enable reflink support") \
x(degraded, u8, \
OPT_MOUNT, \
OPT_BOOL(), \

View File

@ -191,6 +191,78 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
b->btree_id, b->level, b->data->min_key);
}
/* Walk btree, overlaying keys from the journal: */
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
btree_walk_key_fn key_fn)
{
struct btree_and_journal_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
ret = key_fn(c, btree_id, b->level, k);
if (ret)
break;
if (b->level) {
struct btree *child;
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
if (b->level > 0) {
child = bch2_btree_node_get_noiter(c, &tmp.k,
b->btree_id, b->level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn);
six_unlock_read(&child->lock);
if (ret)
break;
}
} else {
bch2_btree_and_journal_iter_advance(&iter);
}
}
return ret;
}
int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
btree_walk_key_fn key_fn)
{
struct btree *b = c->btree_roots[btree_id].b;
int ret = 0;
if (btree_node_fake(b))
return 0;
six_lock_read(&b->lock);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
node_fn, key_fn) ?:
key_fn(c, btree_id, b->level + 1, bkey_i_to_s_c(&b->key));
six_unlock_read(&b->lock);
return ret;
}
/* sort and dedup all keys in the journal: */
void bch2_journal_entries_free(struct list_head *list)
@ -691,6 +763,7 @@ static int verify_superblock_clean(struct bch_fs *c,
"superblock read clock doesn't match journal after clean shutdown");
for (i = 0; i < BTREE_ID_NR; i++) {
char buf1[200], buf2[200];
struct bkey_i *k1, *k2;
unsigned l1 = 0, l2 = 0;
@ -706,7 +779,11 @@ static int verify_superblock_clean(struct bch_fs *c,
k1->k.u64s != k2->k.u64s ||
memcmp(k1, k2, bkey_bytes(k1)) ||
l1 != l2, c,
"superblock btree root doesn't match journal after clean shutdown");
"superblock btree root %u doesn't match journal after clean shutdown\n"
"sb: l=%u %s\n"
"journal: l=%u %s\n", i,
l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1),
l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2));
}
fsck_err:
return ret;
@ -1077,6 +1154,15 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_mark_dev_superblock(c, ca, 0);
mutex_unlock(&c->sb_lock);
mutex_lock(&c->sb_lock);
c->disk_sb.sb->version = c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
@ -1135,11 +1221,6 @@ int bch2_fs_initialize(struct bch_fs *c)
goto err;
mutex_lock(&c->sb_lock);
c->disk_sb.sb->version = c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);

View File

@ -44,6 +44,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct journal_keys *,
struct btree *);
typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k);
int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
btree_walk_node_fn, btree_walk_key_fn);
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);

View File

@ -167,6 +167,9 @@ s64 bch2_remap_range(struct bch_fs *c,
u64 src_done, dst_done;
int ret = 0, ret2 = 0;
if (!c->opts.reflink)
return -EOPNOTSUPP;
if (!percpu_ref_tryget(&c->writes))
return -EROFS;

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"
@ -955,7 +956,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
@ -989,27 +989,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
struct jset_entry *entry,
u64 journal_seq)
{
struct btree_root *r;
unsigned i;
mutex_lock(&c->btree_root_lock);
for (r = c->btree_roots;
r < c->btree_roots + BTREE_ID_NR;
r++)
if (r->alive) {
entry_init_u64s(entry, r->key.u64s + 1);
entry->btree_id = r - c->btree_roots;
entry->level = r->level;
entry->type = BCH_JSET_ENTRY_btree_root;
bkey_copy(&entry->start[0], &r->key);
entry = vstruct_next(entry);
}
c->btree_roots_dirty = false;
mutex_unlock(&c->btree_root_lock);
percpu_down_write(&c->mark_lock);
if (!journal_seq) {
@ -1110,6 +1091,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
entry = sb_clean->start;
entry = bch2_journal_super_entries_add_common(c, entry, 0);
entry = bch2_btree_roots_to_journal_entries(c, entry, entry);
BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
memset(entry, 0,

View File

@ -207,7 +207,7 @@ int bch2_congested(void *data, int bdi_bits)
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
bool wrote;
bool wrote = false;
unsigned i, clean_passes = 0;
int ret;
@ -224,48 +224,68 @@ static void __bch2_fs_read_only(struct bch_fs *c)
*/
bch2_journal_flush_all_pins(&c->journal);
/*
* If the allocator threads didn't all start up, the btree updates to
* write out alloc info aren't going to work:
*/
if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
goto allocator_not_running;
goto nowrote_alloc;
bch_verbose(c, "writing alloc info");
/*
* This should normally just be writing the bucket read/write clocks:
*/
ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
bch_verbose(c, "writing alloc info complete");
if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
if (ret)
goto nowrote_alloc;
bch_verbose(c, "flushing journal and stopping allocators");
bch2_journal_flush_all_pins(&c->journal);
set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
do {
wrote = false;
clean_passes++;
ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
if (ret)
break;
for_each_member_device(ca, c, i)
bch2_dev_allocator_quiesce(c, ca);
bch2_journal_flush_all_pins(&c->journal);
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
/*
* We need to explicitly wait on btree interior updates to complete
* before stopping the journal, flushing all journal pins isn't
* sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
* interior updates have to drop their journal pin before they're
* fully complete:
* In flight interior btree updates will generate more journal
* updates and btree updates (alloc btree):
*/
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
if (bch2_btree_interior_updates_nr_pending(c)) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
clean_passes = 0;
}
flush_work(&c->btree_interior_update_work);
clean_passes = wrote ? 0 : clean_passes + 1;
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
} while (clean_passes < 2);
allocator_not_running:
bch_verbose(c, "flushing journal and stopping allocators complete");
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
nowrote_alloc:
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
flush_work(&c->btree_interior_update_work);
for_each_member_device(ca, c, i)
bch2_dev_allocator_stop(ca);
clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
bch2_fs_journal_stop(&c->journal);
/* XXX: mark super that alloc info is persistent */
/*
* the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal:
@ -338,8 +358,11 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags) &&
!c->opts.norecovery)
test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
!c->opts.norecovery) {
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
}
clear_bit(BCH_FS_RW, &c->flags);
}
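
The rewritten __bch2_fs_read_only() above no longer trusts a single flush: it loops, counting clean passes, and resets the counter whenever writing alloc info, flushing journal pins, or waiting on interior btree updates actually did work, stopping only after two consecutive passes that found nothing to do. A minimal sketch of that loop shape, with stub flush steps in place of the real ones:

/* Sketch of the "two consecutive clean passes" quiesce loop; the flush
 * step is a stub that pretends a few rounds of work exist. */
#include <stdbool.h>
#include <stdio.h>

static int pending = 3;

/* Stand-in for bch2_journal_flush_all_pins() etc.: returns true if this
 * call actually flushed something. */
static bool flush_some_work(void)
{
	if (pending > 0) {
		pending--;
		return true;
	}
	return false;
}

static void quiesce(void)
{
	unsigned clean_passes = 0;

	do {
		clean_passes++;

		/* any step that does work resets the clean-pass counter */
		if (flush_some_work())
			clean_passes = 0;
	} while (clean_passes < 2);

	printf("quiesced\n");
}

int main(void)
{
	quiesce();
	return 0;
}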
@ -426,6 +449,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
if (ret)
goto err;
clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
@ -494,6 +519,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_fs_ec_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_io_exit(c);
bch2_fs_btree_interior_update_exit(c);
bch2_fs_btree_iter_exit(c);
bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
@ -511,8 +537,6 @@ static void bch2_fs_free(struct bch_fs *c)
mempool_exit(&c->large_bkey_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->btree_interior_update_pool);
mempool_exit(&c->btree_reserve_pool);
mempool_exit(&c->fill_iter);
percpu_ref_exit(&c->writes);
kfree(c->replicas.entries);
@ -675,11 +699,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->list);
INIT_LIST_HEAD(&c->btree_interior_update_list);
INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
mutex_init(&c->btree_reserve_cache_lock);
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->usage_scratch_lock);
mutex_init(&c->bio_bounce_pages_lock);
@ -752,10 +771,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_update)) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio),
@ -771,6 +786,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_replicas_init(c) ||
bch2_fs_btree_cache_init(c) ||
bch2_fs_btree_iter_init(c) ||
bch2_fs_btree_interior_update_init(c) ||
bch2_fs_io_init(c) ||
bch2_fs_encryption_init(c) ||
bch2_fs_compress_init(c) ||

View File

@ -166,6 +166,7 @@ read_attribute(journal_debug);
read_attribute(journal_pins);
read_attribute(btree_updates);
read_attribute(dirty_btree_nodes);
read_attribute(btree_transactions);
read_attribute(internal_uuid);
@ -401,6 +402,12 @@ SHOW(bch2_fs)
if (attr == &sysfs_dirty_btree_nodes)
return bch2_dirty_btree_nodes_print(c, buf);
if (attr == &sysfs_btree_transactions) {
struct printbuf out = _PBUF(buf, PAGE_SIZE);
bch2_btree_trans_to_text(&out, c);
return out.pos - buf;
}
if (attr == &sysfs_compression_stats)
return bch2_compression_stats(c, buf);
@ -571,6 +578,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_journal_pins,
&sysfs_btree_updates,
&sysfs_dirty_btree_nodes,
&sysfs_btree_transactions,
&sysfs_read_realloc_races,
&sysfs_extent_migrate_done,

View File

@ -108,7 +108,8 @@ static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
if (!do_six_trylock_type(lock, type))
return false;
six_acquire(&lock->dep_map, 1);
if (type != SIX_LOCK_write)
six_acquire(&lock->dep_map, 1);
return true;
}
@ -130,7 +131,8 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
old.v + l[type].lock_val)) != old.v);
six_set_owner(lock, type, old);
six_acquire(&lock->dep_map, 1);
if (type != SIX_LOCK_write)
six_acquire(&lock->dep_map, 1);
return true;
}
@ -323,7 +325,8 @@ static void __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type t
__always_inline
static void __six_lock_type(struct six_lock *lock, enum six_lock_type type)
{
six_acquire(&lock->dep_map, 0);
if (type != SIX_LOCK_write)
six_acquire(&lock->dep_map, 0);
if (!do_six_trylock_type(lock, type))
__six_lock_type_slowpath(lock, type);
@ -382,7 +385,8 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
EBUG_ON(type == SIX_LOCK_write &&
!(lock->state.v & __SIX_LOCK_HELD_intent));
six_release(&lock->dep_map);
if (type != SIX_LOCK_write)
six_release(&lock->dep_map);
if (type == SIX_LOCK_intent) {
EBUG_ON(lock->owner != current);

View File

@ -5,6 +5,7 @@
#include <linux/workqueue.h>
static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t work_finished = PTHREAD_COND_INITIALIZER;
static LIST_HEAD(wq_list);
struct workqueue_struct {
@ -13,8 +14,6 @@ struct workqueue_struct {
struct work_struct *current_work;
struct list_head pending_work;
pthread_cond_t work_finished;
struct task_struct *worker;
char name[24];
};
@ -23,6 +22,11 @@ enum {
WORK_PENDING_BIT,
};
static bool work_pending(struct work_struct *work)
{
return test_bit(WORK_PENDING_BIT, work_data_bits(work));
}
static void clear_work_pending(struct work_struct *work)
{
clear_bit(WORK_PENDING_BIT, work_data_bits(work));
@ -36,7 +40,7 @@ static bool set_work_pending(struct work_struct *work)
static void __queue_work(struct workqueue_struct *wq,
struct work_struct *work)
{
BUG_ON(!test_bit(WORK_PENDING_BIT, work_data_bits(work)));
BUG_ON(!work_pending(work));
BUG_ON(!list_empty(&work->entry));
list_add_tail(&work->entry, &wq->pending_work);
@ -130,17 +134,39 @@ retry:
goto retry;
}
static bool __flush_work(struct work_struct *work)
static bool work_running(struct work_struct *work)
{
struct workqueue_struct *wq;
bool ret = false;
retry:
list_for_each_entry(wq, &wq_list, list)
if (wq->current_work == work) {
pthread_cond_wait(&wq->work_finished, &wq_lock);
ret = true;
goto retry;
}
if (wq->current_work == work)
return true;
return false;
}
bool flush_work(struct work_struct *work)
{
bool ret = false;
pthread_mutex_lock(&wq_lock);
while (work_pending(work) || work_running(work)) {
pthread_cond_wait(&work_finished, &wq_lock);
ret = true;
}
pthread_mutex_unlock(&wq_lock);
return ret;
}
static bool __flush_work(struct work_struct *work)
{
bool ret = false;
while (work_running(work)) {
pthread_cond_wait(&work_finished, &wq_lock);
ret = true;
}
return ret;
}
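
The shim's flush_work() above waits on a single global work_finished condvar until the item is neither pending nor running, replacing the old per-workqueue condvar that only covered the running case. A compact pthread sketch of those semantics, with the state tracking reduced to two booleans:

/* Sketch of flush_work() semantics: wait until the item is neither
 * pending nor running; state handling is simplified for illustration. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t work_finished = PTHREAD_COND_INITIALIZER;

struct work {
	bool pending;
	bool running;
};

/* Worker side: mark the item running, do the work outside the lock, then
 * broadcast so any flusher re-checks its condition. */
static void *run_work(void *arg)
{
	struct work *w = arg;

	pthread_mutex_lock(&wq_lock);
	w->pending = false;
	w->running = true;
	pthread_mutex_unlock(&wq_lock);

	/* ... the work item's function runs here ... */

	pthread_mutex_lock(&wq_lock);
	w->running = false;
	pthread_cond_broadcast(&work_finished);
	pthread_mutex_unlock(&wq_lock);
	return NULL;
}

static bool flush_work_sketch(struct work *w)
{
	bool waited = false;

	pthread_mutex_lock(&wq_lock);
	while (w->pending || w->running) {
		pthread_cond_wait(&work_finished, &wq_lock);
		waited = true;
	}
	pthread_mutex_unlock(&wq_lock);
	return waited;
}

int main(void)
{
	struct work w = { .pending = true };
	pthread_t t;

	pthread_create(&t, NULL, run_work, &w);
	printf("flush waited: %d\n", flush_work_sketch(&w));
	pthread_join(&t, NULL);
	return 0;
}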
@ -228,7 +254,7 @@ static int worker_thread(void *arg)
continue;
}
BUG_ON(!test_bit(WORK_PENDING_BIT, work_data_bits(work)));
BUG_ON(!work_pending(work));
list_del_init(&work->entry);
clear_work_pending(work);
@ -236,7 +262,7 @@ static int worker_thread(void *arg)
work->func(work);
pthread_mutex_lock(&wq_lock);
pthread_cond_broadcast(&wq->work_finished);
pthread_cond_broadcast(&work_finished);
}
pthread_mutex_unlock(&wq_lock);
@ -269,8 +295,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
INIT_LIST_HEAD(&wq->list);
INIT_LIST_HEAD(&wq->pending_work);
pthread_cond_init(&wq->work_finished, NULL);
va_start(args, max_active);
vsnprintf(wq->name, sizeof(wq->name), fmt, args);
va_end(args);