Mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-23 00:00:02 +03:00
Update bcachefs sources to 3c41353bc1 bcachefs: Fix bch2_verify_keylist_sorted
This commit is contained in:
parent 30f72f75f5
commit f87850496e
@@ -1 +1 @@
fe72e70682cd2430a099c08c3135253675030d28
3c41353bc185e0a0da4c6f63b1203575c41a2da1
@@ -259,7 +259,11 @@ do { \
	BCH_DEBUG_PARAM(btree_gc_rewrite_disabled,		\
		"Disables rewriting of btree nodes during mark and sweep")\
	BCH_DEBUG_PARAM(btree_shrinker_disabled,		\
		"Disables the shrinker callback for the btree node cache")
		"Disables the shrinker callback for the btree node cache")\
	BCH_DEBUG_PARAM(verify_btree_ondisk,			\
		"Reread btree nodes at various points to verify the "	\
		"mergesort in the read path against modifications "	\
		"done in memory")

/* Parameters that should only be compiled in in debug mode: */
#define BCH_DEBUG_PARAMS_DEBUG()				\

@@ -273,10 +277,6 @@ do { \
		"information) when iterating over keys") \
	BCH_DEBUG_PARAM(debug_check_btree_accounting, \
		"Verify btree accounting for keys within a node") \
	BCH_DEBUG_PARAM(verify_btree_ondisk, \
		"Reread btree nodes at various points to verify the " \
		"mergesort in the read path against modifications " \
		"done in memory") \
	BCH_DEBUG_PARAM(journal_seq_verify, \
		"Store the journal sequence number in the version " \
		"number of every btree key, and verify that btree " \
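The two hunks above move verify_btree_ondisk out of the debug-only parameter list and into the always-compiled one, presumably so the option exists in non-debug builds too. For readers unfamiliar with the idiom, BCH_DEBUG_PARAMS is an X-macro list: each BCH_DEBUG_PARAM(name, description) entry is expanded under different definitions of the macro to generate variables, description tables, and so on. A minimal sketch of the pattern (simplified names, not the actual bcachefs definitions):

/* X-macro sketch: one list, several expansions (illustrative only) */
#define DEBUG_PARAMS()						\
	DEBUG_PARAM(verify_btree_ondisk,			\
		    "Reread btree nodes to verify the read path")

/* Expansion 1: declare a bool for each parameter */
#define DEBUG_PARAM(name, description) bool opt_##name;
DEBUG_PARAMS()
#undef DEBUG_PARAM

/* Expansion 2: build a name/description table from the same list */
struct param_desc { const char *name, *description; };
static const struct param_desc param_table[] = {
#define DEBUG_PARAM(name, description) { #name, description },
	DEBUG_PARAMS()
#undef DEBUG_PARAM
};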
@@ -545,6 +545,8 @@ struct btree_iter_buf {
	struct btree_iter	*iter;
};

#define REPLICAS_DELTA_LIST_MAX	(1U << 16)

struct bch_fs {
	struct closure		cl;

@@ -572,6 +574,7 @@ struct bch_fs {
	struct bch_replicas_cpu replicas;
	struct bch_replicas_cpu replicas_gc;
	struct mutex		replicas_gc_lock;
	mempool_t		replicas_delta_pool;

	struct journal_entry_res btree_root_journal_res;
	struct journal_entry_res replicas_journal_res;

@@ -644,6 +647,7 @@ struct bch_fs {
	struct mutex		btree_trans_lock;
	struct list_head	btree_trans_list;
	mempool_t		btree_iters_pool;
	mempool_t		btree_trans_mem_pool;
	struct btree_iter_buf __percpu *btree_iters_bufs;

	struct srcu_struct	btree_trans_barrier;
@@ -813,11 +817,9 @@ struct bch_fs {
	/* DEBUG JUNK */
	struct dentry		*debug;
	struct btree_debug	btree_debug[BTREE_ID_NR];
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree		*verify_data;
	struct btree_node	*verify_ondisk;
	struct mutex		verify_lock;
#endif

	u64			*unused_inode_hints;
	unsigned		inode_shard_bits;
@@ -100,7 +100,6 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)

static unsigned bch2_key_types_allowed[] = {
	[BKEY_TYPE_extents] =
		(1U << KEY_TYPE_discard)|
		(1U << KEY_TYPE_error)|
		(1U << KEY_TYPE_extent)|
		(1U << KEY_TYPE_reservation)|
@@ -33,21 +33,21 @@ static inline unsigned btree_cache_can_free(struct btree_cache *bc)
	return max_t(int, 0, bc->used - bc->reserve);
}

static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
{
	EBUG_ON(btree_node_write_in_flight(b));

	kvpfree(b->data, btree_bytes(c));
	b->data = NULL;
	vfree(b->aux_data);
	b->aux_data = NULL;
}

static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
	struct btree_cache *bc = &c->btree_cache;

	__btree_node_data_free(c, b);
	EBUG_ON(btree_node_write_in_flight(b));

	kvpfree(b->data, btree_bytes(c));
	b->data = NULL;
#ifdef __KERNEL__
	vfree(b->aux_data);
#else
	munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
	b->aux_data = NULL;

	bc->used--;
	list_move(&b->list, &bc->freed);
}
@@ -75,8 +75,13 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
	b->data = kvpmalloc(btree_bytes(c), gfp);
	if (!b->data)
		return -ENOMEM;

#ifdef __KERNEL__
	b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp);
#else
	b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
			   PROT_READ|PROT_WRITE|PROT_EXEC,
			   MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
#endif
	if (!b->aux_data) {
		kvpfree(b->data, btree_bytes(c));
		b->data = NULL;
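These two btree_cache.c hunks give aux_data separate kernel and userspace paths: vmalloc_exec() in the kernel, an executable anonymous mmap() in the userspace build of this tree. As I understand it the mapping is executable because aux_data can hold a specialized, runtime-generated bkey unpack function for the node. A hedged sketch of the dual-path pattern (alloc_exec_buf is an assumed helper name, not the bcachefs API):

#include <stddef.h>
#ifdef __KERNEL__
#include <linux/vmalloc.h>
#else
#include <sys/mman.h>
#endif

/* Sketch: allocate an executable buffer in either environment. */
static void *alloc_exec_buf(size_t size)
{
#ifdef __KERNEL__
	/* two-argument form as carried in this tree */
	return vmalloc_exec(size, GFP_KERNEL);
#else
	void *p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC,
		       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	return p == MAP_FAILED ? NULL : p;
#endif
}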
@@ -100,7 +105,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
	return b;
}

static struct btree *btree_node_mem_alloc(struct bch_fs *c)
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b = __btree_node_mem_alloc(c);

@@ -360,12 +365,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

#ifdef CONFIG_BCACHEFS_DEBUG
	if (c->verify_data)
		list_move(&c->verify_data->list, &bc->live);

	kvpfree(c->verify_ondisk, btree_bytes(c));
#endif

	for (i = 0; i < BTREE_ID_NR; i++)
		if (c->btree_roots[i].b)

@@ -419,31 +422,15 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
	bch2_recalc_btree_reserve(c);

	for (i = 0; i < bc->reserve; i++)
		if (!btree_node_mem_alloc(c)) {
		if (!__bch2_btree_node_mem_alloc(c)) {
			ret = -ENOMEM;
			goto out;
		}

	list_splice_init(&bc->live, &bc->freeable);

#ifdef CONFIG_BCACHEFS_DEBUG
	mutex_init(&c->verify_lock);

	c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
	if (!c->verify_ondisk) {
		ret = -ENOMEM;
		goto out;
	}

	c->verify_data = btree_node_mem_alloc(c);
	if (!c->verify_data) {
		ret = -ENOMEM;
		goto out;
	}

	list_del_init(&c->verify_data->list);
#endif

	bc->shrink.count_objects	= bch2_btree_cache_count;
	bc->shrink.scan_objects		= bch2_btree_cache_scan;
	bc->shrink.seeks		= 4;
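Note the shape of the init/exit changes above: the verify_ondisk buffer and verify_data node are no longer allocated unconditionally at mount when CONFIG_BCACHEFS_DEBUG is set. The rewritten __bch2_btree_verify() in debug.c (later in this diff) allocates them lazily, under verify_lock, the first time verification actually runs. A sketch of the lazy-init idiom being adopted (simplified, assumed names; 'struct ctx' stands in for bch_fs):

#include <linux/mutex.h>
#include <linux/mm.h>

struct ctx {
	struct mutex	verify_lock;
	void		*verify_buf;
	size_t		buf_bytes;
};

/* Lazy, lock-protected one-time allocation (sketch) */
static int get_verify_buf(struct ctx *c)
{
	mutex_lock(&c->verify_lock);
	if (!c->verify_buf) {
		c->verify_buf = kvmalloc(c->buf_bytes, GFP_KERNEL);
		if (!c->verify_buf) {
			mutex_unlock(&c->verify_lock);
			return -ENOMEM;
		}
	}
	/* caller keeps verify_lock held while using the buffer */
	return 0;
}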
@@ -703,6 +690,41 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
	return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
}

static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
	char buf1[100], buf2[100], buf3[100], buf4[100];

	if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
		return;

	bch2_bpos_to_text(&PBUF(buf1), b->key.k.type == KEY_TYPE_btree_ptr_v2
			  ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
			  : POS_MIN);
	bch2_bpos_to_text(&PBUF(buf2), b->data->min_key);

	bch2_bpos_to_text(&PBUF(buf3), b->key.k.p);
	bch2_bpos_to_text(&PBUF(buf4), b->data->max_key);
	bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n"
			     "btree: ptr %u header %llu\n"
			     "level: ptr %u header %llu\n"
			     "min ptr %s node header %s\n"
			     "max ptr %s node header %s",
			     b->c.btree_id, BTREE_NODE_ID(b->data),
			     b->c.level, BTREE_NODE_LEVEL(b->data),
			     buf1, buf2, buf3, buf4);
}

static inline void btree_check_header(struct bch_fs *c, struct btree *b)
{
	if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
	    b->c.level != BTREE_NODE_LEVEL(b->data) ||
	    bpos_cmp(b->data->max_key, b->key.k.p) ||
	    (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	     bpos_cmp(b->data->min_key,
		      bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
		btree_bad_header(c, b);
}

/**
 * bch_btree_node_get - find a btree node in the cache and lock it, reading it
 * in from disk if necessary.
@@ -833,10 +855,7 @@ lock_node:

	EBUG_ON(b->c.btree_id != iter->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
		bpos_cmp(b->data->min_key,
			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
	btree_check_header(c, b);

	return b;
}

@@ -916,10 +935,7 @@ lock_node:

	EBUG_ON(b->c.btree_id != btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
		bpos_cmp(b->data->min_key,
			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
	btree_check_header(c, b);
out:
	bch2_btree_cache_cannibalize_unlock(c);
	return b;

@@ -17,6 +17,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);

struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);

struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,

@@ -330,6 +330,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
		BUG_ON(bch2_journal_seq_verify &&
		       k->k->version.lo > journal_cur_seq(&c->journal));

		ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
		if (ret)
			goto err;

		if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
				"key version number higher than recorded: %llu > %llu",
				k->k->version.lo,

@@ -346,8 +350,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
				goto err;
			}
		}

		ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
	}

	ptrs = bch2_bkey_ptrs_c(*k);

@@ -1340,6 +1340,13 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
	return ret;
}

static void btree_write_submit(struct work_struct *work)
{
	struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);

	bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &wbio->key);
}

void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
{
	struct btree_write_bio *wbio;

@@ -1347,7 +1354,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
	struct bset *i;
	struct btree_node *bn = NULL;
	struct btree_node_entry *bne = NULL;
	struct bkey_buf k;
	struct bch_extent_ptr *ptr;
	struct sort_iter sort_iter;
	struct nonce nonce;

@@ -1358,8 +1364,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
	bool validate_before_checksum = false;
	void *data;

	bch2_bkey_buf_init(&k);

	if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
		return;

@@ -1536,6 +1540,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
	wbio_init(&wbio->wbio.bio);
	wbio->data			= data;
	wbio->bytes			= bytes;
	wbio->wbio.c			= c;
	wbio->wbio.used_mempool		= used_mempool;
	wbio->wbio.bio.bi_opf		= REQ_OP_WRITE|REQ_META;
	wbio->wbio.bio.bi_end_io	= btree_node_write_endio;

@@ -1558,9 +1563,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
	 * just make all btree node writes FUA to keep things sane.
	 */

	bch2_bkey_buf_copy(&k, c, &b->key);
	bkey_copy(&wbio->key, &b->key);

	bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr)
	bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&wbio->key)), ptr)
		ptr->offset += b->written;

	b->written += sectors_to_write;

@@ -1568,9 +1573,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
	atomic64_inc(&c->btree_writes_nr);
	atomic64_add(sectors_to_write, &c->btree_writes_sectors);

	/* XXX: submitting IO with btree locks held: */
	bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
	bch2_bkey_buf_exit(&k, c);
	INIT_WORK(&wbio->work, btree_write_submit);
	schedule_work(&wbio->work);
	return;
err:
	set_btree_node_noevict(b);
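The btree_write_submit() hunks above change __bch2_btree_node_write() to hand the final bch2_submit_wbio_replicas() call off to a workqueue instead of submitting directly, which is why the old "/* XXX: submitting IO with btree locks held: */" comment goes away: the key is copied into the btree_write_bio itself (the new __BKEY_PADDED field in the hunk that follows), so nothing from the caller's stack is needed at submit time. The general pattern, as a sketch with made-up names:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct deferred_io {
	struct work_struct	work;
	int			payload;	/* stands in for the copied bkey */
};

static void deferred_io_fn(struct work_struct *work)
{
	struct deferred_io *d = container_of(work, struct deferred_io, work);

	/* actual submission happens here, outside the caller's locks */
	kfree(d);
}

static void submit_deferred(struct deferred_io *d)
{
	INIT_WORK(&d->work, deferred_io_fn);
	schedule_work(&d->work);	/* runs later in process context */
}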
@@ -42,6 +42,7 @@ struct btree_read_bio {

struct btree_write_bio {
	struct work_struct	work;
	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
	void			*data;
	unsigned		bytes;
	struct bch_write_bio	wbio;

@@ -2145,7 +2145,16 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
	if (new_top > trans->mem_bytes) {
		size_t old_bytes = trans->mem_bytes;
		size_t new_bytes = roundup_pow_of_two(new_top);
		void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
		void *new_mem;

		WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);

		new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
		if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
			new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
			new_bytes = BTREE_TRANS_MEM_MAX;
			kfree(trans->mem);
		}

		if (!new_mem)
			return ERR_PTR(-ENOMEM);
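bch2_trans_kmalloc() now has a guaranteed-forward-progress path: if krealloc() fails and the request fits in BTREE_TRANS_MEM_MAX, it falls back to a preallocated mempool (btree_trans_mem_pool, initialized later in this diff), so a transaction under memory pressure can still get its buffer. A sketch of the try-kmalloc-then-mempool idiom with assumed names; note this sketch copies the old contents over on the pool path, which the hunk above does not do (there the contents are preserved only on the krealloc path):

#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/string.h>

#define POOL_BUF_MAX 4096	/* mirrors BTREE_TRANS_MEM_MAX in this diff */

/* Sketch: grow a buffer, falling back to a preallocated pool. */
static void *grow_buf(mempool_t *pool, void *old, size_t old_bytes,
		      size_t new_bytes)
{
	void *new = krealloc(old, new_bytes, GFP_NOFS);

	if (!new && new_bytes <= POOL_BUF_MAX) {
		new = mempool_alloc(pool, GFP_KERNEL);	/* guaranteed element */
		if (new) {
			memcpy(new, old, old_bytes);	/* preserve contents */
			kfree(old);
		}
	}
	return new;
}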
@@ -2249,6 +2258,11 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
	if (expected_mem_bytes) {
		trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes);
		trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL);

		if (!unlikely(trans->mem)) {
			trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
			trans->mem_bytes = BTREE_TRANS_MEM_MAX;
		}
	}

	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);

@@ -2290,7 +2304,18 @@ int bch2_trans_exit(struct btree_trans *trans)

	bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);

	if (trans->fs_usage_deltas) {
		if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
		    REPLICAS_DELTA_LIST_MAX)
			mempool_free(trans->fs_usage_deltas,
				     &trans->c->replicas_delta_pool);
		else
			kfree(trans->fs_usage_deltas);
	}

	if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
		mempool_free(trans->mem, &trans->c->btree_trans_mem_pool);
	else
		kfree(trans->mem);

#ifdef __KERNEL__

@@ -2299,6 +2324,7 @@ int bch2_trans_exit(struct btree_trans *trans)
	 */
	trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
#endif

	if (trans->iters)
		mempool_free(trans->iters, &trans->c->btree_iters_pool);

@@ -2392,6 +2418,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)

void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
	mempool_exit(&c->btree_trans_mem_pool);
	mempool_exit(&c->btree_iters_pool);
	cleanup_srcu_struct(&c->btree_trans_barrier);
}

@@ -2407,5 +2434,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
	mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
			sizeof(struct btree_iter) * nr +
			sizeof(struct btree_insert_entry) * nr +
			sizeof(struct btree_insert_entry) * nr);
			sizeof(struct btree_insert_entry) * nr) ?:
		mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
					  BTREE_TRANS_MEM_MAX);
}
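bch2_fs_btree_iter_init() above now chains two mempool initializations with "?:". This is the GCC/Clang conditional-with-omitted-middle operator: "a ?: b" evaluates to a when a is nonzero, otherwise to b, so a chain of int-returning init calls stops at and returns the first nonzero error code. A tiny sketch:

/* Sketch of the '?:' error-chaining idiom used above */
static int step1(void) { return 0; }	/* success */
static int step2(void) { return -12; }	/* -ENOMEM, say */
static int step3(void) { return 0; }	/* never evaluated below */

static int init_all(void)
{
	/* Runs step1, then step2; the chain evaluates to -12
	 * and step3 is skipped. */
	return step1() ?: step2() ?: step3();
}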
@@ -218,8 +218,14 @@ static int btree_key_cache_fill(struct btree_trans *trans,
		goto err;
	}

	if (k.k->u64s > ck->u64s) {
		new_u64s = roundup_pow_of_two(k.k->u64s);
	/*
	 * bch2_varint_decode can read past the end of the buffer by at
	 * most 7 bytes (it won't be used):
	 */
	new_u64s = k.k->u64s + 1;

	if (new_u64s > ck->u64s) {
		new_u64s = roundup_pow_of_two(new_u64s);
		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
		if (!new_k) {
			ret = -ENOMEM;
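This hunk and the matching one in btree_key_can_insert_cached() (later in this diff) size key-cache buffers as the key's size plus one u64 rather than the exact size: bch2_varint_decode is allowed to read up to 7 bytes past the end of the value it decodes (the bytes are never used), so 8 bytes of slack keep that overread inside owned memory. A worked sketch of the sizing, with assumed names:

#include <stddef.h>

#define DECODER_MAX_OVERREAD 7	/* bytes the decoder may read past the end */

/* Sketch: buffer sizing that tolerates a bounded decoder overread. */
static size_t safe_buf_u64s(size_t key_u64s)
{
	/* one extra u64 (8 bytes) covers a <= 7 byte overread */
	return key_u64s + 1;
}

/* e.g. a 3-u64 key is 24 bytes; decode may touch bytes [0, 24 + 7),
 * and safe_buf_u64s(3) * 8 = 32 bytes keeps that in bounds. */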
@@ -385,12 +391,18 @@ retry:
		goto evict;
	}

	/*
	 * Since journal reclaim depends on us making progress here, and the
	 * allocator/copygc depend on journal reclaim making progress, we need
	 * to be using alloc reserves:
	 * */
	ret   = bch2_btree_iter_traverse(b_iter) ?:
		bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BTREE_INSERT_NOUNLOCK|
				  BTREE_INSERT_NOCHECK_RW|
				  BTREE_INSERT_NOFAIL|
				  BTREE_INSERT_USE_RESERVE|
				  (ck->journal.seq == journal_last_seq(j)
				   ? BTREE_INSERT_JOURNAL_RESERVED
				   : 0)|

@@ -352,6 +352,8 @@ struct btree_trans_commit_hook {
	struct btree_trans_commit_hook *next;
};

#define BTREE_TRANS_MEM_MAX	4096

struct btree_trans {
	struct bch_fs		*c;
#ifdef CONFIG_BCACHEFS_DEBUG

@@ -887,6 +887,14 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
	btree_update_drop_new_node(c, b);

	btree_update_will_delete_key(as, &b->key);

	/*
	 * XXX: Waiting on io with btree node locks held, we don't want to be
	 * doing this. We can't have btree writes happening after the space has
	 * been freed, but we really only need to block before
	 * btree_update_nodes_written_trans() happens.
	 */
	btree_node_wait_on_io(b);
}

void bch2_btree_update_done(struct btree_update *as)

@@ -1146,6 +1154,24 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
	set_btree_node_need_write(b);
}

static void
__bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
				  struct btree_iter *iter, struct keylist *keys,
				  struct btree_node_iter node_iter)
{
	struct bkey_i *insert = bch2_keylist_front(keys);
	struct bkey_packed *k;

	BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);

	while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
	       (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
		;

	for_each_keylist_key(keys, insert)
		bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
}

/*
 * Move keys from n1 (original replacement node, now lower node) to n2 (higher
 * node)

@@ -1276,16 +1302,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
	struct bkey_packed *src, *dst, *n;
	struct bset *i;

	BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);

	bch2_btree_node_iter_init(&node_iter, b, &k->k.p);

	while (!bch2_keylist_empty(keys)) {
		k = bch2_keylist_front(keys);

		bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter);
		bch2_keylist_pop_front(keys);
	}
	__bch2_btree_insert_keys_interior(as, b, iter, keys, node_iter);

	/*
	 * We can't tolerate whiteouts here - with whiteouts there can be

@@ -1431,24 +1450,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
			struct btree_iter *iter, struct keylist *keys)
{
	struct btree_iter *linked;
	struct btree_node_iter node_iter;
	struct bkey_i *insert = bch2_keylist_front(keys);
	struct bkey_packed *k;

	/* Don't screw up @iter's position: */
	node_iter = iter->l[b->c.level].iter;

	/*
	 * btree_split(), btree_gc_coalesce() will insert keys before
	 * the iterator's current position - they know the keys go in
	 * the node the iterator points to:
	 */
	while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
	       (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
		;

	for_each_keylist_key(keys, insert)
		bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
	__bch2_btree_insert_keys_interior(as, b, iter, keys, iter->l[b->c.level].iter);

	btree_update_updated_node(as, b);

@@ -1598,7 +1601,19 @@ retry:
		next = m;
	}

	BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
	if (bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key)) {
		char buf1[100], buf2[100];

		bch2_bpos_to_text(&PBUF(buf1), prev->data->max_key);
		bch2_bpos_to_text(&PBUF(buf2), next->data->min_key);
		bch2_fs_inconsistent(c,
				     "btree topology error in btree merge:\n"
				     "prev ends at %s\n"
				     "next starts at %s\n",
				     buf1, buf2);
		ret = -EIO;
		goto err;
	}

	bch2_bkey_format_init(&new_s);
	bch2_bkey_format_add_pos(&new_s, prev->data->min_key);

@@ -293,6 +293,12 @@ btree_key_can_insert_cached(struct btree_trans *trans,
	    !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
		return BTREE_INSERT_NEED_JOURNAL_RECLAIM;

	/*
	 * bch2_varint_decode can read past the end of the buffer by at most 7
	 * bytes (it won't be used):
	 */
	u64s += 1;

	if (u64s <= ck->u64s)
		return BTREE_INSERT_OK;

@@ -396,20 +396,22 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
	bch2_wake_allocator(ca);
}

static inline void update_replicas(struct bch_fs *c,
static inline int update_replicas(struct bch_fs *c,
				   struct bch_fs_usage *fs_usage,
				   struct bch_replicas_entry *r,
				   s64 sectors)
{
	int idx = bch2_replicas_entry_idx(c, r);

	BUG_ON(idx < 0);
	if (idx < 0)
		return -1;

	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
	fs_usage->replicas[idx] += sectors;
	return 0;
}

static inline void update_cached_sectors(struct bch_fs *c,
static inline int update_cached_sectors(struct bch_fs *c,
					 struct bch_fs_usage *fs_usage,
					 unsigned dev, s64 sectors)
{
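update_replicas() and update_cached_sectors() stop asserting that a replicas entry exists and instead return an error, which the bch2_mark_* callers in the hunks below turn into bch2_fs_fatal_error() plus -1: an inconsistency now takes the filesystem down gracefully rather than crashing with a BUG. The one caller that still insists the entry exists is bch2_trans_fs_usage_apply(), which wraps the call in BUG_ON(). The pattern, sketched with stand-in names:

/* Sketch: turn an internal invariant into a recoverable error */
static int lookup_idx(int key)	/* stands in for bch2_replicas_entry_idx */
{
	return key >= 0 ? key : -1;
}

static int update_counter(long *counters, int key, long delta)
{
	int idx = lookup_idx(key);

	if (idx < 0)
		return -1;	/* was: BUG_ON(idx < 0) */
	counters[idx] += delta;
	return 0;
}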
@@ -417,7 +419,7 @@ static inline void update_cached_sectors(struct bch_fs *c,

	bch2_replicas_entry_cached(&r.e, dev);

	update_replicas(c, fs_usage, &r.e, sectors);
	return update_replicas(c, fs_usage, &r.e, sectors);
}

static struct replicas_delta_list *

@@ -425,10 +427,26 @@ replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
{
	struct replicas_delta_list *d = trans->fs_usage_deltas;
	unsigned new_size = d ? (d->size + more) * 2 : 128;
	unsigned alloc_size = sizeof(*d) + new_size;

	WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);

	if (!d || d->used + more > d->size) {
		d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
		BUG_ON(!d);
		d = krealloc(d, alloc_size, GFP_NOIO|__GFP_ZERO);

		BUG_ON(!d && alloc_size > REPLICAS_DELTA_LIST_MAX);

		if (!d) {
			d = mempool_alloc(&trans->c->replicas_delta_pool, GFP_NOIO);
			memset(d, 0, REPLICAS_DELTA_LIST_MAX);

			if (trans->fs_usage_deltas)
				memcpy(d, trans->fs_usage_deltas,
				       trans->fs_usage_deltas->size + sizeof(*d));

			new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
			kfree(trans->fs_usage_deltas);
		}

		d->size = new_size;
		trans->fs_usage_deltas = d;

@@ -553,8 +571,12 @@ static int bch2_mark_alloc(struct bch_fs *c,

	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
	    old_m.cached_sectors) {
		update_cached_sectors(c, fs_usage, ca->dev_idx,
				      -old_m.cached_sectors);
		if (update_cached_sectors(c, fs_usage, ca->dev_idx,
					  -old_m.cached_sectors)) {
			bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
			return -1;
		}

		trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
				 old_m.cached_sectors);
	}

@@ -936,8 +958,12 @@ static int bch2_mark_extent(struct bch_fs *c,

		if (p.ptr.cached) {
			if (!stale)
				update_cached_sectors(c, fs_usage, p.ptr.dev,
						      disk_sectors);
				if (update_cached_sectors(c, fs_usage, p.ptr.dev,
							  disk_sectors)) {
					bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors");
					return -1;
				}
		} else if (!p.has_ec) {
			dirty_sectors += disk_sectors;
			r.e.devs[r.e.nr_devs++] = p.ptr.dev;

@@ -956,8 +982,15 @@ static int bch2_mark_extent(struct bch_fs *c,
		}
	}

	if (r.e.nr_devs)
		update_replicas(c, fs_usage, &r.e, dirty_sectors);
	if (r.e.nr_devs) {
		if (update_replicas(c, fs_usage, &r.e, dirty_sectors)) {
			char buf[200];

			bch2_bkey_val_to_text(&PBUF(buf), c, k);
			bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
			return -1;
		}
	}

	return 0;
}

@@ -1031,8 +1064,14 @@ static int bch2_mark_stripe(struct bch_fs *c,
			return ret;
		}

		update_replicas(c, fs_usage, &m->r.e,
				((s64) m->sectors * m->nr_redundant));
		if (update_replicas(c, fs_usage, &m->r.e,
				    ((s64) m->sectors * m->nr_redundant))) {
			char buf[200];

			bch2_bkey_val_to_text(&PBUF(buf), c, new);
			bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
			return -1;
		}
	}

	return 0;

@@ -1292,7 +1331,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
			added += d->delta;
		}

		update_replicas(c, dst, &d->r, d->delta);
		BUG_ON(update_replicas(c, dst, &d->r, d->delta));
	}

	dst->nr_inodes += deltas->nr_inodes;

@@ -29,40 +29,19 @@

static struct dentry *bch_debug;

#ifdef CONFIG_BCACHEFS_DEBUG

void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
				      struct extent_ptr_decoded pick)
{
	struct btree *v = c->verify_data;
	struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
	struct bset *sorted, *inmemory;
	struct extent_ptr_decoded pick;
	struct bch_dev *ca;
	struct btree_node *n_ondisk = c->verify_ondisk;
	struct btree_node *n_sorted = c->verify_data->data;
	struct bset *sorted, *inmemory = &b->data->keys;
	struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
	struct bio *bio;
	bool failed = false;

	if (c->opts.nochanges)
		return;

	btree_node_io_lock(b);
	mutex_lock(&c->verify_lock);

	n_ondisk = c->verify_ondisk;
	n_sorted = c->verify_data->data;
	n_inmemory = b->data;

	bkey_copy(&v->key, &b->key);
	v->written = 0;
	v->c.level = b->c.level;
	v->c.btree_id = b->c.btree_id;
	bch2_btree_keys_init(v);

	if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
				       NULL, &pick) <= 0)
		return;

	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return;
		return false;

	bio = bio_alloc_bioset(GFP_NOIO,
			       buf_pages(n_sorted, btree_bytes(c)),

@@ -79,12 +58,12 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)

	memcpy(n_ondisk, n_sorted, btree_bytes(c));

	v->written = 0;
	if (bch2_btree_node_read_done(c, ca, v, false))
		goto out;
		return false;

	n_sorted = c->verify_data->data;
	sorted = &n_sorted->keys;
	inmemory = &n_inmemory->keys;

	if (inmemory->u64s != sorted->u64s ||
	    memcmp(inmemory->start,

@@ -102,8 +81,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
		printk(KERN_ERR "*** read back in:\n");
		bch2_dump_bset(c, v, sorted, 0);

		while (offset < b->written) {
			if (!offset ) {
		while (offset < v->written) {
			if (!offset) {
				i = &n_ondisk->keys;
				sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
					c->block_bits;

@@ -122,25 +101,84 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
			offset += sectors;
		}

		printk(KERN_ERR "*** block %u/%u not written\n",
		       offset >> c->block_bits, btree_blocks(c));

		for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
			if (inmemory->_data[j] != sorted->_data[j])
				break;

		printk(KERN_ERR "b->written %u\n", b->written);

		console_unlock();
		panic("verify failed at %u\n", j);
		bch_err(c, "verify failed at key %u", j);

		failed = true;
	}

	if (v->written != b->written) {
		bch_err(c, "written wrong: expected %u, got %u",
			b->written, v->written);
		failed = true;
	}

	return failed;
}

void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
	struct bkey_ptrs_c ptrs;
	struct extent_ptr_decoded p;
	const union bch_extent_entry *entry;
	struct btree *v;
	struct bset *inmemory = &b->data->keys;
	struct bkey_packed *k;
	bool failed = false;

	if (c->opts.nochanges)
		return;

	btree_node_io_lock(b);
	mutex_lock(&c->verify_lock);

	if (!c->verify_ondisk) {
		c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
		if (!c->verify_ondisk)
			goto out;
	}

	if (!c->verify_data) {
		c->verify_data = __bch2_btree_node_mem_alloc(c);
		if (!c->verify_data)
			goto out;

		list_del_init(&c->verify_data->list);
	}

	BUG_ON(b->nsets != 1);

	for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_next(k))
		if (k->type == KEY_TYPE_btree_ptr_v2) {
			struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k);
			v->mem_ptr = 0;
		}

	v = c->verify_data;
	bkey_copy(&v->key, &b->key);
	v->c.level = b->c.level;
	v->c.btree_id = b->c.btree_id;
	bch2_btree_keys_init(v);

	ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
	bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
		failed |= bch2_btree_verify_replica(c, b, p);

	if (failed) {
		char buf[200];

		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key));
		bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf);
	}
out:
	mutex_unlock(&c->verify_lock);
	btree_node_io_unlock(b);
}

#endif

#ifdef CONFIG_DEBUG_FS

/* XXX: bch_fs refcounting */

@@ -8,11 +8,7 @@ struct bio;
struct btree;
struct bch_fs;

#ifdef CONFIG_BCACHEFS_DEBUG
void __bch2_btree_verify(struct bch_fs *, struct btree *);
#else
static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {}
#endif

static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
{

@@ -1621,6 +1621,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
		if (ret)
			break;
	}
	bch2_trans_iter_put(&trans, iter);

	bch2_trans_exit(&trans);

@@ -38,7 +38,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
	return ret ?: sectors;
}

static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{

@@ -63,7 +63,14 @@ err:
	return ret;
}

static int write_inode(struct btree_trans *trans,
static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{
	return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
}

static int __write_inode(struct btree_trans *trans,
			 struct bch_inode_unpacked *inode,
			 u32 snapshot)
{
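The fsck refactor in the hunks above and below follows one scheme throughout: each helper is split into a bare __helper() that makes a single btree-transaction attempt, plus a wrapper that runs it under lockrestart_do() or __bch2_trans_do(), which retry the body when the transaction has to drop locks and restart. Roughly, my reading of the lockrestart_do() idiom (a sketch, not the exact macro from this tree):

/* Sketch: retry a transaction body on lock restart (-EINTR here) */
#define lockrestart_do(trans, expr)				\
({								\
	int _ret;						\
	do {							\
		bch2_trans_begin(trans);			\
		_ret = (expr);					\
	} while (_ret == -EINTR);				\
	_ret;							\
})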
@@ -71,11 +78,19 @@ static int write_inode(struct btree_trans *trans,
		bch2_trans_get_iter(trans, BTREE_ID_inodes,
				    SPOS(0, inode->bi_inum, snapshot),
				    BTREE_ITER_INTENT);
	int ret = bch2_inode_write(trans, inode_iter, inode);
	bch2_trans_iter_put(trans, inode_iter);
	return ret;
}

static int write_inode(struct btree_trans *trans,
		       struct bch_inode_unpacked *inode,
		       u32 snapshot)
{
	int ret = __bch2_trans_do(trans, NULL, NULL,
				  BTREE_INSERT_NOFAIL|
				  BTREE_INSERT_LAZY_RW,
				  bch2_inode_write(trans, inode_iter, inode));
	bch2_trans_iter_put(trans, inode_iter);
				  __write_inode(trans, inode, snapshot));
	if (ret)
		bch_err(trans->c, "error in fsck: error %i updating inode", ret);
	return ret;

@@ -114,57 +129,101 @@ static int remove_dirent(struct btree_trans *trans, struct bpos pos)
	return ret;
}

static int __reattach_inode(struct btree_trans *trans,
			    struct bch_inode_unpacked *lostfound,
			    u64 inum)
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans,
			    struct bch_inode_unpacked *lostfound)
{
	struct bch_hash_info dir_hash =
		bch2_hash_info_init(trans->c, lostfound);
	struct bch_inode_unpacked inode_u;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked root;
	struct bch_hash_info root_hash_info;
	struct qstr lostfound_str = QSTR("lost+found");
	u64 inum;
	u32 snapshot;
	int ret;

	snprintf(name_buf, sizeof(name_buf), "%llu", inum);
	name = (struct qstr) QSTR(name_buf);
	ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root, &snapshot);
	if (ret && ret != -ENOENT)
		return ret;

	ret = lookup_inode(trans, inum, &inode_u, &snapshot);
	root_hash_info = bch2_hash_info_init(c, &root);
	inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
				  &lostfound_str);
	if (!inum) {
		bch_notice(c, "creating lost+found");
		goto create_lostfound;
	}

	ret = lookup_inode(trans, inum, lostfound, &snapshot);
	if (ret && ret != -ENOENT) {
		/*
		 * The check_dirents pass has already run, dangling dirents
		 * shouldn't exist here:
		 */
		bch_err(c, "error looking up lost+found: %i", ret);
		return ret;
	}

	if (ret == -ENOENT) {
create_lostfound:
		bch2_inode_init_early(c, lostfound);

		ret = __bch2_trans_do(trans, NULL, NULL,
				      BTREE_INSERT_NOFAIL|
				      BTREE_INSERT_LAZY_RW,
			bch2_create_trans(trans,
					  BCACHEFS_ROOT_INO, &root,
					  lostfound,
					  &lostfound_str,
					  0, 0, S_IFDIR|0700, 0, NULL, NULL));
		if (ret)
			bch_err(c, "error creating lost+found: %i", ret);
	}

	return 0;
}

static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *inode)
{
	struct bch_hash_info dir_hash;
	struct bch_inode_unpacked lostfound;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	int ret;

	ret = lookup_lostfound(trans, &lostfound);
	if (ret)
		return ret;

	if (S_ISDIR(inode_u.bi_mode)) {
		lostfound->bi_nlink++;
	if (S_ISDIR(inode->bi_mode)) {
		lostfound.bi_nlink++;

		ret = write_inode(trans, lostfound, U32_MAX);
		ret = write_inode(trans, &lostfound, U32_MAX);
		if (ret)
			return ret;
	}

	ret = bch2_dirent_create(trans, lostfound->bi_inum, &dir_hash,
				 mode_to_type(inode_u.bi_mode),
				 &name, inum, &dir_offset,
				 BCH_HASH_SET_MUST_CREATE);
	if (ret)
	dir_hash = bch2_hash_info_init(trans->c, &lostfound);

	snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
	name = (struct qstr) QSTR(name_buf);

	ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
		bch2_dirent_create(trans, lostfound.bi_inum, &dir_hash,
				   mode_to_type(inode->bi_mode),
				   &name, inode->bi_inum, &dir_offset,
				   BCH_HASH_SET_MUST_CREATE));
	if (ret) {
		bch_err(trans->c, "error %i reattaching inode %llu",
			ret, inode->bi_inum);
		return ret;
	}

	inode_u.bi_dir = lostfound->bi_inum;
	inode_u.bi_dir_offset = dir_offset;
	inode->bi_dir		= lostfound.bi_inum;
	inode->bi_dir_offset	= dir_offset;

	return write_inode(trans, &inode_u, U32_MAX);
}

static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *lostfound,
			  u64 inum)
{
	int ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
			__reattach_inode(trans, lostfound, inum));
	if (ret)
		bch_err(trans->c, "error %i reattaching inode %llu", ret, inum);

	return ret;
	return write_inode(trans, inode, U32_MAX);
}

static int remove_backpointer(struct btree_trans *trans,

@@ -931,58 +990,6 @@ create_root:
				    BTREE_INSERT_LAZY_RW);
}

/* Get lost+found, create if it doesn't exist: */
static int check_lostfound(struct bch_fs *c,
			   struct bch_inode_unpacked *root_inode,
			   struct bch_inode_unpacked *lostfound_inode)
{
	struct qstr lostfound = QSTR("lost+found");
	struct bch_hash_info root_hash_info =
		bch2_hash_info_init(c, root_inode);
	u64 inum;
	u32 snapshot;
	int ret;

	bch_verbose(c, "checking lost+found");

	inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
				  &lostfound);
	if (!inum) {
		bch_notice(c, "creating lost+found");
		goto create_lostfound;
	}

	ret = bch2_trans_do(c, NULL, NULL, 0,
		lookup_inode(&trans, inum, lostfound_inode, &snapshot));
	if (ret && ret != -ENOENT)
		return ret;

	if (fsck_err_on(ret, c, "lost+found missing"))
		goto create_lostfound;

	if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
			"lost+found inode not a directory"))
		goto create_lostfound;

	return 0;
fsck_err:
	return ret;
create_lostfound:
	bch2_inode_init_early(c, lostfound_inode);

	ret = bch2_trans_do(c, NULL, NULL,
			    BTREE_INSERT_NOFAIL|
			    BTREE_INSERT_LAZY_RW,
		bch2_create_trans(&trans,
				  BCACHEFS_ROOT_INO, root_inode,
				  lostfound_inode, &lostfound,
				  0, 0, S_IFDIR|0700, 0, NULL, NULL));
	if (ret)
		bch_err(c, "error creating lost+found: %i", ret);

	return ret;
}

struct pathbuf {
	size_t		nr;
	size_t		size;

@@ -1014,7 +1021,6 @@ static int path_down(struct pathbuf *p, u64 inum)
}

static int check_path(struct btree_trans *trans,
		      struct bch_inode_unpacked *lostfound,
		      struct pathbuf *p,
		      struct bch_inode_unpacked *inode)
{

@@ -1038,7 +1044,7 @@ static int check_path(struct btree_trans *trans,
				inode->bi_nlink,
				inode->bi_dir,
				inode->bi_dir_offset))
			ret = reattach_inode(trans, lostfound, inode->bi_inum);
			ret = reattach_inode(trans, inode);
			break;
		}
		ret = 0;

@@ -1067,12 +1073,11 @@ static int check_path(struct btree_trans *trans,
				break;
			}

			ret = reattach_inode(trans, lostfound, inode->bi_inum);
			ret = reattach_inode(trans, inode);
			break;
		}

		ret = lockrestart_do(trans,
			lookup_inode(trans, inode->bi_dir, inode, &snapshot));
		ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
		if (ret) {
			/* Should have been caught in dirents pass */
			bch_err(c, "error looking up parent directory: %i", ret);

@@ -1090,8 +1095,7 @@ fsck_err:
 * After check_dirents(), if an inode backpointer doesn't exist that means it's
 * unreachable:
 */
static int check_directory_structure(struct bch_fs *c,
				     struct bch_inode_unpacked *lostfound)
static int check_directory_structure(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter *iter;

@@ -1113,7 +1117,7 @@ static int check_directory_structure(struct bch_fs *c,
			break;
		}

		ret = check_path(&trans, lostfound, &path, &u);
		ret = check_path(&trans, &path, &u);
		if (ret)
			break;
	}

@@ -1190,7 +1194,6 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
}

static int check_inode_nlink(struct btree_trans *trans,
			     struct bch_inode_unpacked *lostfound_inode,
			     struct btree_iter *iter,
			     struct bkey_s_c_inode inode,
			     unsigned nlink)

@@ -1238,7 +1241,6 @@ fsck_err:

noinline_for_stack
static int bch2_gc_walk_inodes(struct bch_fs *c,
			       struct bch_inode_unpacked *lostfound_inode,
			       nlink_table *links,
			       u64 range_start, u64 range_end)
{

@@ -1259,7 +1261,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
			continue;

		link = genradix_ptr(links, k.k->p.offset - range_start);
		ret = check_inode_nlink(&trans, lostfound_inode, iter,
		ret = check_inode_nlink(&trans, iter,
			bkey_s_c_to_inode(k), link ? link->count : 0);
		if (ret)
			break;

@@ -1275,8 +1277,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
}

noinline_for_stack
static int check_nlinks(struct bch_fs *c,
			struct bch_inode_unpacked *lostfound_inode)
static int check_nlinks(struct bch_fs *c)
{
	nlink_table links;
	u64 this_iter_range_start, next_iter_range_start = 0;

@@ -1296,7 +1297,7 @@ static int check_nlinks(struct bch_fs *c,
		if (ret)
			break;

		ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
		ret = bch2_gc_walk_inodes(c, &links,
					  this_iter_range_start,
					  next_iter_range_start);
		if (ret)

@@ -1316,16 +1317,15 @@ static int check_nlinks(struct bch_fs *c,
 */
int bch2_fsck_full(struct bch_fs *c)
{
	struct bch_inode_unpacked root_inode, lostfound_inode;
	struct bch_inode_unpacked root_inode;

	return check_inodes(c, true) ?:
		check_extents(c) ?:
		check_dirents(c) ?:
		check_xattrs(c) ?:
		check_root(c, &root_inode) ?:
		check_lostfound(c, &root_inode, &lostfound_inode) ?:
		check_directory_structure(c, &lostfound_inode) ?:
		check_nlinks(c, &lostfound_inode);
		check_directory_structure(c) ?:
		check_nlinks(c);
}

int bch2_fsck_walk_inodes_only(struct bch_fs *c)

@@ -634,7 +634,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
				       msecs_to_jiffies(j->reclaim_delay_ms)))
		min_nr = 1;

	if (j->prereserved.reserved * 2 > j->prereserved.remaining)
	if (j->prereserved.reserved * 4 > j->prereserved.remaining)
		min_nr = 1;

	if (fifo_free(&j->pin) <= 32)
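The one-character change above makes journal reclaim kick in earlier: the old test forced at least one flush once prereserved space passed half of what remains, the new one already at a quarter. With remaining = 512, say, the old rule triggers at reserved >= 257, the new one at reserved >= 129. Side by side:

/* Sketch: the two thresholds compared */
static int reclaim_forced_old(unsigned reserved, unsigned remaining)
{
	return reserved * 2 > remaining;	/* fires past 1/2 of remaining */
}

static int reclaim_forced_new(unsigned reserved, unsigned remaining)
{
	return reserved * 4 > remaining;	/* fires past 1/4: more aggressive */
}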
@@ -62,6 +62,6 @@ void bch2_verify_keylist_sorted(struct keylist *l)

	for_each_keylist_key(l, k)
		BUG_ON(bkey_next(k) != l->top &&
		       bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
		       bpos_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
}
#endif
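This one-word change is the fix the commit message names: bch2_verify_keylist_sorted() now compares successive keys with bpos_cmp() rather than bkey_cmp(). If I read the helpers of this era correctly, bpos_cmp() compares the full position including the snapshot field while bkey_cmp() ignores it, so the old assertion could fire on keylists whose keys differ only in snapshot. A hedged sketch of the distinction (stand-in types and names, not the bcachefs definitions):

#include <stdint.h>

struct pos { uint64_t inode, offset; uint32_t snapshot; };

static int cmp_u64(uint64_t l, uint64_t r) { return (l > r) - (l < r); }

/* Full-position compare, snapshot included (roughly what bpos_cmp does) */
static int full_pos_cmp(struct pos l, struct pos r)
{
	return cmp_u64(l.inode, r.inode) ?:
	       cmp_u64(l.offset, r.offset) ?:
	       cmp_u64(l.snapshot, r.snapshot);
}

/* Snapshot-blind compare (roughly what bkey_cmp did) */
static int blind_pos_cmp(struct pos l, struct pos r)
{
	return cmp_u64(l.inode, r.inode) ?:
	       cmp_u64(l.offset, r.offset);
}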
@@ -762,7 +762,7 @@ static int bch2_move_btree(struct bch_fs *c,
			   id == start_btree_id ? start_pos : POS_MIN,
			   BTREE_ITER_PREFETCH, b) {
		if (kthread && kthread_should_stop())
			goto out;
			break;

		if ((cmp_int(id, end_btree_id) ?:
		     bkey_cmp(b->key.k.p, end_pos)) > 0)

@@ -789,8 +789,10 @@ next:
	}

	ret = bch2_trans_iter_free(&trans, iter) ?: ret;
	if (kthread && kthread_should_stop())
		break;
}
out:

	bch2_trans_exit(&trans);

	if (ret)

@@ -87,9 +87,20 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
	if (i >= 0 &&
	    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
	    p.ptr.gen == h->data[i].gen) {
		/*
		 * We need to use the journal reserve here, because
		 *  - journal reclaim depends on btree key cache
		 *    flushing to make forward progress,
		 *  - which has to make forward progress when the
		 *    journal is pre-reservation full,
		 *  - and depends on allocation - meaning allocator and
		 *    copygc
		 */

		data_opts->target		= io_opts->background_target;
		data_opts->nr_replicas		= 1;
		data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
		data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE|
			BTREE_INSERT_JOURNAL_RESERVED;
		data_opts->rewrite_dev		= p.ptr.dev;

		if (p.has_ec)

@@ -1063,11 +1063,27 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	unsigned i;

	kfree(c->usage_scratch);
	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		free_percpu(c->usage[i]);
	kfree(c->usage_base);
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);

	mempool_exit(&c->replicas_delta_pool);
}

int bch2_fs_replicas_init(struct bch_fs *c)
{
	bch2_journal_entry_res_resize(&c->journal,
				      &c->replicas_journal_res,
				      reserve_journal_replicas(c, &c->replicas));

	return replicas_table_update(c, &c->replicas);
	return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
					 REPLICAS_DELTA_LIST_MAX) ?:
		replicas_table_update(c, &c->replicas);
}

@@ -102,6 +102,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;

void bch2_fs_replicas_exit(struct bch_fs *);
int bch2_fs_replicas_init(struct bch_fs *);

#endif /* _BCACHEFS_REPLICAS_H */

@@ -477,6 +477,7 @@ static void __bch2_fs_free(struct bch_fs *c)
	bch2_fs_btree_iter_exit(c);
	bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
	bch2_fs_btree_cache_exit(c);
	bch2_fs_replicas_exit(c);
	bch2_fs_journal_exit(&c->journal);
	bch2_io_clock_exit(&c->io_clock[WRITE]);
	bch2_io_clock_exit(&c->io_clock[READ]);

@@ -484,10 +485,6 @@ static void __bch2_fs_free(struct bch_fs *c)
	bch2_journal_keys_free(&c->journal_keys);
	bch2_journal_entries_free(&c->journal_entries);
	percpu_free_rwsem(&c->mark_lock);
	kfree(c->usage_scratch);
	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		free_percpu(c->usage[i]);
	kfree(c->usage_base);

	if (c->btree_iters_bufs)
		for_each_possible_cpu(cpu)

@@ -501,8 +498,6 @@ static void __bch2_fs_free(struct bch_fs *c)
	bioset_exit(&c->btree_bio);
	mempool_exit(&c->fill_iter);
	percpu_ref_exit(&c->writes);
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);
	kfree(rcu_dereference_protected(c->disk_groups, 1));
	kfree(c->journal_seq_blacklist_table);
	kfree(c->unused_inode_hints);