diff --git a/.bcachefs_revision b/.bcachefs_revision index a0a4a343..d1024536 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e6fa8eaa1b374fc6262bd088ad1f140f4c5a8b11 +8d3093bd9b9254957badce4a4ff178baeb3632ed diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h new file mode 100644 index 00000000..e66b711d --- /dev/null +++ b/include/linux/bsearch.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include <linux/types.h> + +static __always_inline +void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) +{ + const char *pivot; + int result; + + while (num > 0) { + pivot = base + (num >> 1) * size; + result = cmp(key, pivot); + + if (result == 0) + return (void *)pivot; + + if (result > 0) { + base = pivot + size; + num--; + } + num >>= 1; + } + + return NULL; +} + +extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); + +#endif /* _LINUX_BSEARCH_H */ diff --git a/include/linux/types.h b/include/linux/types.h index c9886cba..77f96737 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -76,4 +76,6 @@ typedef __u64 __bitwise __be64; typedef u64 sector_t; +typedef int (*cmp_func_t)(const void *a, const void *b); + #endif /* _TOOLS_LINUX_TYPES_H_ */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index ce058d55..f2d2c7bb 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -485,10 +485,12 @@ enum { BCH_FS_ALLOCATOR_RUNNING, BCH_FS_ALLOCATOR_STOPPING, BCH_FS_INITIAL_GC_DONE, + BCH_FS_INITIAL_GC_UNFIXED, BCH_FS_BTREE_INTERIOR_REPLAY_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, BCH_FS_RW, + BCH_FS_WAS_RW, /* shutdown: */ BCH_FS_STOPPING, @@ -497,7 +499,9 @@ enum { /* errors: */ BCH_FS_ERROR, + BCH_FS_TOPOLOGY_ERROR, BCH_FS_ERRORS_FIXED, + BCH_FS_ERRORS_NOT_FIXED, /* misc: */ BCH_FS_NEED_ANOTHER_GC, diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h 
index ead7268b..d640a311 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1313,12 +1313,10 @@ LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); +LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62); -/* bit 61 was reflink option */ LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); -/* 61-64 unused */ - LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index edc3c5ed..f8692f79 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -958,6 +958,36 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); } +void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) +{ + struct btree_cache *bc = &c->btree_cache; + struct btree *b; + + b = btree_cache_find(bc, k); + if (!b) + return; + + six_lock_intent(&b->c.lock, NULL, NULL); + six_lock_write(&b->c.lock, NULL, NULL); + + wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, + TASK_UNINTERRUPTIBLE); + __bch2_btree_node_write(c, b); + + /* wait for any in flight btree write */ + btree_node_wait_on_io(b); + + BUG_ON(btree_node_dirty(b)); + + mutex_lock(&bc->lock); + btree_node_data_free(c, b); + bch2_btree_node_hash_remove(bc, b); + mutex_unlock(&bc->lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, struct btree *b) { diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index c517cc02..40dd263a 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ 
-30,6 +30,8 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, const struct bkey_i *, enum btree_id, unsigned); +void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); + void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); void bch2_fs_btree_cache_init_early(struct btree_cache *); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 864931ea..24fa279d 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -66,8 +66,6 @@ static int bch2_gc_check_topology(struct bch_fs *c, ? node_start : bpos_successor(prev->k->k.p); char buf1[200], buf2[200]; - bool update_min = false; - bool update_max = false; int ret = 0; if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { @@ -81,86 +79,343 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k)); } - if (fsck_err_on(bpos_cmp(expected_start, bp->v.min_key), c, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, - (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2))) - update_min = true; + if (bpos_cmp(expected_start, bp->v.min_key)) { + bch2_topology_error(c); + + if (fsck_err(c, "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + buf1, + (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2))) { + bch_info(c, "Halting mark and sweep to start topology repair pass"); + return FSCK_ERR_START_TOPOLOGY_REPAIR; + } else { + set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + } + } } - if (fsck_err_on(is_last && - bpos_cmp(cur.k->k.p, node_end), c, + if (is_last && bpos_cmp(cur.k->k.p, node_end)) { + bch2_topology_error(c); + + if (fsck_err(c, "btree node with incorrect max_key at btree %s level %u:\n" + " 
%s\n" + " expected %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1), + (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2))) { + bch_info(c, "Halting mark and sweep to start topology repair pass"); + return FSCK_ERR_START_TOPOLOGY_REPAIR; + } else { + set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + } + } + + bch2_bkey_buf_copy(prev, c, cur.k); +fsck_err: + return ret; +} + +static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) +{ + switch (b->key.k.type) { + case KEY_TYPE_btree_ptr: { + struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key); + + dst->k.p = src->k.p; + dst->v.mem_ptr = 0; + dst->v.seq = b->data->keys.seq; + dst->v.sectors_written = 0; + dst->v.flags = 0; + dst->v.min_key = b->data->min_key; + set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k)); + memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k)); + break; + } + case KEY_TYPE_btree_ptr_v2: + bkey_copy(&dst->k_i, &b->key); + break; + default: + BUG(); + } +} + +static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) +{ + struct bkey_i_btree_ptr_v2 *new; + int ret; + + new = kmalloc(BKEY_BTREE_PTR_U64s_MAX * sizeof(u64), GFP_KERNEL); + if (!new) + return -ENOMEM; + + btree_ptr_to_v2(b, new); + b->data->min_key = new_min; + new->v.min_key = new_min; + SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); + + ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level + 1, &new->k_i); + if (ret) { + kfree(new); + return ret; + } + + bch2_btree_node_drop_keys_outside_node(b); + + return 0; +} + +static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) +{ + struct bkey_i_btree_ptr_v2 *new; + int ret; + + ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); + if (ret) + return ret; + + new = kmalloc(BKEY_BTREE_PTR_U64s_MAX * sizeof(u64), GFP_KERNEL); + if (!new) + return -ENOMEM; + + btree_ptr_to_v2(b, new); + 
b->data->max_key = new_max; + new->k.p = new_max; + SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); + + ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level + 1, &new->k_i); + if (ret) { + kfree(new); + return ret; + } + + bch2_btree_node_drop_keys_outside_node(b); + + mutex_lock(&c->btree_cache.lock); + bch2_btree_node_hash_remove(&c->btree_cache, b); + + bkey_copy(&b->key, &new->k_i); + ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); + BUG_ON(ret); + mutex_unlock(&c->btree_cache.lock); + return 0; +} + +static int btree_repair_node_start(struct bch_fs *c, struct btree *b, + struct btree *prev, struct btree *cur) +{ + struct bpos expected_start = !prev + ? b->data->min_key + : bpos_successor(prev->key.k.p); + char buf1[200], buf2[200]; + int ret = 0; + + if (!prev) { + struct printbuf out = PBUF(buf1); + pr_buf(&out, "start of node: "); + bch2_bpos_to_text(&out, b->data->min_key); + } else { + bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&prev->key)); + } + + if (mustfix_fsck_err_on(bpos_cmp(expected_start, cur->data->min_key), c, + "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_ids[b->c.btree_id], b->c.level, + buf1, + (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&cur->key)), buf2))) { + if (prev && + bpos_cmp(expected_start, cur->data->min_key) > 0 && + BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) + ret = set_node_max(c, prev, + bpos_predecessor(cur->data->min_key)); + else + ret = set_node_min(c, cur, expected_start); + if (ret) + return ret; + } +fsck_err: + return ret; +} + +static int btree_repair_node_end(struct bch_fs *c, struct btree *b, + struct btree *child) +{ + char buf1[200], buf2[200]; + int ret = 0; + + if (mustfix_fsck_err_on(bpos_cmp(child->key.k.p, b->key.k.p), c, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), 
buf1), - (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2))) - update_max = true; - - bch2_bkey_buf_copy(prev, c, cur.k); - - if (update_min || update_max) { - struct bkey_i *new; - struct bkey_i_btree_ptr_v2 *bp = NULL; - struct btree *n; - - if (update_max) { - ret = bch2_journal_key_delete(c, b->c.btree_id, - b->c.level, cur.k->k.p); - if (ret) - return ret; - } - - new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL); - if (!new) { - bch_err(c, "%s: error allocating new key", __func__); - return -ENOMEM; - } - - bkey_copy(new, cur.k); - - if (new->k.type == KEY_TYPE_btree_ptr_v2) - bp = bkey_i_to_btree_ptr_v2(new); - - if (update_min) - bp->v.min_key = expected_start; - if (update_max) - new->k.p = node_end; - if (bp) - SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true); - - ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new); - if (ret) { - kfree(new); + (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&child->key)), buf1), + (bch2_bpos_to_text(&PBUF(buf2), b->key.k.p), buf2))) { + ret = set_node_max(c, child, b->key.k.p); + if (ret) return ret; - } - - n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id, - b->c.level - 1, true); - if (n) { - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, n); - - bkey_copy(&n->key, new); - if (update_min) - n->data->min_key = expected_start; - if (update_max) - n->data->max_key = node_end; - - ret = __bch2_btree_node_hash_insert(&c->btree_cache, n); - BUG_ON(ret); - mutex_unlock(&c->btree_cache.lock); - six_unlock_read(&n->c.lock); - } } fsck_err: return ret; } +#define DROP_THIS_NODE 10 + +static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) +{ + struct btree_and_journal_iter iter; + struct bkey_s_c k; + struct bkey_buf tmp; + struct btree *prev = NULL, *cur = NULL; + bool have_child, dropped_children = false; + char buf[200]; + int ret = 0; + + if (!b->c.level) + return 0; +again: + have_child = dropped_children = false; + bch2_bkey_buf_init(&tmp); + 
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); + + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + bch2_btree_and_journal_iter_advance(&iter); + bch2_bkey_buf_reassemble(&tmp, c, k); + + cur = bch2_btree_node_get_noiter(c, tmp.k, + b->c.btree_id, b->c.level - 1, + false); + ret = PTR_ERR_OR_ZERO(cur); + + if (mustfix_fsck_err_on(ret == -EIO, c, + "Unreadable btree node at btree %s level %u:\n" + " %s", + bch2_btree_ids[b->c.btree_id], + b->c.level - 1, + (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(tmp.k)), buf))) { + bch2_btree_node_evict(c, tmp.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, tmp.k->k.p); + if (ret) + goto err; + continue; + } + + if (ret) { + bch_err(c, "%s: error %i getting btree node", + __func__, ret); + break; + } + + ret = btree_repair_node_start(c, b, prev, cur); + if (prev) + six_unlock_read(&prev->c.lock); + prev = cur; + cur = NULL; + + if (ret) + break; + } + + if (!ret && !IS_ERR_OR_NULL(prev)) { + BUG_ON(cur); + ret = btree_repair_node_end(c, b, prev); + } + + if (!IS_ERR_OR_NULL(prev)) + six_unlock_read(&prev->c.lock); + prev = NULL; + if (!IS_ERR_OR_NULL(cur)) + six_unlock_read(&cur->c.lock); + cur = NULL; + + if (ret) + goto err; + + bch2_btree_and_journal_iter_exit(&iter); + bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); + + while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { + bch2_bkey_buf_reassemble(&tmp, c, k); + bch2_btree_and_journal_iter_advance(&iter); + + cur = bch2_btree_node_get_noiter(c, tmp.k, + b->c.btree_id, b->c.level - 1, + false); + ret = PTR_ERR_OR_ZERO(cur); + + if (ret) { + bch_err(c, "%s: error %i getting btree node", + __func__, ret); + goto err; + } + + ret = bch2_btree_repair_topology_recurse(c, cur); + six_unlock_read(&cur->c.lock); + cur = NULL; + + if (ret == DROP_THIS_NODE) { + bch2_btree_node_evict(c, tmp.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, tmp.k->k.p); + dropped_children = true; + } + + if (ret) + goto 
err; + + have_child = true; + } + + if (mustfix_fsck_err_on(!have_child, c, + "empty interior btree node at btree %s level %u\n" + " %s", + bch2_btree_ids[b->c.btree_id], + b->c.level, + (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key)), buf))) + ret = DROP_THIS_NODE; +err: +fsck_err: + if (!IS_ERR_OR_NULL(prev)) + six_unlock_read(&prev->c.lock); + if (!IS_ERR_OR_NULL(cur)) + six_unlock_read(&cur->c.lock); + + bch2_btree_and_journal_iter_exit(&iter); + bch2_bkey_buf_exit(&tmp, c); + + if (!ret && dropped_children) + goto again; + + return ret; +} + +static int bch2_repair_topology(struct bch_fs *c) +{ + struct btree *b; + unsigned i; + int ret = 0; + + for (i = 0; i < BTREE_ID_NR && !ret; i++) { + b = c->btree_roots[i].b; + if (btree_node_fake(b)) + continue; + + six_lock_read(&b->c.lock, NULL, NULL); + ret = bch2_btree_repair_topology_recurse(c, b); + six_unlock_read(&b->c.lock); + + if (ret == DROP_THIS_NODE) { + bch_err(c, "empty btree root - repair unimplemented"); + ret = FSCK_ERR_EXIT; + } + } + + return ret; +} + static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, unsigned level, bool is_root, struct bkey_s_c *k) @@ -483,6 +738,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, struct bkey_s_c k; struct bkey_buf cur, prev; u8 max_stale = 0; + char buf[200]; int ret = 0; bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); @@ -498,7 +754,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, &k, &max_stale, true); if (ret) { bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret); - break; + goto fsck_err; } if (b->c.level) { @@ -511,7 +767,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, &prev, cur, !bch2_btree_and_journal_iter_peek(&iter).k); if (ret) - break; + goto fsck_err; } else { bch2_btree_and_journal_iter_advance(&iter); } @@ -532,18 +788,25 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, false); ret 
= PTR_ERR_OR_ZERO(child); - if (fsck_err_on(ret == -EIO, c, - "unreadable btree node")) { - ret = bch2_journal_key_delete(c, b->c.btree_id, - b->c.level, cur.k->k.p); - if (ret) - return ret; + if (ret == -EIO) { + bch2_topology_error(c); - set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); - continue; - } - - if (ret) { + if (fsck_err(c, "Unreadable btree node at btree %s level %u:\n" + " %s", + bch2_btree_ids[b->c.btree_id], + b->c.level - 1, + (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur.k)), buf))) { + ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + bch_info(c, "Halting mark and sweep to start topology repair pass"); + goto fsck_err; + } else { + /* Continue marking when opted to not + * fix the error: */ + ret = 0; + set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); + continue; + } + } else if (ret) { bch_err(c, "%s: error %i getting btree node", __func__, ret); break; @@ -583,16 +846,20 @@ static int bch2_gc_btree_init(struct bch_fs *c, return 0; six_lock_read(&b->c.lock, NULL, NULL); - if (fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c, + if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c, "btree root with incorrect min_key: %s", (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) { - BUG(); + bch_err(c, "repair unimplemented"); + ret = FSCK_ERR_EXIT; + goto fsck_err; } - if (fsck_err_on(bpos_cmp(b->data->max_key, POS_MAX), c, + if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, POS_MAX), c, "btree root with incorrect max_key: %s", (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) { - BUG(); + bch_err(c, "repair unimplemented"); + ret = FSCK_ERR_EXIT; + goto fsck_err; } if (b->c.level >= target_depth) @@ -607,7 +874,7 @@ static int bch2_gc_btree_init(struct bch_fs *c, fsck_err: six_unlock_read(&b->c.lock); - if (ret) + if (ret < 0) bch_err(c, "%s: ret %i", __func__, ret); return ret; } @@ -622,23 +889,20 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) { enum btree_id ids[BTREE_ID_NR]; unsigned i; + int 
ret = 0; for (i = 0; i < BTREE_ID_NR; i++) ids[i] = i; bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); - for (i = 0; i < BTREE_ID_NR; i++) { - enum btree_id id = ids[i]; - int ret = initial - ? bch2_gc_btree_init(c, id, metadata_only) - : bch2_gc_btree(c, id, initial, metadata_only); - if (ret) { - bch_err(c, "%s: ret %i", __func__, ret); - return ret; - } - } + for (i = 0; i < BTREE_ID_NR && !ret; i++) + ret = initial + ? bch2_gc_btree_init(c, ids[i], metadata_only) + : bch2_gc_btree(c, ids[i], initial, metadata_only); - return 0; + if (ret < 0) + bch_err(c, "%s: ret %i", __func__, ret); + return ret; } static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca, @@ -1025,7 +1289,27 @@ again: bch2_mark_superblocks(c); + if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags) && + !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags) && + c->opts.fix_errors != FSCK_OPT_NO) { + bch_info(c, "starting topology repair pass"); + ret = bch2_repair_topology(c); + if (ret) + goto out; + bch_info(c, "topology repair pass done"); + } + ret = bch2_gc_btrees(c, initial, metadata_only); + + if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR && + !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { + set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + ret = 0; + } + + if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR) + ret = FSCK_ERR_EXIT; + if (ret) goto out; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 2de31a6b..e609bc49 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -558,6 +558,46 @@ out: \ #define btree_err_on(cond, ...) ((cond) ? 
btree_err(__VA_ARGS__) : false) +/* + * When btree topology repair changes the start or end of a node, that might + * mean we have to drop keys that are no longer inside the node: + */ +void bch2_btree_node_drop_keys_outside_node(struct btree *b) +{ + struct bset_tree *t; + + for_each_bset(b, t) { + struct bset *i = bset(b, t); + struct bkey_packed *k; + + for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) + if (bkey_cmp_left_packed(b, k, &b->data->min_key) < 0) + break; + + if (k != i->start) { + unsigned shift = (u64 *) k - (u64 *) i->start; + + memmove_u64s_down(i->start, k, + (u64 *) vstruct_end(i) - (u64 *) k); + i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); + set_btree_bset_end(b, t); + bch2_bset_set_no_aux_tree(b, t); + } + + for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) + if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0) + break; + + if (k != vstruct_last(i)) { + i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); + set_btree_bset_end(b, t); + bch2_bset_set_no_aux_tree(b, t); + } + } + + bch2_btree_build_aux_trees(b); +} + static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, unsigned sectors, int write, bool have_retry) @@ -680,6 +720,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; + bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && + BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; for (k = i->start; @@ -713,7 +755,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?: - bch2_bkey_in_btree_node(b, u.s_c) ?: + (!updated_range ? bch2_bkey_in_btree_node(b, u.s_c) : NULL) ?: (write ? 
bch2_bkey_val_invalid(c, u.s_c) : NULL); if (invalid) { char buf[160]; @@ -770,6 +812,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bch_extent_ptr *ptr; struct bset *i; bool used_mempool, blacklisted; + bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && + BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; int ret, retry_read = 0, write = READ; @@ -917,6 +961,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); + if (updated_range) + bch2_btree_node_drop_keys_outside_node(b); + i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; @@ -986,6 +1033,7 @@ static void btree_node_read_work(struct work_struct *work) struct bch_io_failures failed = { .nr = 0 }; char buf[200]; struct printbuf out; + bool saw_error = false; bool can_retry; goto start; @@ -1023,6 +1071,8 @@ start: !bch2_btree_node_read_done(c, ca, b, can_retry)) break; + saw_error = true; + if (!can_retry) { set_btree_node_read_error(b); break; @@ -1032,6 +1082,10 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); + + if (saw_error && !btree_node_read_error(b)) + bch2_btree_node_rewrite_async(c, b); + clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index c8a8b05a..cadcf7f8 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -131,6 +131,8 @@ static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offse void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *); +void bch2_btree_node_drop_keys_outside_node(struct btree *); + void bch2_btree_build_aux_trees(struct btree *); void bch2_btree_init_next(struct bch_fs *, struct btree *, struct btree_iter *); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 
a5181a96..a0ff0c3c 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -682,7 +682,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) kmem_cache_free(bch2_key_cache, ck); } - BUG_ON(atomic_long_read(&bc->nr_dirty) && !bch2_journal_error(&c->journal)); + BUG_ON(atomic_long_read(&bc->nr_dirty) && + !bch2_journal_error(&c->journal) && + test_bit(BCH_FS_WAS_RW, &c->flags)); BUG_ON(atomic_long_read(&bc->nr_keys)); mutex_unlock(&bc->lock); diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 0c7caa7e..56131ac5 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -72,6 +72,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, __le64, unsigned); +void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, struct btree *, struct bkey_i *); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 5c86e76f..b9e0ff97 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1154,6 +1154,27 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b set_btree_node_need_write(b); } +static void +__bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, + struct btree_iter *iter, struct keylist *keys, + struct btree_node_iter node_iter) +{ + struct bkey_i *insert = bch2_keylist_front(keys); + struct bkey_packed *k; + + BUG_ON(btree_node_type(b) != BKEY_TYPE_btree); + + while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && + (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) + ; + + while (!bch2_keylist_empty(keys)) { + bch2_insert_fixup_btree_ptr(as, b, iter, + bch2_keylist_front(keys), &node_iter); + bch2_keylist_pop_front(keys); + } +} + /* * Move keys from n1 (original replacement node, now lower node) to n2 (higher 
* node) @@ -1284,16 +1305,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, struct bkey_packed *src, *dst, *n; struct bset *i; - BUG_ON(btree_node_type(b) != BKEY_TYPE_btree); - bch2_btree_node_iter_init(&node_iter, b, &k->k.p); - while (!bch2_keylist_empty(keys)) { - k = bch2_keylist_front(keys); - - bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter); - bch2_keylist_pop_front(keys); - } + __bch2_btree_insert_keys_interior(as, b, iter, keys, node_iter); /* * We can't tolerate whiteouts here - with whiteouts there can be @@ -1439,24 +1453,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, struct btree_iter *iter, struct keylist *keys) { struct btree_iter *linked; - struct btree_node_iter node_iter; - struct bkey_i *insert = bch2_keylist_front(keys); - struct bkey_packed *k; - /* Don't screw up @iter's position: */ - node_iter = iter->l[b->c.level].iter; - - /* - * btree_split(), btree_gc_coalesce() will insert keys before - * the iterator's current position - they know the keys go in - * the node the iterator points to: - */ - while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && - (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) - ; - - for_each_keylist_key(keys, insert) - bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter); + __bch2_btree_insert_keys_interior(as, b, iter, keys, iter->l[b->c.level].iter); btree_update_updated_node(as, b); @@ -1611,11 +1609,12 @@ retry: bch2_bpos_to_text(&PBUF(buf1), prev->data->max_key); bch2_bpos_to_text(&PBUF(buf2), next->data->min_key); - bch2_fs_inconsistent(c, - "btree topology error in btree merge:\n" - "prev ends at %s\n" - "next starts at %s\n", - buf1, buf2); + bch_err(c, + "btree topology error in btree merge:\n" + " prev ends at %s\n" + " next starts at %s", + buf1, buf2); + bch2_topology_error(c); ret = -EIO; goto err; } @@ -1797,6 +1796,56 @@ out: return ret; } +struct async_btree_rewrite { + struct bch_fs *c; + struct work_struct work; + 
enum btree_id btree_id; + unsigned level; + struct bpos pos; + __le64 seq; +}; + +void async_btree_node_rewrite_work(struct work_struct *work) +{ + struct async_btree_rewrite *a = + container_of(work, struct async_btree_rewrite, work); + struct bch_fs *c = a->c; + struct btree_trans trans; + struct btree_iter *iter; + + bch2_trans_init(&trans, c, 0, 0); + iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos, + BTREE_MAX_DEPTH, a->level, 0); + bch2_btree_node_rewrite(c, iter, a->seq, 0); + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); + percpu_ref_put(&c->writes); + kfree(a); +} + +void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) +{ + struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); + + if (!percpu_ref_tryget(&c->writes)) + return; + + a = kmalloc(sizeof(*a), GFP_NOFS); + if (!a) { + percpu_ref_put(&c->writes); + return; + } + + a->c = c; + a->btree_id = b->c.btree_id; + a->level = b->c.level; + a->pos = b->key.k.p; + a->seq = b->data->keys.seq; + + INIT_WORK(&a->work, async_btree_node_rewrite_work); + queue_work(system_long_wq, &a->work); +} + static void __bch2_btree_node_update_key(struct bch_fs *c, struct btree_update *as, struct btree_iter *iter, diff --git a/libbcachefs/error.c b/libbcachefs/error.c index a8ee1db8..90c3b986 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -25,6 +25,13 @@ bool bch2_inconsistent_error(struct bch_fs *c) } } +void bch2_topology_error(struct bch_fs *c) +{ + set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); + if (test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) + bch2_inconsistent_error(c); +} + void bch2_fatal_error(struct bch_fs *c) { if (bch2_fs_emergency_read_only(c)) @@ -74,9 +81,13 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, vprintk(fmt, args); va_end(args); - return bch2_inconsistent_error(c) - ? 
FSCK_ERR_EXIT - : FSCK_ERR_FIX; + if (c->opts.errors == BCH_ON_ERROR_continue) { + bch_err(c, "fixing"); + return FSCK_ERR_FIX; + } else { + bch2_inconsistent_error(c); + return FSCK_ERR_EXIT; + } } mutex_lock(&c->fsck_error_lock); @@ -146,6 +157,7 @@ print: set_bit(BCH_FS_ERRORS_FIXED, &c->flags); return FSCK_ERR_FIX; } else { + set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); set_bit(BCH_FS_ERROR, &c->flags); return c->opts.fix_errors == FSCK_OPT_EXIT || !(flags & FSCK_CAN_IGNORE) diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 0e49fd72..d8cd19b3 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -29,6 +29,8 @@ struct work_struct; bool bch2_inconsistent_error(struct bch_fs *); +void bch2_topology_error(struct bch_fs *); + #define bch2_fs_inconsistent(c, ...) \ ({ \ bch_err(c, __VA_ARGS__); \ @@ -88,6 +90,7 @@ enum fsck_err_ret { FSCK_ERR_IGNORE = 0, FSCK_ERR_FIX = 1, FSCK_ERR_EXIT = 2, + FSCK_ERR_START_TOPOLOGY_REPAIR = 3, }; struct fsck_err_state { diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 26fbd8c2..338d50be 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -12,8 +12,8 @@ #include "super.h" #include "xattr.h" +#include #include /* struct qstr */ -#include #define QSTR(n) { { { .len = strlen(n) } }, .name = n } @@ -290,21 +290,24 @@ static int hash_redo_key(struct btree_trans *trans, struct bch_hash_info *hash_info, struct btree_iter *k_iter, struct bkey_s_c k) { - struct bkey_i delete; + struct bkey_i *delete; struct bkey_i *tmp; + delete = bch2_trans_kmalloc(trans, sizeof(*delete)); + if (IS_ERR(delete)) + return PTR_ERR(delete); + tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if (IS_ERR(tmp)) return PTR_ERR(tmp); bkey_reassemble(tmp, k); - bkey_init(&delete.k); - delete.k.p = k_iter->pos; - bch2_trans_update(trans, k_iter, &delete, 0); + bkey_init(&delete->k); + delete->k.p = k_iter->pos; + bch2_trans_update(trans, k_iter, delete, 0); - return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, - tmp, 0); 
+ return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); } static int fsck_hash_delete_at(struct btree_trans *trans, @@ -377,9 +380,8 @@ static int hash_check_key(struct btree_trans *trans, return ret; bad_hash: if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " - "hashed to %llu should be at %llu\n%s", - desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, - hash, iter->pos.offset, + "hashed to %llu\n%s", + desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash, (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) return 0; @@ -1130,38 +1132,120 @@ static int check_directory_structure(struct bch_fs *c) return bch2_trans_exit(&trans) ?: ret; } -struct nlink { - u32 count; +struct nlink_table { + size_t nr; + size_t size; + + struct nlink { + u64 inum; + u32 snapshot; + u32 count; + } *d; }; -typedef GENRADIX(struct nlink) nlink_table; - -static void inc_link(struct bch_fs *c, nlink_table *links, - u64 range_start, u64 *range_end, u64 inum) +static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot) { - struct nlink *link; + if (t->nr == t->size) { + size_t new_size = max_t(size_t, 128UL, t->size * 2); + void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL); + if (!d) { + return -ENOMEM; + } - if (inum < range_start || inum >= *range_end) - return; + memcpy(d, t->d, t->size * sizeof(t->d[0])); + kvfree(t->d); - if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) { - *range_end = inum; - return; + t->d = d; + t->size = new_size; } - link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); - if (!link) { - bch_verbose(c, "allocation failed during fsck - will need another pass"); - *range_end = inum; - return; - } - link->count++; + t->d[t->nr++] = (struct nlink) { + .inum = inum, + .snapshot = snapshot, + }; + + return 0; +} + +static int nlink_cmp(const void *_l, const void *_r) +{ + const struct nlink *l = _l; + const struct nlink *r = _r; + + return 
cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot); +} + +static void inc_link(struct bch_fs *c, struct nlink_table *links, + u64 range_start, u64 range_end, u64 inum) +{ + struct nlink *link, key = { + .inum = inum, .snapshot = U32_MAX, + }; + + if (inum < range_start || inum >= range_end) + return; + + link = __inline_bsearch(&key, links->d, links->nr, + sizeof(links->d[0]), nlink_cmp); + if (link) + link->count++; } noinline_for_stack -static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, - u64 range_start, u64 *range_end) +static int check_nlinks_find_hardlinks(struct bch_fs *c, + struct nlink_table *t, + u64 start, u64 *end) +{ + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; + struct bkey_s_c_inode inode; + struct bch_inode_unpacked u; + int ret = 0; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + + for_each_btree_key(&trans, iter, BTREE_ID_inodes, + POS(0, start), 0, k, ret) { + if (k.k->type != KEY_TYPE_inode) + continue; + + inode = bkey_s_c_to_inode(k); + + /* + * Backpointer and directory structure checks are sufficient for + * directories, since they can't have hardlinks: + */ + if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) + continue; + + /* Should never fail, checked by bch2_inode_invalid: */ + BUG_ON(bch2_inode_unpack(inode, &u)); + + if (!u.bi_nlink) + continue; + + ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot); + if (ret) { + *end = k.k->p.offset; + ret = 0; + break; + } + + } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); + + if (ret) + bch_err(c, "error in fsck: btree error %i while walking inodes", ret); + + return ret; +} + +noinline_for_stack +static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, + u64 range_start, u64 range_end) { struct btree_trans trans; struct btree_iter *iter; @@ -1193,80 +1277,58 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, return ret; } -static int check_inode_nlink(struct btree_trans 
*trans, - struct btree_iter *iter, - struct bkey_s_c_inode inode, - unsigned nlink) -{ - struct bch_fs *c = trans->c; - struct bch_inode_unpacked u; - int ret = 0; - - /* - * Backpointer and directory structure checks are sufficient for - * directories, since they can't have hardlinks: - */ - if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) - return 0; - - if (!nlink) { - bch_err(c, "no links found to inode %llu", inode.k->p.offset); - return -EINVAL; - } - - ret = bch2_inode_unpack(inode, &u); - - /* Should never happen, checked by bch2_inode_invalid: */ - if (bch2_fs_inconsistent_on(ret, c, - "error unpacking inode %llu in fsck", - inode.k->p.inode)) - return ret; - - if (fsck_err_on(bch2_inode_nlink_get(&u) != nlink, c, - "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)", - u.bi_inum, mode_to_type(u.bi_mode), - bch2_inode_nlink_get(&u), nlink)) { - bch2_inode_nlink_set(&u, nlink); - - ret = __bch2_trans_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_inode_write(trans, iter, &u)); - if (ret) - bch_err(c, "error in fsck: error %i updating inode", ret); - } -fsck_err: - return ret; -} - noinline_for_stack -static int bch2_gc_walk_inodes(struct bch_fs *c, - nlink_table *links, +static int check_nlinks_update_hardlinks(struct bch_fs *c, + struct nlink_table *links, u64 range_start, u64 range_end) { struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct nlink *link; + struct bkey_s_c_inode inode; + struct bch_inode_unpacked u; + struct nlink *link = links->d; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS(0, range_start), 0, k, ret) { - if (!k.k || k.k->p.offset >= range_end) + if (k.k->p.offset >= range_end) break; if (k.k->type != KEY_TYPE_inode) continue; - link = genradix_ptr(links, k.k->p.offset - range_start); - ret = check_inode_nlink(&trans, iter, - bkey_s_c_to_inode(k), link ? 
link->count : 0); - if (ret) - break; + inode = bkey_s_c_to_inode(k); + if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) + continue; + BUG_ON(bch2_inode_unpack(inode, &u)); + + if (!u.bi_nlink) + continue; + + while (link->inum < k.k->p.offset) { + link++; + BUG_ON(link >= links->d + links->nr); + } + + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, + "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)", + u.bi_inum, mode_to_type(u.bi_mode), + bch2_inode_nlink_get(&u), link->count)) { + bch2_inode_nlink_set(&u, link->count); + + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + bch2_inode_write(&trans, iter, &u)); + if (ret) + bch_err(c, "error in fsck: error %i updating inode", ret); + } } +fsck_err: bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); @@ -1279,34 +1341,36 @@ static int bch2_gc_walk_inodes(struct bch_fs *c, noinline_for_stack static int check_nlinks(struct bch_fs *c) { - nlink_table links; + struct nlink_table links = { 0 }; u64 this_iter_range_start, next_iter_range_start = 0; int ret = 0; bch_verbose(c, "checking inode nlinks"); - genradix_init(&links); - do { this_iter_range_start = next_iter_range_start; next_iter_range_start = U64_MAX; - ret = bch2_gc_walk_dirents(c, &links, + ret = check_nlinks_find_hardlinks(c, &links, + this_iter_range_start, + &next_iter_range_start); + + ret = check_nlinks_walk_dirents(c, &links, this_iter_range_start, - &next_iter_range_start); + next_iter_range_start); if (ret) break; - ret = bch2_gc_walk_inodes(c, &links, + ret = check_nlinks_update_hardlinks(c, &links, this_iter_range_start, next_iter_range_start); if (ret) break; - genradix_free(&links); + links.nr = 0; } while (next_iter_range_start != U64_MAX); - genradix_free(&links); + kvfree(links.d); return ret; } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 2dc3dee4..a9ccd14e 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -973,7 +973,7 @@ int 
bch2_fs_recovery(struct bch_fs *c) struct jset *last_journal_entry = NULL; u64 blacklist_seq, journal_seq; bool write_sb = false; - int ret; + int ret = 0; if (c->sb.clean) clean = read_superblock_clean(c); @@ -1241,8 +1241,9 @@ use_clean: if (c->opts.fsck && !test_bit(BCH_FS_ERROR, &c->flags) && - BCH_SB_HAS_ERRORS(c->disk_sb.sb)) { + !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); + SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0); write_sb = true; } @@ -1253,10 +1254,9 @@ use_clean: if (c->journal_seq_blacklist_table && c->journal_seq_blacklist_table->nr > 128) queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); -out: + ret = 0; -err: -fsck_err: +out: set_bit(BCH_FS_FSCK_DONE, &c->flags); bch2_flush_fsck_errs(c); @@ -1270,6 +1270,10 @@ fsck_err: else bch_verbose(c, "ret %i", ret); return ret; +err: +fsck_err: + bch2_fs_emergency_read_only(c); + goto out; } int bch2_fs_initialize(struct bch_fs *c) diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index de8d49e3..11d7167b 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -433,6 +433,11 @@ int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) __copy_super(&c->disk_sb, src); + if (BCH_SB_HAS_ERRORS(c->disk_sb.sb)) + set_bit(BCH_FS_ERROR, &c->flags); + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) + set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); + ret = bch2_sb_replicas_to_cpu_replicas(c); if (ret) return ret; @@ -713,6 +718,8 @@ int bch2_write_super(struct bch_fs *c) if (test_bit(BCH_FS_ERROR, &c->flags)) SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); + if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags)) + SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1); SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index b6e449a7..fd8a2991 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -381,6 +381,11 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) unsigned i; int ret; 
+ if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) { + bch_err(c, "cannot go rw, unfixed btree errors"); + return -EROFS; + } + if (test_bit(BCH_FS_RW, &c->flags)) return 0; @@ -440,6 +445,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) percpu_ref_reinit(&c->writes); set_bit(BCH_FS_RW, &c->flags); + set_bit(BCH_FS_WAS_RW, &c->flags); return 0; err: __bch2_fs_read_only(c);