Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22)
Update bcachefs sources to bee34d805c bcachefs: Repair bad data pointers
parent 19f921604d
commit 7740db24f7
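
The headline change is the new bch2_check_fix_ptrs() in the btree_gc.c hunk below: during the initial mark-and-sweep pass, each extent or btree pointer is checked against its bucket's generation number, and pointers that cannot be trusted are dropped by publishing a repaired key through the in-memory journal overlay. As a standalone sketch (an editor's illustration distilled from that hunk, not part of the diff; gen_cmp() is assumed to match bcachefs's wrapping 8-bit comparison), the drop predicate is:

#include <stdbool.h>
#include <stdint.h>

/* Wrapping comparison of 8-bit generation numbers (assumed bcachefs semantics): */
static inline int gen_cmp(uint8_t a, uint8_t b)
{
	return (int8_t) (a - b);
}

/*
 * Distilled from bch2_check_fix_ptrs(): drop a cached pointer whose bucket
 * gen is unknown or newer pointers claim to be from the future (the bucket
 * may have been reused), and drop a dirty pointer that is stale (pointer
 * gen older than the bucket's).
 */
static bool ptr_should_drop(bool cached, bool bucket_gen_valid,
			    uint8_t ptr_gen, uint8_t bucket_gen)
{
	if (cached)
		return !bucket_gen_valid || gen_cmp(ptr_gen, bucket_gen) > 0;

	return gen_cmp(ptr_gen, bucket_gen) < 0;
}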
@@ -1 +1 @@
-ffc900d5936ae538e34d18a6ce739d0a5a9178cf
+bee34d805cf75e57f9380e0ee91771b9d90b2b2d
@@ -509,7 +509,8 @@ enum {
 	BCH_FS_ERRORS_FIXED,
 
 	/* misc: */
-	BCH_FS_FIXED_GENS,
+	BCH_FS_NEED_ANOTHER_GC,
+	BCH_FS_DELETED_NODES,
 	BCH_FS_NEED_ALLOC_WRITE,
 	BCH_FS_REBUILD_REPLICAS,
 	BCH_FS_HOLD_BTREE_WRITES,
@@ -539,11 +540,13 @@ struct journal_keys {
 	struct journal_key {
 		enum btree_id	btree_id:8;
 		unsigned	level:8;
+		bool		allocated;
 		struct bkey_i	*k;
 		u32		journal_seq;
 		u32		journal_offset;
 	} *d;
 	size_t		nr;
+	size_t		size;
 	u64		journal_seq_base;
 };
 
@@ -840,6 +843,7 @@ struct bch_fs {
 	struct journal		journal;
 	struct list_head	journal_entries;
 	struct journal_keys	journal_keys;
+	struct list_head	journal_iters;
 
 	u64			last_bucket_seq_cleanup;
 
@@ -603,13 +603,14 @@ struct bch_btree_ptr_v2 {
 	__u64			mem_ptr;
 	__le64			seq;
 	__le16			sectors_written;
-	/* In case we ever decide to do variable size btree nodes: */
-	__le16			sectors;
+	__le16			flags;
 	struct bpos		min_key;
 	struct bch_extent_ptr	start[0];
 	__u64			_data[0];
 } __attribute__((packed, aligned(8)));
 
+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0,  1);
+
 struct bch_extent {
 	struct bch_val		v;
 
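
The new flags field is accessed through generated bitmask accessors: the LE16_BITMASK() line above declares BTREE_PTR_RANGE_UPDATED() and SET_BTREE_PTR_RANGE_UPDATED() over bit 0 of the little-endian flags word. Roughly what such a macro expands to (a simplified sketch following bcachefs's LE_BITMASK conventions, not the exact upstream definition):

#include <stdint.h>

/* On a little-endian host a __le16 load/store is a plain load/store; the
 * kernel's le16_to_cpu()/cpu_to_le16() handle the general case. */
typedef uint16_t le16;
static inline uint16_t le16_to_cpu(le16 v) { return v; }
static inline le16 cpu_to_le16(uint16_t v) { return v; }

/* Sketch of what LE16_BITMASK(name, type, field, offset, end) generates: */
#define LE16_BITMASK(name, type, field, offset, end)			\
static inline uint64_t name(const type *k)				\
{									\
	return (le16_to_cpu(k->field) >> (offset)) &			\
		~(~0ULL << ((end) - (offset)));				\
}									\
									\
static inline void SET_##name(type *k, uint64_t v)			\
{									\
	uint16_t f = le16_to_cpu(k->field);				\
									\
	f &= ~(~(~0ULL << ((end) - (offset))) << (offset));		\
	f |= v << (offset);						\
	k->field = cpu_to_le16(f);					\
}

struct example { le16 flags; };
LE16_BITMASK(EXAMPLE_RANGE_UPDATED, struct example, flags, 0, 1)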
@@ -7,6 +7,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "error.h"
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
@@ -812,9 +813,12 @@ lock_node:
 			return ERR_PTR(-EIO);
 	}
 
-	EBUG_ON(b->c.btree_id != iter->btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->c.btree_id != iter->btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bkey_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
 	return b;
 }
@@ -822,7 +826,8 @@ lock_node:
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 					 const struct bkey_i *k,
 					 enum btree_id btree_id,
-					 unsigned level)
+					 unsigned level,
+					 bool nofill)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -837,6 +842,9 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 retry:
 	b = btree_cache_find(bc, k);
 	if (unlikely(!b)) {
+		if (nofill)
+			return NULL;
+
 		b = bch2_btree_node_fill(c, NULL, k, btree_id,
 					 level, SIX_LOCK_read, true);
 
@@ -883,9 +891,12 @@ lock_node:
 			return ERR_PTR(-EIO);
 	}
 
-	EBUG_ON(b->c.btree_id != btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->c.btree_id != btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bkey_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
 	return b;
 }
@@ -995,8 +1006,22 @@ out:
 	if (sib != btree_prev_sib)
 		swap(n1, n2);
 
-	BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
-			n2->data->min_key));
+	if (bkey_cmp(bkey_successor(n1->key.k.p),
+		     n2->data->min_key)) {
+		char buf1[200], buf2[200];
+
+		bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+		bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+		bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+				     "prev: %s\n"
+				     "next: %s\n",
+				     bch2_btree_ids[iter->btree_id], level,
+				     buf1, buf2);
+
+		six_unlock_intent(&ret->c.lock);
+		ret = NULL;
+	}
 	}
 
 	bch2_btree_trans_verify_locks(trans);
@@ -26,7 +26,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
 				  enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
-					 enum btree_id, unsigned);
+					 enum btree_id, unsigned, bool);
 
 struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
 					  struct btree *, enum btree_node_sibling);
@@ -50,39 +50,199 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
 	__gc_pos_set(c, new_pos);
 }
 
 /*
  * Missing: if an interior btree node is empty, we need to do something -
  * perhaps just kill it
  */
 static int bch2_gc_check_topology(struct bch_fs *c,
-				  struct bkey_s_c k,
-				  struct bpos *expected_start,
-				  struct bpos expected_end,
+				  struct btree *b,
+				  struct bkey_buf *prev,
+				  struct bkey_buf cur,
 				  bool is_last)
 {
+	struct bpos node_start	= b->data->min_key;
+	struct bpos node_end	= b->data->max_key;
+	struct bpos expected_start = bkey_deleted(&prev->k->k)
+		? node_start
+		: bkey_successor(prev->k->k.p);
+	char buf1[200], buf2[200];
+	bool update_min = false;
+	bool update_max = false;
 	int ret = 0;
 
-	if (k.k->type == KEY_TYPE_btree_ptr_v2) {
-		struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+	if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
+		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
 
-		if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c,
-				"btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu",
-				bp.v->min_key.inode,
-				bp.v->min_key.offset,
-				expected_start->inode,
-				expected_start->offset)) {
-			BUG();
-		}
-	}
+		if (bkey_deleted(&prev->k->k))
+			scnprintf(buf1, sizeof(buf1), "start of node: %llu:%llu",
+				  node_start.inode,
+				  node_start.offset);
+		else
+			bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
 
-	*expected_start = bkey_cmp(k.k->p, POS_MAX)
-		? bkey_successor(k.k->p)
-		: k.k->p;
+		if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
+				"btree node with incorrect min_key at btree %s level %u:\n"
+				"  prev %s\n"
+				"  cur %s",
+				bch2_btree_ids[b->c.btree_id], b->c.level,
+				buf1,
+				(bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)))
+			update_min = true;
+	}
 
 	if (fsck_err_on(is_last &&
-			bkey_cmp(k.k->p, expected_end), c,
-			"btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu",
-			k.k->p.inode,
-			k.k->p.offset,
-			expected_end.inode,
-			expected_end.offset)) {
-		BUG();
-	}
+			bkey_cmp(cur.k->k.p, node_end), c,
+			"btree node with incorrect max_key at btree %s level %u:\n"
+			"  %s\n"
+			"  expected %s",
+			bch2_btree_ids[b->c.btree_id], b->c.level,
+			(bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1),
+			(bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)))
+		update_max = true;
+
+	bch2_bkey_buf_copy(prev, c, cur.k);
+
+	if (update_min || update_max) {
+		struct bkey_i *new;
+		struct bkey_i_btree_ptr_v2 *bp = NULL;
+		struct btree *n;
+
+		if (update_max) {
+			ret = bch2_journal_key_delete(c, b->c.btree_id,
+						      b->c.level, cur.k->k.p);
+			if (ret)
+				return ret;
+		}
+
+		new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		bkey_copy(new, cur.k);
+
+		if (new->k.type == KEY_TYPE_btree_ptr_v2)
+			bp = bkey_i_to_btree_ptr_v2(new);
+
+		if (update_min)
+			bp->v.min_key = expected_start;
+		if (update_max)
+			new->k.p = node_end;
+		if (bp)
+			SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true);
+
+		ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new);
+		if (ret) {
+			kfree(new);
+			return ret;
+		}
+
+		n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id,
+					       b->c.level - 1, true);
+		if (n) {
+			mutex_lock(&c->btree_cache.lock);
+			bch2_btree_node_hash_remove(&c->btree_cache, n);
+
+			bkey_copy(&n->key, new);
+			if (update_min)
+				n->data->min_key = expected_start;
+			if (update_max)
+				n->data->max_key = node_end;
+
+			ret = __bch2_btree_node_hash_insert(&c->btree_cache, n);
+			BUG_ON(ret);
+			mutex_unlock(&c->btree_cache.lock);
+			six_unlock_read(&n->c.lock);
+		}
+	}
 fsck_err:
 	return ret;
 }
+
+static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
+			       unsigned level, bool is_root,
+			       struct bkey_s_c *k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
+	const struct bch_extent_ptr *ptr;
+	bool do_update = false;
+	int ret = 0;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		struct bucket *g = PTR_BUCKET(ca, ptr, true);
+		struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
+
+		if (fsck_err_on(!g->gen_valid, c,
+				"bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen)) {
+			if (!ptr->cached) {
+				g2->_mark.gen	= g->_mark.gen		= ptr->gen;
+				g2->gen_valid	= g->gen_valid		= true;
+				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+			} else {
+				do_update = true;
+			}
+		}
+
+		if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+				"bucket %u:%zu data type %s ptr gen in the future: %u > %u",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen, g->mark.gen)) {
+			if (!ptr->cached) {
+				g2->_mark.gen	= g->_mark.gen	= ptr->gen;
+				g2->gen_valid	= g->gen_valid	= true;
+				g2->_mark.data_type		= 0;
+				g2->_mark.dirty_sectors		= 0;
+				g2->_mark.cached_sectors	= 0;
+				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+			} else {
+				do_update = true;
+			}
+		}
+
+		if (fsck_err_on(!ptr->cached &&
+				gen_cmp(ptr->gen, g->mark.gen) < 0, c,
+				"bucket %u:%zu data type %s stale dirty ptr: %u < %u",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen, g->mark.gen))
+			do_update = true;
+	}
+
+	if (do_update) {
+		struct bch_extent_ptr *ptr;
+		struct bkey_i *new;
+
+		if (is_root) {
+			bch_err(c, "cannot update btree roots yet");
+			return -EINVAL;
+		}
+
+		new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		bkey_reassemble(new, *k);
+
+		bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
+			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+			struct bucket *g = PTR_BUCKET(ca, ptr, true);
+
+			(ptr->cached &&
+			 (!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
+			(!ptr->cached &&
+			 gen_cmp(ptr->gen, g->mark.gen) < 0);
+		}));
+
+		ret = bch2_journal_key_insert(c, btree_id, level, new);
+		if (ret)
+			kfree(new);
+		else
+			*k = bkey_i_to_s_c(new);
+	}
+fsck_err:
+	return ret;
+}
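
Note the ownership convention both repair paths above rely on: a freshly allocated replacement key is handed to bch2_journal_key_insert(), which takes ownership on success (such keys are flagged allocated and later freed by bch2_journal_keys_free(), in the recovery.c hunks further down), while on failure the caller must kfree() it. The same pattern in miniature, with hypothetical stand-in types (an illustration, not bcachefs code):

#include <stdlib.h>

struct overlay { void **slots; size_t nr, size; };

static int overlay_publish(struct overlay *o, void *key)
{
	if (o->nr == o->size)
		return -1;		/* insertion failed */
	o->slots[o->nr++] = key;	/* overlay now owns `key' */
	return 0;
}

static int repair_and_publish(struct overlay *o, const void *orig, size_t bytes)
{
	void *fixed = malloc(bytes);

	if (!fixed)
		return -1;
	/* ... copy `orig' and drop the bad pointers here ... */

	if (overlay_publish(o, fixed)) {
		free(fixed);		/* overlay did not take ownership */
		return -1;
	}
	return 0;
}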
@@ -90,7 +250,9 @@ fsck_err:
 
 /* marking of btree keys/nodes: */
 
-static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
+static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
+			    unsigned level, bool is_root,
+			    struct bkey_s_c k,
 			    u8 *max_stale, bool initial)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -104,7 +266,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		BUG_ON(bch2_journal_seq_verify &&
 		       k.k->version.lo > journal_cur_seq(&c->journal));
 
-		/* XXX change to fsck check */
 		if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
 				"key version number higher than recorded: %llu > %llu",
 				k.k->version.lo,
@@ -120,35 +281,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 			return ret;
 	}
 
-	bkey_for_each_ptr(ptrs, ptr) {
-		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-		struct bucket *g = PTR_BUCKET(ca, ptr, true);
-		struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
-
-		if (mustfix_fsck_err_on(!g->gen_valid, c,
-				"bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
-				ptr->dev, PTR_BUCKET_NR(ca, ptr),
-				bch2_data_types[ptr_data_type(k.k, ptr)],
-				ptr->gen)) {
-			g2->_mark.gen	= g->_mark.gen		= ptr->gen;
-			g2->gen_valid	= g->gen_valid		= true;
-			set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
-		}
-
-		if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
-				"bucket %u:%zu data type %s ptr gen in the future: %u > %u",
-				ptr->dev, PTR_BUCKET_NR(ca, ptr),
-				bch2_data_types[ptr_data_type(k.k, ptr)],
-				ptr->gen, g->mark.gen)) {
-			g2->_mark.gen	= g->_mark.gen	= ptr->gen;
-			g2->gen_valid	= g->gen_valid	= true;
-			g2->_mark.data_type		= 0;
-			g2->_mark.dirty_sectors		= 0;
-			g2->_mark.cached_sectors	= 0;
-			set_bit(BCH_FS_FIXED_GENS, &c->flags);
-			set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
-		}
-	}
+	ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, &k);
 	}
 
 	bkey_for_each_ptr(ptrs, ptr) {
@@ -169,10 +302,10 @@ fsck_err:
 static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
 			      bool initial)
 {
-	struct bpos next_node_start = b->data->min_key;
 	struct btree_node_iter iter;
 	struct bkey unpacked;
 	struct bkey_s_c k;
+	struct bkey_buf prev, cur;
 	int ret = 0;
 
 	*max_stale = 0;
@@ -181,26 +314,32 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
 		return 0;
 
 	bch2_btree_node_iter_init_from_start(&iter, b);
+	bch2_bkey_buf_init(&prev);
+	bch2_bkey_buf_init(&cur);
+	bkey_init(&prev.k->k);
 
 	while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
 		bch2_bkey_debugcheck(c, b, k);
 
-		ret = bch2_gc_mark_key(c, k, max_stale, initial);
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
+				       k, max_stale, initial);
 		if (ret)
 			break;
 
 		bch2_btree_node_iter_advance(&iter, b);
 
 		if (b->c.level) {
-			ret = bch2_gc_check_topology(c, k,
-					&next_node_start,
-					b->data->max_key,
+			bch2_bkey_buf_reassemble(&cur, c, k);
+
+			ret = bch2_gc_check_topology(c, b, &prev, cur,
 					bch2_btree_node_iter_end(&iter));
 			if (ret)
 				break;
 		}
 	}
 
+	bch2_bkey_buf_exit(&cur, c);
+	bch2_bkey_buf_exit(&prev, c);
 	return ret;
 }
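
btree_gc_mark_node() now threads a prev/cur pair of key buffers through the scan so bch2_gc_check_topology() can compare each child pointer's range against its predecessor. The rolling prev/cur comparison in isolation (a self-contained illustration, not bcachefs code; `have_prev` plays the role of bkey_deleted(&prev->k->k)):

#include <stdbool.h>
#include <stddef.h>

struct range { unsigned start, end; };

static bool ranges_are_contiguous(const struct range *r, size_t nr)
{
	struct range prev = { 0, 0 };
	bool have_prev = false;

	for (size_t i = 0; i < nr; i++) {
		/* cur must begin exactly where prev ended, like
		 * bkey_successor(prev->k->k.p) vs. cur's min_key: */
		if (have_prev && r[i].start != prev.end)
			return false;	/* gap or overlap: topology error */
		prev = r[i];		/* bch2_bkey_buf_copy(prev, c, cur.k) */
		have_prev = true;
	}
	return true;
}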
@@ -253,7 +392,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 	mutex_lock(&c->btree_root_lock);
 	b = c->btree_roots[btree_id].b;
 	if (!btree_node_fake(b))
-		ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
+				       bkey_i_to_s_c(&b->key),
 				       &max_stale, initial);
 	gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
 	mutex_unlock(&c->btree_root_lock);
@@ -262,18 +402,18 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 }
 
 static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
-				      struct journal_keys *journal_keys,
 				      unsigned target_depth)
 {
 	struct btree_and_journal_iter iter;
 	struct bkey_s_c k;
-	struct bpos next_node_start = b->data->min_key;
-	struct bkey_buf tmp;
+	struct bkey_buf cur, prev;
 	u8 max_stale = 0;
 	int ret = 0;
 
-	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
-	bch2_bkey_buf_init(&tmp);
+	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+	bch2_bkey_buf_init(&prev);
+	bch2_bkey_buf_init(&cur);
+	bkey_init(&prev.k->k);
 
 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 		bch2_bkey_debugcheck(c, b, k);
@@ -281,50 +421,72 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
 		BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0);
 		BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0);
 
-		ret = bch2_gc_mark_key(c, k, &max_stale, true);
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
+				       k, &max_stale, true);
 		if (ret)
 			break;
 
 		if (b->c.level) {
-			struct btree *child;
-
-			bch2_bkey_buf_reassemble(&tmp, c, k);
-			k = bkey_i_to_s_c(tmp.k);
+			bch2_bkey_buf_reassemble(&cur, c, k);
+			k = bkey_i_to_s_c(cur.k);
 
 			bch2_btree_and_journal_iter_advance(&iter);
 
-			ret = bch2_gc_check_topology(c, k,
-					&next_node_start,
-					b->data->max_key,
+			ret = bch2_gc_check_topology(c, b,
+					&prev, cur,
 					!bch2_btree_and_journal_iter_peek(&iter).k);
 			if (ret)
 				break;
-
-			if (b->c.level > target_depth) {
-				child = bch2_btree_node_get_noiter(c, tmp.k,
-						b->c.btree_id, b->c.level - 1);
-				ret = PTR_ERR_OR_ZERO(child);
-				if (ret)
-					break;
-
-				ret = bch2_gc_btree_init_recurse(c, child,
-						journal_keys, target_depth);
-				six_unlock_read(&child->c.lock);
-
-				if (ret)
-					break;
-			}
 		} else {
 			bch2_btree_and_journal_iter_advance(&iter);
 		}
 	}
 
-	bch2_bkey_buf_exit(&tmp, c);
+	if (b->c.level > target_depth) {
+		bch2_btree_and_journal_iter_exit(&iter);
+		bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+
+		while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+			struct btree *child;
+
+			bch2_bkey_buf_reassemble(&cur, c, k);
+			bch2_btree_and_journal_iter_advance(&iter);
+
+			child = bch2_btree_node_get_noiter(c, cur.k,
+						b->c.btree_id, b->c.level - 1,
+						false);
+			ret = PTR_ERR_OR_ZERO(child);
+
+			if (fsck_err_on(ret == -EIO, c,
+					"unreadable btree node")) {
+				ret = bch2_journal_key_delete(c, b->c.btree_id,
+							      b->c.level, cur.k->k.p);
+				if (ret)
+					return ret;
+
+				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+				continue;
+			}
+
+			if (ret)
+				break;
+
+			ret = bch2_gc_btree_init_recurse(c, child,
+							 target_depth);
+			six_unlock_read(&child->c.lock);
+
+			if (ret)
+				break;
+		}
+	}
+fsck_err:
+	bch2_bkey_buf_exit(&cur, c);
+	bch2_bkey_buf_exit(&prev, c);
+	bch2_btree_and_journal_iter_exit(&iter);
 	return ret;
 }
 static int bch2_gc_btree_init(struct bch_fs *c,
-			      struct journal_keys *journal_keys,
 			      enum btree_id btree_id)
 {
 	struct btree *b;
@@ -355,11 +517,11 @@ static int bch2_gc_btree_init(struct bch_fs *c,
 	}
 
 	if (b->c.level >= target_depth)
-		ret = bch2_gc_btree_init_recurse(c, b,
-				journal_keys, target_depth);
+		ret = bch2_gc_btree_init_recurse(c, b, target_depth);
 
 	if (!ret)
-		ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
+				       bkey_i_to_s_c(&b->key),
 				       &max_stale, true);
 fsck_err:
 	six_unlock_read(&b->c.lock);
@@ -373,8 +535,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 		(int) btree_id_to_gc_phase(r);
 }
 
-static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
-			  bool initial)
+static int bch2_gc_btrees(struct bch_fs *c, bool initial)
 {
 	enum btree_id ids[BTREE_ID_NR];
 	unsigned i;
@@ -386,8 +547,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
 	for (i = 0; i < BTREE_ID_NR; i++) {
 		enum btree_id id = ids[i];
 		int ret = initial
-			? bch2_gc_btree_init(c, journal_keys,
-					     id)
+			? bch2_gc_btree_init(c, id)
 			: bch2_gc_btree(c, id, initial);
 		if (ret)
 			return ret;
@@ -775,8 +935,7 @@ static int bch2_gc_start(struct bch_fs *c)
  * move around - if references move backwards in the ordering GC
  * uses, GC could skip past them
  */
-int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
-	    bool initial)
+int bch2_gc(struct bch_fs *c, bool initial)
 {
 	struct bch_dev *ca;
 	u64 start_time = local_clock();
@@ -798,7 +957,7 @@ again:
 
 	bch2_mark_superblocks(c);
 
-	ret = bch2_gc_btrees(c, journal_keys, initial);
+	ret = bch2_gc_btrees(c, initial);
 	if (ret)
 		goto out;
 
@@ -808,16 +967,15 @@ again:
 	bch2_mark_allocator_buckets(c);
 
 	c->gc_count++;
-out:
-	if (!ret &&
-	    (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
-	     (!iter && bch2_test_restart_gc))) {
+
+	if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+	    (!iter && bch2_test_restart_gc)) {
 		/*
 		 * XXX: make sure gens we fixed got saved
 		 */
 		if (iter++ <= 2) {
-			bch_info(c, "Fixed gens, restarting mark and sweep:");
-			clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+			bch_info(c, "Second GC pass needed, restarting:");
+			clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
 			__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
 			percpu_down_write(&c->mark_lock);
@@ -832,7 +990,7 @@ out:
 		bch_info(c, "Unable to fix bucket gens, looping");
 		ret = -EINVAL;
 	}
-
+out:
 	if (!ret) {
 		bch2_journal_block(&c->journal);
 
@@ -1371,7 +1529,7 @@ static int bch2_gc_thread(void *arg)
 		 * Full gc is currently incompatible with btree key cache:
 		 */
 #if 0
-		ret = bch2_gc(c, NULL, false, false);
+		ret = bch2_gc(c, false, false);
 #else
 		ret = bch2_gc_gens(c);
 #endif
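
The renamed flag makes the restart protocol explicit: any repair that may invalidate work GC has already done (fixed bucket gens, deleted unreadable nodes) sets BCH_FS_NEED_ANOTHER_GC, and bch2_gc() re-runs the whole mark pass, bounded so a persistent inconsistency cannot loop forever. The shape of that loop, sketched with stand-in types (an illustration, not the real bch2_gc()):

#include <errno.h>
#include <stdbool.h>

struct gc_state {
	bool need_another_pass;	/* stands in for BCH_FS_NEED_ANOTHER_GC */
};

static int run_gc_with_retries(struct gc_state *s,
			       int (*run_pass)(struct gc_state *))
{
	unsigned iter = 0;
	int ret;

	for (;;) {
		s->need_another_pass = false;
		ret = run_pass(s);	/* may set need_another_pass */
		if (ret || !s->need_another_pass)
			return ret;
		if (iter++ > 2)		/* same bound as `iter++ <= 2` above */
			return -EINVAL;	/* "Unable to fix bucket gens, looping" */
	}
}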
@@ -6,8 +6,7 @@
 
 void bch2_coalesce(struct bch_fs *);
 
-struct journal_keys;
-int bch2_gc(struct bch_fs *, struct journal_keys *, bool);
+int bch2_gc(struct bch_fs *, bool);
 int bch2_gc_gens(struct bch_fs *);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
@@ -753,6 +753,11 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 		struct bch_btree_ptr_v2 *bp =
 			&bkey_i_to_btree_ptr_v2(&b->key)->v;
 
+		if (BTREE_PTR_RANGE_UPDATED(bp)) {
+			b->data->min_key = bp->min_key;
+			b->data->max_key = b->key.k.p;
+		}
+
 		btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
 			     BTREE_ERR_MUST_RETRY, c, b, NULL,
 			     "incorrect min_key: got %llu:%llu should be %llu:%llu",
@@ -297,7 +297,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 		bp->v.mem_ptr		= 0;
 		bp->v.seq		= b->data->keys.seq;
 		bp->v.sectors_written	= 0;
-		bp->v.sectors		= cpu_to_le16(c->opts.btree_node_size);
 	}
 
 	if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
@@ -744,7 +744,6 @@ err:
 static int ec_stripe_bkey_update(struct btree_trans *trans,
 				 struct bkey_i_stripe *new)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	const struct bch_stripe *existing;
@@ -759,7 +758,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
 		goto err;
 
 	if (!k.k || k.k->type != KEY_TYPE_stripe) {
-		bch_err(c, "error updating stripe: not found");
+		bch_err(trans->c, "error updating stripe: not found");
 		ret = -ENOENT;
 		goto err;
 	}
@@ -767,7 +766,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
 	existing = bkey_s_c_to_stripe(k).v;
 
 	if (existing->nr_blocks != new->v.nr_blocks) {
-		bch_err(c, "error updating stripe: nr_blocks does not match");
+		bch_err(trans->c, "error updating stripe: nr_blocks does not match");
 		ret = -EINVAL;
 		goto err;
 	}
@@ -215,9 +215,8 @@ void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
 {
 	struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
 
-	pr_buf(out, "seq %llx sectors %u written %u min_key ",
+	pr_buf(out, "seq %llx written %u min_key ",
 	       le64_to_cpu(bp.v->seq),
-	       le16_to_cpu(bp.v->sectors),
 	       le16_to_cpu(bp.v->sectors_written));
 
 	bch2_bpos_to_text(out, bp.v->min_key);
@@ -1082,10 +1081,9 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 	unsigned nonce = UINT_MAX;
 	unsigned i;
 
-	if (k.k->type == KEY_TYPE_btree_ptr)
+	if (k.k->type == KEY_TYPE_btree_ptr ||
+	    k.k->type == KEY_TYPE_btree_ptr_v2)
 		size_ondisk = c->opts.btree_node_size;
-	if (k.k->type == KEY_TYPE_btree_ptr_v2)
-		size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
 
 	bkey_extent_entry_for_each(ptrs, entry) {
 		if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
@@ -1011,13 +1011,19 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	}
 
 	list_for_each_entry(i, journal_entries, list) {
+		unsigned ptr;
+
 		seq = le64_to_cpu(i->j.seq);
 		BUG_ON(seq >= cur_seq);
 
 		if (seq < last_seq)
 			continue;
 
-		journal_seq_pin(j, seq)->devs = i->devs;
+		p = journal_seq_pin(j, seq);
+
+		p->devs.nr = 0;
+		for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+			bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
 	}
 
 	spin_lock(&j->lock);
@@ -46,15 +46,16 @@ struct journal_list {
  * be replayed:
  */
 static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
+			     struct bch_extent_ptr entry_ptr,
 			     struct journal_list *jlist, struct jset *j,
 			     bool bad)
 {
-	struct journal_replay *i, *pos;
-	struct bch_devs_list devs = { .nr = 0 };
+	struct journal_replay *i, *pos, *dup = NULL;
+	struct bch_extent_ptr *ptr;
 	struct list_head *where;
 	size_t bytes = vstruct_bytes(j);
 	u64 last_seq = 0;
-	int ret;
+	int ret = JOURNAL_ENTRY_ADD_OK;
 
 	list_for_each_entry_reverse(i, jlist->head, list) {
 		if (!JSET_NO_FLUSH(&i->j)) {
@@ -88,28 +89,29 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 
 	where = jlist->head;
 add:
-	i = where->next != jlist->head
+	dup = where->next != jlist->head
 		? container_of(where->next, struct journal_replay, list)
 		: NULL;
 
+	if (dup && le64_to_cpu(j->seq) != le64_to_cpu(dup->j.seq))
+		dup = NULL;
+
 	/*
 	 * Duplicate journal entries? If so we want the one that didn't have a
 	 * checksum error:
 	 */
-	if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
-		if (i->bad) {
-			devs = i->devs;
-			__journal_replay_free(i);
+	if (dup) {
+		if (dup->bad) {
+			/* we'll replace @dup: */
 		} else if (bad) {
 			goto found;
 		} else {
-			fsck_err_on(bytes != vstruct_bytes(&i->j) ||
-				    memcmp(j, &i->j, bytes), c,
+			fsck_err_on(bytes != vstruct_bytes(&dup->j) ||
+				    memcmp(j, &dup->j, bytes), c,
 				    "found duplicate but non identical journal entries (seq %llu)",
 				    le64_to_cpu(j->seq));
 			goto found;
 		}
-
 	}
 
 	i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
@@ -118,17 +120,34 @@ add:
 		goto out;
 	}
 
-	list_add(&i->list, where);
-	i->devs	= devs;
-	i->bad	= bad;
-	i->ignore = false;
+	i->nr_ptrs = 0;
+	i->bad	= bad;
+	i->ignore = false;
 	memcpy(&i->j, j, bytes);
 
+	if (dup) {
+		i->nr_ptrs = dup->nr_ptrs;
+		memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
+		__journal_replay_free(dup);
+	}
+
+	list_add(&i->list, where);
 found:
-	if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx))
-		bch2_dev_list_add_dev(&i->devs, ca->dev_idx);
-	else
-		fsck_err_on(1, c, "duplicate journal entries on same device");
-	ret = JOURNAL_ENTRY_ADD_OK;
+	for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
+		if (ptr->dev == ca->dev_idx) {
+			bch_err(c, "duplicate journal entry %llu on same device",
+				le64_to_cpu(i->j.seq));
+			goto out;
+		}
+	}
+
+	if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
+		bch_err(c, "found too many copies of journal entry %llu",
+			le64_to_cpu(i->j.seq));
+		goto out;
+	}
+
+	i->ptrs[i->nr_ptrs++] = entry_ptr;
 out:
 fsck_err:
 	return ret;
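
journal_entry_add() now records every device:offset at which a copy of an entry was found in a small fixed-size ptrs[] array, deduplicating entries by sequence number, instead of keeping only a device list. The per-entry copy accounting in isolation (a self-contained sketch with hypothetical types; MAX_COPIES plays the role of BCH_REPLICAS_MAX):

#include <errno.h>

#define MAX_COPIES 4

struct entry_ptr { unsigned dev; unsigned long long offset; };
struct entry     { struct entry_ptr ptrs[MAX_COPIES]; unsigned nr_ptrs; };

static int entry_add_ptr(struct entry *e, struct entry_ptr ptr)
{
	/* reject a second copy from the same device */
	for (unsigned i = 0; i < e->nr_ptrs; i++)
		if (e->ptrs[i].dev == ptr.dev)
			return -EEXIST;

	if (e->nr_ptrs >= MAX_COPIES)
		return -ENOSPC;	/* "found too many copies" */

	e->ptrs[e->nr_ptrs++] = ptr;
	return 0;
}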
@@ -654,7 +673,10 @@ reread:
 			ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
 
 			mutex_lock(&jlist->lock);
-			ret = journal_entry_add(c, ca, jlist, j, ret != 0);
+			ret = journal_entry_add(c, ca, (struct bch_extent_ptr) {
+						.dev = ca->dev_idx,
+						.offset	= offset,
+						}, jlist, j, ret != 0);
 			mutex_unlock(&jlist->lock);
 
 			switch (ret) {
@@ -742,6 +764,23 @@ err:
 	goto out;
 }
 
+static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+				      struct journal_replay *j)
+{
+	unsigned i;
+
+	for (i = 0; i < j->nr_ptrs; i++) {
+		struct bch_dev *ca = c->devs[j->ptrs[i].dev];
+
+		if (i)
+			pr_buf(out, " ");
+		pr_buf(out, "%u:%llu (offset %llu)",
+		       j->ptrs[i].dev,
+		       (u64) j->ptrs[i].offset,
+		       (u64) j->ptrs[i].offset % ca->mi.bucket_size);
+	}
+}
+
 int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 		      u64 *blacklist_seq, u64 *start_seq)
 {
@@ -839,6 +878,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 
 		while (seq < le64_to_cpu(i->j.seq)) {
 			u64 missing_start, missing_end;
+			char buf1[200], buf2[200];
 
 			while (seq < le64_to_cpu(i->j.seq) &&
 			       bch2_journal_seq_is_blacklisted(c, seq, false))
@@ -853,10 +893,23 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 			       !bch2_journal_seq_is_blacklisted(c, seq, false))
 				seq++;
 
+			if (i->list.prev != list) {
+				struct printbuf out = PBUF(buf1);
+				struct journal_replay *p = list_prev_entry(i, list);
+
+				bch2_journal_ptrs_to_text(&out, c, p);
+				pr_buf(&out, " size %llu", vstruct_sectors(&p->j, c->block_bits));
+			} else
+				sprintf(buf1, "(none)");
+			bch2_journal_ptrs_to_text(&PBUF(buf2), c, i);
+
 			missing_end = seq - 1;
-			fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+			fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
+				 "  prev at %s\n"
+				 "  next at %s",
 				 missing_start, missing_end,
-				 last_seq, *blacklist_seq - 1);
+				 last_seq, *blacklist_seq - 1,
+				 buf1, buf2);
 		}
 
 		seq++;
@@ -865,7 +918,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 	list_for_each_entry(i, list, list) {
 		struct jset_entry *entry;
 		struct bkey_i *k, *_n;
-		struct bch_replicas_padded replicas;
+		struct bch_replicas_padded replicas = {
+			.e.data_type = BCH_DATA_journal,
+			.e.nr_required = 1,
+		};
+		unsigned ptr;
 		char buf[80];
 
 		if (i->ignore)
@@ -875,13 +932,14 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 		if (ret)
 			goto fsck_err;
 
+		for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+			replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
+
 		/*
 		 * If we're mounting in degraded mode - if we didn't read all
 		 * the devices - this is wrong:
 		 */
 
-		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs);
-
 		if (!degraded &&
 		    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
 		     fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c,
@@ -8,7 +8,9 @@
  */
 struct journal_replay {
 	struct list_head	list;
-	struct bch_devs_list	devs;
+	struct bch_extent_ptr	ptrs[BCH_REPLICAS_MAX];
+	unsigned		nr_ptrs;
 
 	/* checksum error, but we may want to try using it anyways: */
 	bool			bad;
 	bool			ignore;
@@ -40,78 +40,169 @@ static void drop_alloc_keys(struct journal_keys *keys)
 
 /* iterate over keys read from the journal: */
 
-static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
-					      enum btree_id id, unsigned level,
-					      struct bpos pos)
+static int __journal_key_cmp(enum btree_id	l_btree_id,
+			     unsigned		l_level,
+			     struct bpos	l_pos,
+			     struct journal_key *r)
+{
+	return (cmp_int(l_btree_id,	r->btree_id) ?:
+		cmp_int(l_level,	r->level) ?:
+		bkey_cmp(l_pos,	r->k->k.p));
+}
+
+static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
+{
+	return (cmp_int(l->btree_id,	r->btree_id) ?:
+		cmp_int(l->level,	r->level) ?:
+		bkey_cmp(l->k->k.p, r->k->k.p));
+}
+
+static size_t journal_key_search(struct journal_keys *journal_keys,
+				 enum btree_id id, unsigned level,
+				 struct bpos pos)
 {
 	size_t l = 0, r = journal_keys->nr, m;
 
 	while (l < r) {
 		m = l + ((r - l) >> 1);
-		if ((cmp_int(id,	journal_keys->d[m].btree_id) ?:
-		     cmp_int(level,	journal_keys->d[m].level) ?:
-		     bkey_cmp(pos,	journal_keys->d[m].k->k.p)) > 0)
+		if (__journal_key_cmp(id, level, pos, &journal_keys->d[m]) > 0)
 			l = m + 1;
 		else
 			r = m;
 	}
 
 	BUG_ON(l < journal_keys->nr &&
-	       (cmp_int(id,	journal_keys->d[l].btree_id) ?:
-		cmp_int(level,	journal_keys->d[l].level) ?:
-		bkey_cmp(pos,	journal_keys->d[l].k->k.p)) > 0);
+	       __journal_key_cmp(id, level, pos, &journal_keys->d[l]) > 0);
 
 	BUG_ON(l &&
-	       (cmp_int(id,	journal_keys->d[l - 1].btree_id) ?:
-		cmp_int(level,	journal_keys->d[l - 1].level) ?:
-		bkey_cmp(pos,	journal_keys->d[l - 1].k->k.p)) <= 0);
+	       __journal_key_cmp(id, level, pos, &journal_keys->d[l - 1]) <= 0);
 
-	return l < journal_keys->nr ? journal_keys->d + l : NULL;
+	return l;
 }
 
+static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned idx)
+{
+	struct bkey_i *n = iter->keys->d[idx].k;
+	struct btree_and_journal_iter *biter =
+		container_of(iter, struct btree_and_journal_iter, journal);
+
+	if (iter->idx > idx ||
+	    (iter->idx == idx &&
+	     biter->last &&
+	     bkey_cmp(n->k.p, biter->unpacked.p) <= 0))
+		iter->idx++;
+}
+
+int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
+			    unsigned level, struct bkey_i *k)
+{
+	struct journal_key n = {
+		.btree_id	= id,
+		.level		= level,
+		.k		= k,
+		.allocated	= true
+	};
+	struct journal_keys *keys = &c->journal_keys;
+	struct journal_iter *iter;
+	unsigned idx = journal_key_search(keys, id, level, k->k.p);
+
+	if (idx < keys->nr &&
+	    journal_key_cmp(&n, &keys->d[idx]) == 0) {
+		if (keys->d[idx].allocated)
+			kfree(keys->d[idx].k);
+		keys->d[idx] = n;
+		return 0;
+	}
+
+	if (keys->nr == keys->size) {
+		struct journal_keys new_keys = {
+			.nr			= keys->nr,
+			.size			= keys->size * 2,
+			.journal_seq_base	= keys->journal_seq_base,
+		};
+
+		new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
+		if (!new_keys.d)
+			return -ENOMEM;
+
+		memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
+		kvfree(keys->d);
+		*keys = new_keys;
+	}
+
+	array_insert_item(keys->d, keys->nr, idx, n);
+
+	list_for_each_entry(iter, &c->journal_iters, list)
+		journal_iter_fix(c, iter, idx);
+
+	return 0;
+}
+
+int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
+			    unsigned level, struct bpos pos)
+{
+	struct bkey_i *whiteout =
+		kmalloc(sizeof(struct bkey), GFP_KERNEL);
+	int ret;
+
+	if (!whiteout)
+		return -ENOMEM;
+
+	bkey_init(&whiteout->k);
+	whiteout->k.p = pos;
+
+	ret = bch2_journal_key_insert(c, id, level, whiteout);
+	if (ret)
+		kfree(whiteout);
+	return ret;
+}
+
 static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 {
-	if (iter->k &&
-	    iter->k < iter->keys->d + iter->keys->nr &&
-	    iter->k->btree_id	== iter->btree_id &&
-	    iter->k->level	== iter->level)
-		return iter->k->k;
+	struct journal_key *k = iter->idx - iter->keys->nr
+		? iter->keys->d + iter->idx : NULL;
 
-	iter->k = NULL;
+	if (k &&
+	    k->btree_id	== iter->btree_id &&
+	    k->level	== iter->level)
+		return k->k;
+
+	iter->idx = iter->keys->nr;
 	return NULL;
 }
 
 static void bch2_journal_iter_advance(struct journal_iter *iter)
 {
-	if (iter->k)
-		iter->k++;
+	if (iter->idx < iter->keys->nr)
+		iter->idx++;
 }
 
-static void bch2_journal_iter_init(struct journal_iter *iter,
-				   struct journal_keys *journal_keys,
+static void bch2_journal_iter_exit(struct journal_iter *iter)
+{
+	list_del(&iter->list);
+}
+
+static void bch2_journal_iter_init(struct bch_fs *c,
+				   struct journal_iter *iter,
 				   enum btree_id id, unsigned level,
 				   struct bpos pos)
 {
 	iter->btree_id	= id;
 	iter->level	= level;
-	iter->keys	= journal_keys;
-	iter->k		= journal_key_search(journal_keys, id, level, pos);
+	iter->keys	= &c->journal_keys;
+	iter->idx	= journal_key_search(&c->journal_keys, id, level, pos);
+	list_add(&iter->list, &c->journal_iters);
 }
 
 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
 {
-	return iter->btree
-		? bch2_btree_iter_peek(iter->btree)
-		: bch2_btree_node_iter_peek_unpack(&iter->node_iter,
-						   iter->b, &iter->unpacked);
+	return bch2_btree_node_iter_peek_unpack(&iter->node_iter,
+						iter->b, &iter->unpacked);
 }
 
 static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
 {
-	if (iter->btree)
-		bch2_btree_iter_next(iter->btree);
-	else
-		bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
+	bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
 }
 
 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
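
journal_key_search() is a lower-bound binary search over the array sorted by (btree_id, level, pos); returning an index instead of a pointer is what lets bch2_journal_key_insert() splice new keys into the right slot. The pattern in isolation (self-contained sketch; the ?: chaining is the GNU C idiom bcachefs uses via cmp_int()):

#include <stddef.h>

#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

struct key { int btree_id; int level; long pos; };

static int key_cmp(const struct key *l, const struct key *r)
{
	return	cmp_int(l->btree_id, r->btree_id) ?:
		cmp_int(l->level, r->level) ?:
		cmp_int(l->pos, r->pos);
}

/* First index whose element compares >= the query, or nr if none does: */
static size_t lower_bound(const struct key *d, size_t nr, const struct key *q)
{
	size_t l = 0, r = nr;

	while (l < r) {
		size_t m = l + ((r - l) >> 1);

		if (key_cmp(q, &d[m]) > 0)
			l = m + 1;
		else
			r = m;
	}
	return l;	/* insertion point: everything before l compares < q */
}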
@@ -160,7 +251,7 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
 
 	if (iter->b &&
 	    bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
-		iter->journal.k = NULL;
+		iter->journal.idx = iter->journal.keys->nr;
 		iter->last = none;
 		return bkey_s_c_null;
 	}
@@ -181,26 +272,20 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *
 	return bch2_btree_and_journal_iter_peek(iter);
 }
 
-void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
-				      struct btree_trans *trans,
-				      struct journal_keys *journal_keys,
-				      enum btree_id id, struct bpos pos)
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
 {
-	memset(iter, 0, sizeof(*iter));
-
-	iter->btree = bch2_trans_get_iter(trans, id, pos, BTREE_ITER_PREFETCH);
-	bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos);
+	bch2_journal_iter_exit(&iter->journal);
 }
 
 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-						struct journal_keys *journal_keys,
+						struct bch_fs *c,
						struct btree *b)
 {
 	memset(iter, 0, sizeof(*iter));
 
 	iter->b = b;
 	bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
-	bch2_journal_iter_init(&iter->journal, journal_keys,
+	bch2_journal_iter_init(c, &iter->journal,
 			       b->c.btree_id, b->c.level, b->data->min_key);
 }
@@ -244,7 +329,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
 	int ret = 0;
 
 	bch2_bkey_buf_init(&tmp);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
 
 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 		ret = key_fn(c, btree_id, b->c.level, k);
@@ -257,7 +342,8 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
 			bch2_btree_and_journal_iter_advance(&iter);
 
 			child = bch2_btree_node_get_noiter(c, tmp.k,
-					b->c.btree_id, b->c.level - 1);
+					b->c.btree_id, b->c.level - 1,
+					false);
 
 			ret = PTR_ERR_OR_ZERO(child);
 			if (ret)
@@ -277,6 +363,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
 		}
 	}
 
+	bch2_btree_and_journal_iter_exit(&iter);
 	bch2_bkey_buf_exit(&tmp, c);
 	return ret;
 }
@@ -333,6 +420,12 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 
 void bch2_journal_keys_free(struct journal_keys *keys)
 {
+	struct journal_key *i;
+
+	for (i = keys->d; i < keys->d + keys->nr; i++)
+		if (i->allocated)
+			kfree(i->k);
+
 	kvfree(keys->d);
 	keys->d = NULL;
 	keys->nr = 0;
@@ -361,7 +454,9 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
 		nr_keys++;
 	}
 
-	keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL);
+	keys.size = roundup_pow_of_two(nr_keys);
+
+	keys.d = kvmalloc(sizeof(keys.d[0]) * keys.size, GFP_KERNEL);
 	if (!keys.d)
 		goto err;
 
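
Sizing the sorted key array to roundup_pow_of_two(nr_keys) and doubling it in bch2_journal_key_insert() when full makes repair-time insertions amortized O(1). The growth policy in miniature (plain malloc/memcpy/free standing in for kvmalloc/kvfree, since kvmalloc has no realloc counterpart):

#include <stdlib.h>
#include <string.h>

struct vec { void **d; size_t nr, size; };

static int vec_push(struct vec *v, void *item)
{
	if (v->nr == v->size) {
		size_t new_size = v->size ? v->size * 2 : 8;
		void **d = malloc(new_size * sizeof(*d));

		if (!d)
			return -1;
		memcpy(d, v->d, v->nr * sizeof(*d));	/* copy, then swap */
		free(v->d);
		v->d = d;
		v->size = new_size;
	}
	v->d[v->nr++] = item;
	return 0;
}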
@@ -545,14 +640,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 	return ret;
 }
 
-static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id,
-				   unsigned level, struct bkey_i *k)
+static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
 {
-	return bch2_trans_do(c, NULL, NULL,
-			     BTREE_INSERT_NOFAIL|
-			     BTREE_INSERT_LAZY_RW|
-			     BTREE_INSERT_JOURNAL_REPLAY,
-			     __bch2_journal_replay_key(&trans, id, level, k));
+	unsigned commit_flags = BTREE_INSERT_NOFAIL|
+		BTREE_INSERT_LAZY_RW;
+
+	if (!k->allocated)
+		commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
+
+	return bch2_trans_do(c, NULL, NULL, commit_flags,
+			     __bch2_journal_replay_key(&trans, k->btree_id, k->level, k->k));
 }
 
 static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
@@ -628,7 +725,7 @@ static int bch2_journal_replay(struct bch_fs *c,
 
 		if (i->level) {
 			j->replay_journal_seq = keys.journal_seq_base + i->journal_seq;
-			ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+			ret = bch2_journal_replay_key(c, i);
 			if (ret)
 				goto err;
 		}
@@ -658,7 +755,7 @@ static int bch2_journal_replay(struct bch_fs *c,
 
 		ret = i->k->k.size
 			? bch2_extent_replay_key(c, i->btree_id, i->k)
-			: bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+			: bch2_journal_replay_key(c, i);
 		if (ret)
 			goto err;
 	}
@@ -670,7 +767,8 @@ static int bch2_journal_replay(struct bch_fs *c,
 	bch2_journal_flush_all_pins(j);
 	return bch2_journal_error(j);
 err:
-	bch_err(c, "journal replay: error %d while replaying key", ret);
+	bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+		ret, bch2_btree_ids[i->btree_id], i->level);
 	return ret;
 }
@@ -1105,7 +1203,7 @@ use_clean:
 	    test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
 		bch_info(c, "starting mark and sweep");
 		err = "error in mark and sweep";
-		ret = bch2_gc(c, &c->journal_keys, true);
+		ret = bch2_gc(c, true);
 		if (ret)
 			goto err;
 		bch_verbose(c, "mark and sweep done");
@@ -6,10 +6,11 @@
 	for (i = (keys).d; i < (keys).d + (keys).nr; (i)++)
 
 struct journal_iter {
+	struct list_head	list;
 	enum btree_id		btree_id;
 	unsigned		level;
+	size_t			idx;
 	struct journal_keys	*keys;
-	struct journal_key	*k;
 };
 
 /*
@@ -17,8 +18,6 @@ struct journal_iter {
  */
 
 struct btree_and_journal_iter {
-	struct btree_iter	*btree;
-
 	struct btree		*b;
 	struct btree_node_iter	node_iter;
 	struct bkey		unpacked;
@@ -32,16 +31,18 @@ struct btree_and_journal_iter {
 	} last;
 };
 
+int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
+			    unsigned, struct bkey_i *);
+int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
+			    unsigned, struct bpos);
+
 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
 
-void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
-				      struct btree_trans *,
-				      struct journal_keys *,
-				      enum btree_id, struct bpos);
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-						struct journal_keys *,
+						struct bch_fs *,
 						struct btree *);
 
 typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
@@ -276,19 +276,19 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
 		return "Bad number of member devices";
 
 	if (!BCH_SB_META_REPLICAS_WANT(sb) ||
-	    BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_META_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
 		return "Invalid number of metadata replicas";
 
 	if (!BCH_SB_META_REPLICAS_REQ(sb) ||
-	    BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_META_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
 		return "Invalid number of metadata replicas";
 
 	if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
-	    BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_DATA_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
 		return "Invalid number of data replicas";
 
 	if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
-	    BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_DATA_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
 		return "Invalid number of data replicas";
 
 	if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
@@ -684,6 +684,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 		bch2_blacklist_entries_gc);
 
 	INIT_LIST_HEAD(&c->journal_entries);
+	INIT_LIST_HEAD(&c->journal_iters);
 
 	INIT_LIST_HEAD(&c->fsck_errors);
 	mutex_init(&c->fsck_error_lock);
@@ -475,7 +475,7 @@ STORE(bch2_fs)
 	 */
 #if 0
 	down_read(&c->state_lock);
-	bch2_gc(c, NULL, false, false);
+	bch2_gc(c, false, false);
 	up_read(&c->state_lock);
 #else
 	bch2_gc_gens(c);