Update bcachefs sources to bee34d805c bcachefs: Repair bad data pointers

Kent Overstreet 2021-01-28 16:16:51 -05:00
parent 19f921604d
commit 7740db24f7
19 changed files with 587 additions and 233 deletions

.bcachefs_revision

@@ -1 +1 @@
-ffc900d5936ae538e34d18a6ce739d0a5a9178cf
+bee34d805cf75e57f9380e0ee91771b9d90b2b2d

libbcachefs/bcachefs.h

@@ -509,7 +509,8 @@ enum {
 	BCH_FS_ERRORS_FIXED,

 	/* misc: */
-	BCH_FS_FIXED_GENS,
+	BCH_FS_NEED_ANOTHER_GC,
+	BCH_FS_DELETED_NODES,
 	BCH_FS_NEED_ALLOC_WRITE,
 	BCH_FS_REBUILD_REPLICAS,
 	BCH_FS_HOLD_BTREE_WRITES,
@@ -539,11 +540,13 @@ struct journal_keys {
 	struct journal_key {
 		enum btree_id	btree_id:8;
 		unsigned	level:8;
+		bool		allocated;
 		struct bkey_i	*k;
 		u32		journal_seq;
 		u32		journal_offset;
 	} *d;
 	size_t			nr;
+	size_t			size;
 	u64			journal_seq_base;
 };
@@ -840,6 +843,7 @@ struct bch_fs {
 	struct journal		journal;
 	struct list_head	journal_entries;
 	struct journal_keys	journal_keys;
+	struct list_head	journal_iters;

 	u64			last_bucket_seq_cleanup;

libbcachefs/bcachefs_format.h

@@ -603,13 +603,14 @@ struct bch_btree_ptr_v2 {
 	__u64			mem_ptr;
 	__le64			seq;
 	__le16			sectors_written;
-	/* In case we ever decide to do variable size btree nodes: */
-	__le16			sectors;
+	__le16			flags;
 	struct bpos		min_key;
 	struct bch_extent_ptr	start[0];
 	__u64			_data[0];
 } __attribute__((packed, aligned(8)));

+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);
+
 struct bch_extent {
 	struct bch_val		v;
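The new flags field replaces the never-used sectors field, and the LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, ...) line declares accessors for bit 0 of it. A rough standalone sketch of what such a bitmask declaration expands to (simplified names, no little-endian conversion, so this is an illustration of the pattern, not the bcachefs macro itself):

#include <stdint.h>
#include <stdio.h>

/* Sketch of an LE16_BITMASK-style macro: generates a getter and a
 * SET_ helper for bits [lo, hi) of a 16-bit flags word. */
#define BITMASK16(name, lo, hi)						\
static inline uint16_t name(uint16_t flags)				\
{									\
	return (flags >> (lo)) & ((1U << ((hi) - (lo))) - 1);		\
}									\
									\
static inline uint16_t SET_##name(uint16_t flags, uint16_t v)		\
{									\
	flags &= ~(((1U << ((hi) - (lo))) - 1) << (lo));		\
	return flags | (uint16_t) (v << (lo));				\
}

/* BTREE_PTR_RANGE_UPDATED lives in bit 0 of bch_btree_ptr_v2.flags: */
BITMASK16(RANGE_UPDATED, 0, 1)

int main(void)
{
	uint16_t flags = 0;

	flags = SET_RANGE_UPDATED(flags, 1);
	printf("range_updated = %u\n", RANGE_UPDATED(flags)); /* prints 1 */
	return 0;
}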

libbcachefs/btree_cache.c

@@ -7,6 +7,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "error.h"

 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
@@ -812,9 +813,12 @@ lock_node:
 		return ERR_PTR(-EIO);
 	}

-	EBUG_ON(b->c.btree_id != iter->btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->c.btree_id != iter->btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bkey_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));

 	return b;
 }
@@ -822,7 +826,8 @@ lock_node:
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 					 const struct bkey_i *k,
 					 enum btree_id btree_id,
-					 unsigned level)
+					 unsigned level,
+					 bool nofill)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -837,6 +842,9 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
 retry:
 	b = btree_cache_find(bc, k);
 	if (unlikely(!b)) {
+		if (nofill)
+			return NULL;
+
 		b = bch2_btree_node_fill(c, NULL, k, btree_id,
 					 level, SIX_LOCK_read, true);
@@ -883,9 +891,12 @@ lock_node:
 		return ERR_PTR(-EIO);
 	}

-	EBUG_ON(b->c.btree_id != btree_id ||
-		BTREE_NODE_LEVEL(b->data) != level ||
-		bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->c.btree_id != btree_id);
+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+	EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+	EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+		bkey_cmp(b->data->min_key,
+			 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));

 	return b;
 }
@@ -995,8 +1006,22 @@ out:
 		if (sib != btree_prev_sib)
 			swap(n1, n2);

-		BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
-				n2->data->min_key));
+		if (bkey_cmp(bkey_successor(n1->key.k.p),
+			     n2->data->min_key)) {
+			char buf1[200], buf2[200];
+
+			bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+			bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+			bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+					     "prev: %s\n"
+					     "next: %s\n",
+					     bch2_btree_ids[iter->btree_id], level,
+					     buf1, buf2);
+
+			six_unlock_intent(&ret->c.lock);
+			ret = NULL;
+		}
 	}

 	bch2_btree_trans_verify_locks(trans);

libbcachefs/btree_cache.h

@@ -26,7 +26,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
				  enum six_lock_type, unsigned long);

 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
-					 enum btree_id, unsigned);
+					 enum btree_id, unsigned, bool);

 struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
					   struct btree *, enum btree_node_sibling);

libbcachefs/btree_gc.c

@@ -50,39 +50,199 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
 	__gc_pos_set(c, new_pos);
 }

-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
 static int bch2_gc_check_topology(struct bch_fs *c,
-				  struct bkey_s_c k,
-				  struct bpos *expected_start,
-				  struct bpos expected_end,
+				  struct btree *b,
+				  struct bkey_buf *prev,
+				  struct bkey_buf cur,
 				  bool is_last)
 {
+	struct bpos node_start	= b->data->min_key;
+	struct bpos node_end	= b->data->max_key;
+	struct bpos expected_start = bkey_deleted(&prev->k->k)
+		? node_start
+		: bkey_successor(prev->k->k.p);
+	char buf1[200], buf2[200];
+	bool update_min = false;
+	bool update_max = false;
 	int ret = 0;

-	if (k.k->type == KEY_TYPE_btree_ptr_v2) {
-		struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+	if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
+		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);

-		if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c,
-				"btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu",
-				bp.v->min_key.inode,
-				bp.v->min_key.offset,
-				expected_start->inode,
-				expected_start->offset)) {
-			BUG();
-		}
+		if (bkey_deleted(&prev->k->k))
+			scnprintf(buf1, sizeof(buf1), "start of node: %llu:%llu",
+				  node_start.inode,
+				  node_start.offset);
+		else
+			bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
+
+		if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
+				"btree node with incorrect min_key at btree %s level %u:\n"
+				"  prev %s\n"
+				"  cur %s",
+				bch2_btree_ids[b->c.btree_id], b->c.level,
+				buf1,
+				(bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)))
+			update_min = true;
 	}

-	*expected_start = bkey_cmp(k.k->p, POS_MAX)
-		? bkey_successor(k.k->p)
-		: k.k->p;
-
 	if (fsck_err_on(is_last &&
-			bkey_cmp(k.k->p, expected_end), c,
-			"btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu",
-			k.k->p.inode,
-			k.k->p.offset,
-			expected_end.inode,
-			expected_end.offset)) {
-		BUG();
+			bkey_cmp(cur.k->k.p, node_end), c,
+			"btree node with incorrect max_key at btree %s level %u:\n"
+			"  %s\n"
+			"  expected %s",
+			bch2_btree_ids[b->c.btree_id], b->c.level,
+			(bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1),
+			(bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)))
+		update_max = true;
+
+	bch2_bkey_buf_copy(prev, c, cur.k);
+
+	if (update_min || update_max) {
+		struct bkey_i *new;
+		struct bkey_i_btree_ptr_v2 *bp = NULL;
+		struct btree *n;
+
+		if (update_max) {
+			ret = bch2_journal_key_delete(c, b->c.btree_id,
+						      b->c.level, cur.k->k.p);
+			if (ret)
+				return ret;
+		}
+
+		new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		bkey_copy(new, cur.k);
+
+		if (new->k.type == KEY_TYPE_btree_ptr_v2)
+			bp = bkey_i_to_btree_ptr_v2(new);
+
+		if (update_min)
+			bp->v.min_key = expected_start;
+		if (update_max)
+			new->k.p = node_end;
+		if (bp)
+			SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true);
+
+		ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new);
+		if (ret) {
+			kfree(new);
+			return ret;
+		}
+
+		n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id,
+					       b->c.level - 1, true);
+		if (n) {
+			mutex_lock(&c->btree_cache.lock);
+
+			bch2_btree_node_hash_remove(&c->btree_cache, n);
+
+			bkey_copy(&n->key, new);
+			if (update_min)
+				n->data->min_key = expected_start;
+			if (update_max)
+				n->data->max_key = node_end;
+
+			ret = __bch2_btree_node_hash_insert(&c->btree_cache, n);
+			BUG_ON(ret);
+			mutex_unlock(&c->btree_cache.lock);
+			six_unlock_read(&n->c.lock);
+		}
+	}
+fsck_err:
+	return ret;
+}
+
+static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
+			       unsigned level, bool is_root,
+			       struct bkey_s_c *k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
+	const struct bch_extent_ptr *ptr;
+	bool do_update = false;
+	int ret = 0;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		struct bucket *g = PTR_BUCKET(ca, ptr, true);
+		struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
+
+		if (fsck_err_on(!g->gen_valid, c,
+				"bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen)) {
+			if (!ptr->cached) {
+				g2->_mark.gen	= g->_mark.gen	= ptr->gen;
+				g2->gen_valid	= g->gen_valid	= true;
+				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+			} else {
+				do_update = true;
+			}
+		}
+
+		if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+				"bucket %u:%zu data type %s ptr gen in the future: %u > %u",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen, g->mark.gen)) {
+			if (!ptr->cached) {
+				g2->_mark.gen	= g->_mark.gen	= ptr->gen;
+				g2->gen_valid	= g->gen_valid	= true;
+				g2->_mark.data_type		= 0;
+				g2->_mark.dirty_sectors		= 0;
+				g2->_mark.cached_sectors	= 0;
+				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+			} else {
+				do_update = true;
+			}
+		}
+
+		if (fsck_err_on(!ptr->cached &&
+				gen_cmp(ptr->gen, g->mark.gen) < 0, c,
+				"bucket %u:%zu data type %s stale dirty ptr: %u < %u",
+				ptr->dev, PTR_BUCKET_NR(ca, ptr),
+				bch2_data_types[ptr_data_type(k->k, ptr)],
+				ptr->gen, g->mark.gen))
+			do_update = true;
+	}
+
+	if (do_update) {
+		struct bch_extent_ptr *ptr;
+		struct bkey_i *new;
+
+		if (is_root) {
+			bch_err(c, "cannot update btree roots yet");
+			return -EINVAL;
+		}
+
+		new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		bkey_reassemble(new, *k);
+
+		bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
+			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+			struct bucket *g = PTR_BUCKET(ca, ptr, true);
+
+			(ptr->cached &&
+			 (!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
+			(!ptr->cached &&
+			 gen_cmp(ptr->gen, g->mark.gen) < 0);
+		}));
+
+		ret = bch2_journal_key_insert(c, btree_id, level, new);
+		if (ret)
+			kfree(new);
+		else
+			*k = bkey_i_to_s_c(new);
 	}
 fsck_err:
 	return ret;
@@ -90,7 +250,9 @@ fsck_err:

 /* marking of btree keys/nodes: */

-static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
+static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
+			    unsigned level, bool is_root,
+			    struct bkey_s_c k,
 			    u8 *max_stale, bool initial)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -104,7 +266,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		BUG_ON(bch2_journal_seq_verify &&
 		       k.k->version.lo > journal_cur_seq(&c->journal));

-		/* XXX change to fsck check */
 		if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
 				"key version number higher than recorded: %llu > %llu",
 				k.k->version.lo,
@@ -120,35 +281,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 			return ret;
 		}

-		bkey_for_each_ptr(ptrs, ptr) {
-			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-			struct bucket *g = PTR_BUCKET(ca, ptr, true);
-			struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
-
-			if (mustfix_fsck_err_on(!g->gen_valid, c,
-					"bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
-					ptr->dev, PTR_BUCKET_NR(ca, ptr),
-					bch2_data_types[ptr_data_type(k.k, ptr)],
-					ptr->gen)) {
-				g2->_mark.gen	= g->_mark.gen	= ptr->gen;
-				g2->gen_valid	= g->gen_valid	= true;
-				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
-			}
-
-			if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
-					"bucket %u:%zu data type %s ptr gen in the future: %u > %u",
-					ptr->dev, PTR_BUCKET_NR(ca, ptr),
-					bch2_data_types[ptr_data_type(k.k, ptr)],
-					ptr->gen, g->mark.gen)) {
-				g2->_mark.gen	= g->_mark.gen	= ptr->gen;
-				g2->gen_valid	= g->gen_valid	= true;
-				g2->_mark.data_type		= 0;
-				g2->_mark.dirty_sectors		= 0;
-				g2->_mark.cached_sectors	= 0;
-				set_bit(BCH_FS_FIXED_GENS, &c->flags);
-				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
-			}
-		}
+		ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, &k);
 	}

 	bkey_for_each_ptr(ptrs, ptr) {
@@ -169,10 +302,10 @@ fsck_err:
 static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
 			      bool initial)
 {
-	struct bpos next_node_start = b->data->min_key;
 	struct btree_node_iter iter;
 	struct bkey unpacked;
 	struct bkey_s_c k;
+	struct bkey_buf prev, cur;
 	int ret = 0;

 	*max_stale = 0;
@@ -181,26 +314,32 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
 		return 0;

 	bch2_btree_node_iter_init_from_start(&iter, b);
+	bch2_bkey_buf_init(&prev);
+	bch2_bkey_buf_init(&cur);
+	bkey_init(&prev.k->k);

 	while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
 		bch2_bkey_debugcheck(c, b, k);

-		ret = bch2_gc_mark_key(c, k, max_stale, initial);
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
+				       k, max_stale, initial);
 		if (ret)
 			break;

 		bch2_btree_node_iter_advance(&iter, b);

 		if (b->c.level) {
-			ret = bch2_gc_check_topology(c, k,
-					&next_node_start,
-					b->data->max_key,
+			bch2_bkey_buf_reassemble(&cur, c, k);
+
+			ret = bch2_gc_check_topology(c, b, &prev, cur,
 					bch2_btree_node_iter_end(&iter));
 			if (ret)
 				break;
 		}
 	}

+	bch2_bkey_buf_exit(&cur, c);
+	bch2_bkey_buf_exit(&prev, c);
 	return ret;
 }
@@ -253,7 +392,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 	mutex_lock(&c->btree_root_lock);
 	b = c->btree_roots[btree_id].b;
 	if (!btree_node_fake(b))
-		ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
+				       bkey_i_to_s_c(&b->key),
 				       &max_stale, initial);
 	gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
 	mutex_unlock(&c->btree_root_lock);
@@ -262,18 +402,18 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 }

 static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
-				      struct journal_keys *journal_keys,
 				      unsigned target_depth)
 {
 	struct btree_and_journal_iter iter;
 	struct bkey_s_c k;
-	struct bpos next_node_start = b->data->min_key;
-	struct bkey_buf tmp;
+	struct bkey_buf cur, prev;
 	u8 max_stale = 0;
 	int ret = 0;

-	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
-	bch2_bkey_buf_init(&tmp);
+	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+	bch2_bkey_buf_init(&prev);
+	bch2_bkey_buf_init(&cur);
+	bkey_init(&prev.k->k);

 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
 		bch2_bkey_debugcheck(c, b, k);
@@ -281,50 +421,72 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
 		BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0);
 		BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0);

-		ret = bch2_gc_mark_key(c, k, &max_stale, true);
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
+				       k, &max_stale, true);
 		if (ret)
 			break;

 		if (b->c.level) {
-			struct btree *child;
-
-			bch2_bkey_buf_reassemble(&tmp, c, k);
-			k = bkey_i_to_s_c(tmp.k);
+			bch2_bkey_buf_reassemble(&cur, c, k);
+			k = bkey_i_to_s_c(cur.k);

 			bch2_btree_and_journal_iter_advance(&iter);

-			ret = bch2_gc_check_topology(c, k,
-					&next_node_start,
-					b->data->max_key,
+			ret = bch2_gc_check_topology(c, b,
+					&prev, cur,
 					!bch2_btree_and_journal_iter_peek(&iter).k);
 			if (ret)
 				break;
-
-			if (b->c.level > target_depth) {
-				child = bch2_btree_node_get_noiter(c, tmp.k,
-						b->c.btree_id, b->c.level - 1);
-				ret = PTR_ERR_OR_ZERO(child);
-				if (ret)
-					break;
-
-				ret = bch2_gc_btree_init_recurse(c, child,
-						journal_keys, target_depth);
-				six_unlock_read(&child->c.lock);
-
-				if (ret)
-					break;
-			}
 		} else {
 			bch2_btree_and_journal_iter_advance(&iter);
 		}
 	}

-	bch2_bkey_buf_exit(&tmp, c);
+	if (b->c.level > target_depth) {
+		bch2_btree_and_journal_iter_exit(&iter);
+		bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+
+		while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+			struct btree *child;
+
+			bch2_bkey_buf_reassemble(&cur, c, k);
+			bch2_btree_and_journal_iter_advance(&iter);
+
+			child = bch2_btree_node_get_noiter(c, cur.k,
+						b->c.btree_id, b->c.level - 1,
+						false);
+			ret = PTR_ERR_OR_ZERO(child);
+
+			if (fsck_err_on(ret == -EIO, c,
+					"unreadable btree node")) {
+				ret = bch2_journal_key_delete(c, b->c.btree_id,
+						b->c.level, cur.k->k.p);
+				if (ret)
+					return ret;
+
+				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+				continue;
+			}
+
+			if (ret)
+				break;
+
+			ret = bch2_gc_btree_init_recurse(c, child,
+							 target_depth);
+			six_unlock_read(&child->c.lock);
+
+			if (ret)
+				break;
+		}
+	}
+fsck_err:
+	bch2_bkey_buf_exit(&cur, c);
+	bch2_bkey_buf_exit(&prev, c);
+	bch2_btree_and_journal_iter_exit(&iter);
 	return ret;
 }

 static int bch2_gc_btree_init(struct bch_fs *c,
-			      struct journal_keys *journal_keys,
 			      enum btree_id btree_id)
 {
 	struct btree *b;
@@ -355,11 +517,11 @@ static int bch2_gc_btree_init(struct bch_fs *c,
 	}

 	if (b->c.level >= target_depth)
-		ret = bch2_gc_btree_init_recurse(c, b,
-					journal_keys, target_depth);
+		ret = bch2_gc_btree_init_recurse(c, b, target_depth);

 	if (!ret)
-		ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+		ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, true,
+				       bkey_i_to_s_c(&b->key),
 				       &max_stale, true);
 fsck_err:
 	six_unlock_read(&b->c.lock);
@@ -373,8 +535,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 		(int) btree_id_to_gc_phase(r);
 }

-static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
-			  bool initial)
+static int bch2_gc_btrees(struct bch_fs *c, bool initial)
 {
 	enum btree_id ids[BTREE_ID_NR];
 	unsigned i;
@@ -386,8 +547,7 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial)
 	for (i = 0; i < BTREE_ID_NR; i++) {
 		enum btree_id id = ids[i];
 		int ret = initial
-			? bch2_gc_btree_init(c, journal_keys,
-					     id)
+			? bch2_gc_btree_init(c, id)
 			: bch2_gc_btree(c, id, initial);
 		if (ret)
 			return ret;
@@ -775,8 +935,7 @@ static int bch2_gc_start(struct bch_fs *c)
  * move around - if references move backwards in the ordering GC
  * uses, GC could skip past them
  */
-int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
-	    bool initial)
+int bch2_gc(struct bch_fs *c, bool initial)
 {
 	struct bch_dev *ca;
 	u64 start_time = local_clock();
@@ -798,7 +957,7 @@ again:

 	bch2_mark_superblocks(c);

-	ret = bch2_gc_btrees(c, journal_keys, initial);
+	ret = bch2_gc_btrees(c, initial);
 	if (ret)
 		goto out;
@@ -808,16 +967,15 @@ again:
 	bch2_mark_allocator_buckets(c);

 	c->gc_count++;
-out:
-	if (!ret &&
-	    (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
-	     (!iter && bch2_test_restart_gc))) {
+
+	if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+	    (!iter && bch2_test_restart_gc)) {
 		/*
 		 * XXX: make sure gens we fixed got saved
 		 */
 		if (iter++ <= 2) {
-			bch_info(c, "Fixed gens, restarting mark and sweep:");
-			clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+			bch_info(c, "Second GC pass needed, restarting:");
+			clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
 			__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

 			percpu_down_write(&c->mark_lock);
@@ -832,7 +990,7 @@ again:
 		bch_info(c, "Unable to fix bucket gens, looping");
 		ret = -EINVAL;
 	}
-
+out:
 	if (!ret) {
 		bch2_journal_block(&c->journal);
@@ -1371,7 +1529,7 @@ static int bch2_gc_thread(void *arg)
 		 * Full gc is currently incompatible with btree key cache:
 		 */
 #if 0
-		ret = bch2_gc(c, NULL, false, false);
+		ret = bch2_gc(c, false, false);
 #else
 		ret = bch2_gc_gens(c);
 #endif
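The repair rule implemented by bch2_check_fix_ptrs() above: bad bucket gens referenced by dirty pointers are repaired in place (setting BCH_FS_NEED_ANOTHER_GC so marking is redone), while untrustworthy pointers are dropped and the rewritten key is inserted through bch2_journal_key_insert(). A standalone model of just the drop predicate, using the same wrapping 8-bit gen comparison bcachefs uses (simplified types; illustrative sketch only):

#include <stdbool.h>
#include <stdint.h>

/* Wrapping generation comparison, as in bcachefs's gen_cmp(): */
static inline int gen_cmp(uint8_t a, uint8_t b)
{
	return (int8_t) (a - b);
}

struct bucket_state {
	bool	gen_valid;	/* gen known from the alloc btree? */
	uint8_t	gen;		/* current bucket generation */
};

/* Mirrors the bch2_bkey_drop_ptrs() condition above: drop cached
 * pointers whose bucket gen is unknown or behind the pointer, and
 * dirty pointers that have gone stale. */
static bool should_drop_ptr(bool cached, uint8_t ptr_gen,
			    struct bucket_state g)
{
	return (cached &&
		(!g.gen_valid || gen_cmp(ptr_gen, g.gen) > 0)) ||
	       (!cached &&
		gen_cmp(ptr_gen, g.gen) < 0);
}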

libbcachefs/btree_gc.h

@@ -6,8 +6,7 @@
 void bch2_coalesce(struct bch_fs *);

-struct journal_keys;
-int bch2_gc(struct bch_fs *, struct journal_keys *, bool);
+int bch2_gc(struct bch_fs *, bool);
 int bch2_gc_gens(struct bch_fs *);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);

libbcachefs/btree_io.c

@@ -753,6 +753,11 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 		struct bch_btree_ptr_v2 *bp =
 			&bkey_i_to_btree_ptr_v2(&b->key)->v;

+		if (BTREE_PTR_RANGE_UPDATED(bp)) {
+			b->data->min_key = bp->min_key;
+			b->data->max_key = b->key.k.p;
+		}
+
 		btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
			     BTREE_ERR_MUST_RETRY, c, b, NULL,
			     "incorrect min_key: got %llu:%llu should be %llu:%llu",

libbcachefs/btree_update_interior.c

@@ -297,7 +297,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
 		bp->v.mem_ptr		= 0;
 		bp->v.seq		= b->data->keys.seq;
 		bp->v.sectors_written	= 0;
-		bp->v.sectors		= cpu_to_le16(c->opts.btree_node_size);
 	}

 	if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))

libbcachefs/ec.c

@@ -744,7 +744,6 @@ err:
 static int ec_stripe_bkey_update(struct btree_trans *trans,
				 struct bkey_i_stripe *new)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	const struct bch_stripe *existing;
@@ -759,7 +758,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
 		goto err;

 	if (!k.k || k.k->type != KEY_TYPE_stripe) {
-		bch_err(c, "error updating stripe: not found");
+		bch_err(trans->c, "error updating stripe: not found");
 		ret = -ENOENT;
 		goto err;
 	}
@@ -767,7 +766,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
 	existing = bkey_s_c_to_stripe(k).v;

 	if (existing->nr_blocks != new->v.nr_blocks) {
-		bch_err(c, "error updating stripe: nr_blocks does not match");
+		bch_err(trans->c, "error updating stripe: nr_blocks does not match");
 		ret = -EINVAL;
 		goto err;
 	}

libbcachefs/extents.c

@@ -215,9 +215,8 @@ void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
 {
 	struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);

-	pr_buf(out, "seq %llx sectors %u written %u min_key ",
+	pr_buf(out, "seq %llx written %u min_key ",
	       le64_to_cpu(bp.v->seq),
-	       le16_to_cpu(bp.v->sectors),
	       le16_to_cpu(bp.v->sectors_written));

 	bch2_bpos_to_text(out, bp.v->min_key);
@@ -1082,10 +1081,9 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 	unsigned nonce = UINT_MAX;
 	unsigned i;

-	if (k.k->type == KEY_TYPE_btree_ptr)
+	if (k.k->type == KEY_TYPE_btree_ptr ||
+	    k.k->type == KEY_TYPE_btree_ptr_v2)
 		size_ondisk = c->opts.btree_node_size;
-	if (k.k->type == KEY_TYPE_btree_ptr_v2)
-		size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);

 	bkey_extent_entry_for_each(ptrs, entry) {
 		if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)

libbcachefs/journal.c

@@ -1011,13 +1011,19 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
 	}

 	list_for_each_entry(i, journal_entries, list) {
+		unsigned ptr;
+
 		seq = le64_to_cpu(i->j.seq);
 		BUG_ON(seq >= cur_seq);

 		if (seq < last_seq)
 			continue;

-		journal_seq_pin(j, seq)->devs = i->devs;
+		p = journal_seq_pin(j, seq);
+
+		p->devs.nr = 0;
+		for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+			bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
 	}

 	spin_lock(&j->lock);

libbcachefs/journal_io.c

@@ -46,15 +46,16 @@ struct journal_list {
  * be replayed:
  */
 static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
+			     struct bch_extent_ptr entry_ptr,
			     struct journal_list *jlist, struct jset *j,
			     bool bad)
 {
-	struct journal_replay *i, *pos;
-	struct bch_devs_list devs = { .nr = 0 };
+	struct journal_replay *i, *pos, *dup = NULL;
+	struct bch_extent_ptr *ptr;
 	struct list_head *where;
 	size_t bytes = vstruct_bytes(j);
 	u64 last_seq = 0;
-	int ret;
+	int ret = JOURNAL_ENTRY_ADD_OK;

 	list_for_each_entry_reverse(i, jlist->head, list) {
 		if (!JSET_NO_FLUSH(&i->j)) {
@@ -88,28 +89,29 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 	where = jlist->head;
 add:
-	i = where->next != jlist->head
+	dup = where->next != jlist->head
		? container_of(where->next, struct journal_replay, list)
		: NULL;

+	if (dup && le64_to_cpu(j->seq) != le64_to_cpu(dup->j.seq))
+		dup = NULL;
+
 	/*
	 * Duplicate journal entries? If so we want the one that didn't have a
	 * checksum error:
	 */
-	if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
-		if (i->bad) {
-			devs = i->devs;
-			__journal_replay_free(i);
+	if (dup) {
+		if (dup->bad) {
+			/* we'll replace @dup: */
		} else if (bad) {
			goto found;
		} else {
-			fsck_err_on(bytes != vstruct_bytes(&i->j) ||
-				    memcmp(j, &i->j, bytes), c,
+			fsck_err_on(bytes != vstruct_bytes(&dup->j) ||
+				    memcmp(j, &dup->j, bytes), c,
				    "found duplicate but non identical journal entries (seq %llu)",
				    le64_to_cpu(j->seq));
			goto found;
		}
	}

 	i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
@@ -118,17 +120,34 @@ add:
		goto out;
	}

-	list_add(&i->list, where);
-	i->devs	  = devs;
-	i->bad	  = bad;
+	i->nr_ptrs = 0;
+	i->bad	   = bad;
 	i->ignore  = false;
 	memcpy(&i->j, j, bytes);

+	if (dup) {
+		i->nr_ptrs = dup->nr_ptrs;
+		memcpy(i->ptrs, dup->ptrs, sizeof(dup->ptrs));
+		__journal_replay_free(dup);
+	}
+
+	list_add(&i->list, where);
 found:
-	if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx))
-		bch2_dev_list_add_dev(&i->devs, ca->dev_idx);
-	else
-		fsck_err_on(1, c, "duplicate journal entries on same device");
-	ret = JOURNAL_ENTRY_ADD_OK;
+	for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
+		if (ptr->dev == ca->dev_idx) {
+			bch_err(c, "duplicate journal entry %llu on same device",
+				le64_to_cpu(i->j.seq));
+			goto out;
+		}
+	}
+
+	if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
+		bch_err(c, "found too many copies of journal entry %llu",
+			le64_to_cpu(i->j.seq));
+		goto out;
+	}
+
+	i->ptrs[i->nr_ptrs++] = entry_ptr;
 out:
 fsck_err:
 	return ret;
@@ -654,7 +673,10 @@ reread:
			ja->bucket_seq[bucket] = le64_to_cpu(j->seq);

			mutex_lock(&jlist->lock);
-			ret = journal_entry_add(c, ca, jlist, j, ret != 0);
+			ret = journal_entry_add(c, ca, (struct bch_extent_ptr) {
+						.dev = ca->dev_idx,
+						.offset	= offset,
+						}, jlist, j, ret != 0);
			mutex_unlock(&jlist->lock);

			switch (ret) {
@@ -742,6 +764,23 @@ err:
 	goto out;
 }

+static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+				      struct journal_replay *j)
+{
+	unsigned i;
+
+	for (i = 0; i < j->nr_ptrs; i++) {
+		struct bch_dev *ca = c->devs[j->ptrs[i].dev];
+
+		if (i)
+			pr_buf(out, " ");
+		pr_buf(out, "%u:%llu (offset %llu)",
+		       j->ptrs[i].dev,
+		       (u64) j->ptrs[i].offset,
+		       (u64) j->ptrs[i].offset % ca->mi.bucket_size);
+	}
+}
+
 int bch2_journal_read(struct bch_fs *c, struct list_head *list,
		      u64 *blacklist_seq, u64 *start_seq)
 {
@@ -839,6 +878,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
		while (seq < le64_to_cpu(i->j.seq)) {
			u64 missing_start, missing_end;
+			char buf1[200], buf2[200];

			while (seq < le64_to_cpu(i->j.seq) &&
			       bch2_journal_seq_is_blacklisted(c, seq, false))
@@ -853,10 +893,23 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
			       !bch2_journal_seq_is_blacklisted(c, seq, false))
				seq++;

+			if (i->list.prev != list) {
+				struct printbuf out = PBUF(buf1);
+				struct journal_replay *p = list_prev_entry(i, list);
+
+				bch2_journal_ptrs_to_text(&out, c, p);
+				pr_buf(&out, " size %llu", vstruct_sectors(&p->j, c->block_bits));
+			} else
+				sprintf(buf1, "(none)");
+			bch2_journal_ptrs_to_text(&PBUF(buf2), c, i);
+
			missing_end = seq - 1;
-			fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+			fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
+				 "  prev at %s\n"
+				 "  next at %s",
				 missing_start, missing_end,
-				 last_seq, *blacklist_seq - 1);
+				 last_seq, *blacklist_seq - 1,
+				 buf1, buf2);
		}

		seq++;
@@ -865,7 +918,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
 	list_for_each_entry(i, list, list) {
		struct jset_entry *entry;
		struct bkey_i *k, *_n;
-		struct bch_replicas_padded replicas;
+		struct bch_replicas_padded replicas = {
+			.e.data_type = BCH_DATA_journal,
+			.e.nr_required = 1,
+		};
+		unsigned ptr;
		char buf[80];

		if (i->ignore)
@@ -875,13 +932,14 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list,
		if (ret)
			goto fsck_err;

+		for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+			replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
+
		/*
		 * If we're mounting in degraded mode - if we didn't read all
		 * the devices - this is wrong:
		 */

-		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs);
-
		if (!degraded &&
		    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
		     fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c,
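Each journal entry found during recovery now carries the full bch_extent_ptr of every readable copy (device plus offset) instead of a bare device list, which is what lets the missing-entries fsck message above report where the neighboring entries were found. A toy model of the per-copy bookkeeping done at the found: label (simplified types; MAX_PTRS stands in for BCH_REPLICAS_MAX):

#include <stdbool.h>

#define MAX_PTRS 4	/* stand-in for BCH_REPLICAS_MAX */

struct ptr {
	unsigned		dev;
	unsigned long long	offset;
};

struct replay {
	struct ptr	ptrs[MAX_PTRS];
	unsigned	nr_ptrs;
};

/* Mirrors the checks journal_entry_add() performs before recording a
 * copy: reject a second copy on the same device, and cap the number of
 * copies tracked. Returns false when the pointer is not recorded. */
static bool replay_add_ptr(struct replay *i, struct ptr entry_ptr)
{
	unsigned j;

	for (j = 0; j < i->nr_ptrs; j++)
		if (i->ptrs[j].dev == entry_ptr.dev)
			return false;	/* duplicate entry on one device */

	if (i->nr_ptrs >= MAX_PTRS)
		return false;		/* too many copies */

	i->ptrs[i->nr_ptrs++] = entry_ptr;
	return true;
}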

libbcachefs/journal_io.h

@@ -8,7 +8,9 @@
  */
 struct journal_replay {
 	struct list_head	list;
-	struct bch_devs_list	devs;
+	struct bch_extent_ptr	ptrs[BCH_REPLICAS_MAX];
+	unsigned		nr_ptrs;
+
 	/* checksum error, but we may want to try using it anyways: */
 	bool			bad;
 	bool			ignore;

libbcachefs/recovery.c

@@ -40,78 +40,169 @@ static void drop_alloc_keys(struct journal_keys *keys)

 /* iterate over keys read from the journal: */

-static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
-					      enum btree_id id, unsigned level,
-					      struct bpos pos)
+static int __journal_key_cmp(enum btree_id	l_btree_id,
+			     unsigned		l_level,
+			     struct bpos	l_pos,
+			     struct journal_key *r)
+{
+	return (cmp_int(l_btree_id,	r->btree_id) ?:
+		cmp_int(l_level,	r->level) ?:
+		bkey_cmp(l_pos,	r->k->k.p));
+}
+
+static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
+{
+	return (cmp_int(l->btree_id,	r->btree_id) ?:
+		cmp_int(l->level,	r->level) ?:
+		bkey_cmp(l->k->k.p,	r->k->k.p));
+}
+
+static size_t journal_key_search(struct journal_keys *journal_keys,
+				 enum btree_id id, unsigned level,
+				 struct bpos pos)
 {
 	size_t l = 0, r = journal_keys->nr, m;

 	while (l < r) {
		m = l + ((r - l) >> 1);
-		if ((cmp_int(id, journal_keys->d[m].btree_id) ?:
-		     cmp_int(level, journal_keys->d[m].level) ?:
-		     bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0)
+		if (__journal_key_cmp(id, level, pos, &journal_keys->d[m]) > 0)
			l = m + 1;
		else
			r = m;
	}

 	BUG_ON(l < journal_keys->nr &&
-	       (cmp_int(id, journal_keys->d[l].btree_id) ?:
-		cmp_int(level, journal_keys->d[l].level) ?:
-		bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0);
+	       __journal_key_cmp(id, level, pos, &journal_keys->d[l]) > 0);

 	BUG_ON(l &&
-	       (cmp_int(id, journal_keys->d[l - 1].btree_id) ?:
-		cmp_int(level, journal_keys->d[l - 1].level) ?:
-		bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0);
+	       __journal_key_cmp(id, level, pos, &journal_keys->d[l - 1]) <= 0);

-	return l < journal_keys->nr ? journal_keys->d + l : NULL;
+	return l;
+}
+
+static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned idx)
+{
+	struct bkey_i *n = iter->keys->d[idx].k;
+	struct btree_and_journal_iter *biter =
+		container_of(iter, struct btree_and_journal_iter, journal);
+
+	if (iter->idx > idx ||
+	    (iter->idx == idx &&
+	     biter->last &&
+	     bkey_cmp(n->k.p, biter->unpacked.p) <= 0))
+		iter->idx++;
+}
+
+int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
+			    unsigned level, struct bkey_i *k)
+{
+	struct journal_key n = {
+		.btree_id	= id,
+		.level		= level,
+		.k		= k,
+		.allocated	= true
+	};
+	struct journal_keys *keys = &c->journal_keys;
+	struct journal_iter *iter;
+	unsigned idx = journal_key_search(keys, id, level, k->k.p);
+
+	if (idx < keys->nr &&
+	    journal_key_cmp(&n, &keys->d[idx]) == 0) {
+		if (keys->d[idx].allocated)
+			kfree(keys->d[idx].k);
+		keys->d[idx] = n;
+		return 0;
+	}
+
+	if (keys->nr == keys->size) {
+		struct journal_keys new_keys = {
+			.nr			= keys->nr,
+			.size			= keys->size * 2,
+			.journal_seq_base	= keys->journal_seq_base,
+		};
+
+		new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL);
+		if (!new_keys.d)
+			return -ENOMEM;
+
+		memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
+		kvfree(keys->d);
+		*keys = new_keys;
+	}
+
+	array_insert_item(keys->d, keys->nr, idx, n);
+
+	list_for_each_entry(iter, &c->journal_iters, list)
+		journal_iter_fix(c, iter, idx);
+
+	return 0;
+}
+
+int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
+			    unsigned level, struct bpos pos)
+{
+	struct bkey_i *whiteout =
+		kmalloc(sizeof(struct bkey), GFP_KERNEL);
+	int ret;
+
+	if (!whiteout)
+		return -ENOMEM;
+
+	bkey_init(&whiteout->k);
+	whiteout->k.p = pos;
+
+	ret = bch2_journal_key_insert(c, id, level, whiteout);
+	if (ret)
+		kfree(whiteout);
+
+	return ret;
 }

 static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 {
-	if (iter->k &&
-	    iter->k < iter->keys->d + iter->keys->nr &&
-	    iter->k->btree_id	== iter->btree_id &&
-	    iter->k->level	== iter->level)
-		return iter->k->k;
+	struct journal_key *k = iter->idx - iter->keys->nr
+		? iter->keys->d + iter->idx : NULL;

-	iter->k = NULL;
+	if (k &&
+	    k->btree_id	== iter->btree_id &&
+	    k->level	== iter->level)
+		return k->k;
+
+	iter->idx = iter->keys->nr;
 	return NULL;
 }

 static void bch2_journal_iter_advance(struct journal_iter *iter)
 {
-	if (iter->k)
-		iter->k++;
+	if (iter->idx < iter->keys->nr)
+		iter->idx++;
 }

-static void bch2_journal_iter_init(struct journal_iter *iter,
-				   struct journal_keys *journal_keys,
+static void bch2_journal_iter_exit(struct journal_iter *iter)
+{
+	list_del(&iter->list);
+}
+
+static void bch2_journal_iter_init(struct bch_fs *c,
+				   struct journal_iter *iter,
				   enum btree_id id, unsigned level,
				   struct bpos pos)
 {
 	iter->btree_id	= id;
 	iter->level	= level;
-	iter->keys	= journal_keys;
-	iter->k		= journal_key_search(journal_keys, id, level, pos);
+	iter->keys	= &c->journal_keys;
+	iter->idx	= journal_key_search(&c->journal_keys, id, level, pos);
+	list_add(&iter->list, &c->journal_iters);
 }

 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
 {
-	return iter->btree
-		? bch2_btree_iter_peek(iter->btree)
-		: bch2_btree_node_iter_peek_unpack(&iter->node_iter,
-						   iter->b, &iter->unpacked);
+	return bch2_btree_node_iter_peek_unpack(&iter->node_iter,
						iter->b, &iter->unpacked);
 }

 static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
 {
-	if (iter->btree)
-		bch2_btree_iter_next(iter->btree);
-	else
-		bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
+	bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
 }

 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
@@ -160,7 +251,7 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
 	if (iter->b &&
	    bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
-		iter->journal.k = NULL;
+		iter->journal.idx = iter->journal.keys->nr;
		iter->last = none;
		return bkey_s_c_null;
	}
@@ -181,26 +272,20 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter)
 	return bch2_btree_and_journal_iter_peek(iter);
 }

-void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
-				      struct btree_trans *trans,
-				      struct journal_keys *journal_keys,
-				      enum btree_id id, struct bpos pos)
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
 {
-	memset(iter, 0, sizeof(*iter));
-
-	iter->btree = bch2_trans_get_iter(trans, id, pos, BTREE_ITER_PREFETCH);
-	bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos);
+	bch2_journal_iter_exit(&iter->journal);
 }

 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-						struct journal_keys *journal_keys,
+						struct bch_fs *c,
						struct btree *b)
 {
 	memset(iter, 0, sizeof(*iter));

 	iter->b = b;
 	bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
-	bch2_journal_iter_init(&iter->journal, journal_keys,
+	bch2_journal_iter_init(c, &iter->journal,
			       b->c.btree_id, b->c.level, b->data->min_key);
 }
@@ -244,7 +329,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
 	int ret = 0;

 	bch2_bkey_buf_init(&tmp);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

 	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		ret = key_fn(c, btree_id, b->c.level, k);
@@ -257,7 +342,8 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
			bch2_btree_and_journal_iter_advance(&iter);

			child = bch2_btree_node_get_noiter(c, tmp.k,
-					b->c.btree_id, b->c.level - 1);
+					b->c.btree_id, b->c.level - 1,
+					false);

			ret = PTR_ERR_OR_ZERO(child);
			if (ret)
@@ -277,6 +363,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
		}
	}

+	bch2_btree_and_journal_iter_exit(&iter);
 	bch2_bkey_buf_exit(&tmp, c);
 	return ret;
 }
@@ -333,6 +420,12 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 void bch2_journal_keys_free(struct journal_keys *keys)
 {
+	struct journal_key *i;
+
+	for (i = keys->d; i < keys->d + keys->nr; i++)
+		if (i->allocated)
+			kfree(i->k);
+
 	kvfree(keys->d);
 	keys->d = NULL;
 	keys->nr = 0;
@@ -361,7 +454,9 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
		nr_keys++;
	}

-	keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL);
+	keys.size = roundup_pow_of_two(nr_keys);
+
+	keys.d = kvmalloc(sizeof(keys.d[0]) * keys.size, GFP_KERNEL);
 	if (!keys.d)
		goto err;
@@ -545,14 +640,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 	return ret;
 }

-static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id,
-				   unsigned level, struct bkey_i *k)
+static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
 {
-	return bch2_trans_do(c, NULL, NULL,
-			     BTREE_INSERT_NOFAIL|
-			     BTREE_INSERT_LAZY_RW|
-			     BTREE_INSERT_JOURNAL_REPLAY,
-			     __bch2_journal_replay_key(&trans, id, level, k));
+	unsigned commit_flags = BTREE_INSERT_NOFAIL|
+		BTREE_INSERT_LAZY_RW;
+
+	if (!k->allocated)
+		commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
+
+	return bch2_trans_do(c, NULL, NULL, commit_flags,
+			     __bch2_journal_replay_key(&trans, k->btree_id, k->level, k->k));
 }

 static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
@@ -628,7 +725,7 @@ static int bch2_journal_replay(struct bch_fs *c,
		if (i->level) {
			j->replay_journal_seq = keys.journal_seq_base + i->journal_seq;
-			ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+			ret = bch2_journal_replay_key(c, i);
			if (ret)
				goto err;
		}
@@ -658,7 +755,7 @@ static int bch2_journal_replay(struct bch_fs *c,
		ret = i->k->k.size
			? bch2_extent_replay_key(c, i->btree_id, i->k)
-			: bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+			: bch2_journal_replay_key(c, i);
		if (ret)
			goto err;
	}
@@ -670,7 +767,8 @@ static int bch2_journal_replay(struct bch_fs *c,
 	bch2_journal_flush_all_pins(j);
 	return bch2_journal_error(j);
 err:
-	bch_err(c, "journal replay: error %d while replaying key", ret);
+	bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+		ret, bch2_btree_ids[i->btree_id], i->level);
 	return ret;
 }
@@ -1105,7 +1203,7 @@ use_clean:
	    test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
		bch_info(c, "starting mark and sweep");
		err = "error in mark and sweep";
-		ret = bch2_gc(c, &c->journal_keys, true);
+		ret = bch2_gc(c, true);
		if (ret)
			goto err;
		bch_verbose(c, "mark and sweep done");
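The journal_keys array now doubles as a mutable overlay: GC repairs insert fixed keys with bch2_journal_key_insert(), which binary-searches for the slot, overwrites an exact match, and otherwise grows the array (doubling via kvmalloc) and shifts the tail up, fixing up any live iterators. A userspace sketch of that search-and-insert scheme, with simplified stand-in types rather than the bcachefs ones:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

struct key { int btree_id, level, pos; };

struct keys {
	struct key	*d;
	size_t		nr, size;
};

/* Ordered by (btree_id, level, pos), like journal_key_cmp(): */
static int key_cmp(const struct key *l, const struct key *r)
{
	if (l->btree_id != r->btree_id)	return l->btree_id < r->btree_id ? -1 : 1;
	if (l->level != r->level)	return l->level < r->level ? -1 : 1;
	if (l->pos != r->pos)		return l->pos < r->pos ? -1 : 1;
	return 0;
}

/* Lower-bound binary search, as in journal_key_search(): */
static size_t key_search(struct keys *keys, const struct key *search)
{
	size_t l = 0, r = keys->nr, m;

	while (l < r) {
		m = l + ((r - l) >> 1);
		if (key_cmp(search, &keys->d[m]) > 0)
			l = m + 1;
		else
			r = m;
	}
	return l;	/* first index whose key is >= search */
}

static int key_insert(struct keys *keys, struct key n)
{
	size_t idx = key_search(keys, &n);

	if (idx < keys->nr && !key_cmp(&n, &keys->d[idx])) {
		keys->d[idx] = n;	/* exact match: newer key wins */
		return 0;
	}

	if (keys->nr == keys->size) {	/* grow by doubling */
		size_t new_size = keys->size ? keys->size * 2 : 8;
		struct key *d = realloc(keys->d, new_size * sizeof(*d));

		if (!d)
			return -1;
		keys->d	   = d;
		keys->size = new_size;
	}

	/* shift the tail up and insert, like array_insert_item(): */
	memmove(&keys->d[idx + 1], &keys->d[idx],
		(keys->nr - idx) * sizeof(keys->d[0]));
	keys->d[idx] = n;
	keys->nr++;
	return 0;
}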

libbcachefs/recovery.h

@@ -6,10 +6,11 @@
	for (i = (keys).d; i < (keys).d + (keys).nr; (i)++)

 struct journal_iter {
+	struct list_head	list;
 	enum btree_id		btree_id;
 	unsigned		level;
+	size_t			idx;
 	struct journal_keys	*keys;
-	struct journal_key	*k;
 };

 /*
@@ -17,8 +18,6 @@ struct journal_iter {
  */

 struct btree_and_journal_iter {
-	struct btree_iter	*btree;
-
 	struct btree		*b;
 	struct btree_node_iter	node_iter;
 	struct bkey		unpacked;
@@ -32,16 +31,18 @@ struct btree_and_journal_iter {
 	}			last;
 };

+int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
+			    unsigned, struct bkey_i *);
+int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
+			    unsigned, struct bpos);
+
 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);

-void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
-				      struct btree_trans *,
-				      struct journal_keys *,
-				      enum btree_id, struct bpos);
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-						struct journal_keys *,
+						struct bch_fs *,
						struct btree *);

 typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);

libbcachefs/super-io.c

@@ -276,19 +276,19 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
		return "Bad number of member devices";

 	if (!BCH_SB_META_REPLICAS_WANT(sb) ||
-	    BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_META_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
		return "Invalid number of metadata replicas";

 	if (!BCH_SB_META_REPLICAS_REQ(sb) ||
-	    BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_META_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
		return "Invalid number of metadata replicas";

 	if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
-	    BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_DATA_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
		return "Invalid number of data replicas";

 	if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
-	    BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+	    BCH_SB_DATA_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
		return "Invalid number of data replicas";

 	if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)

libbcachefs/super.c

@@ -684,6 +684,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
			  bch2_blacklist_entries_gc);

 	INIT_LIST_HEAD(&c->journal_entries);
+	INIT_LIST_HEAD(&c->journal_iters);

 	INIT_LIST_HEAD(&c->fsck_errors);
 	mutex_init(&c->fsck_error_lock);

libbcachefs/sysfs.c

@@ -475,7 +475,7 @@ STORE(bch2_fs)
	 */
 #if 0
		down_read(&c->state_lock);
-		bch2_gc(c, NULL, false, false);
+		bch2_gc(c, false, false);
		up_read(&c->state_lock);
 #else
		bch2_gc_gens(c);