Update bcachefs sources to f1c9030ccb bcachefs: Don't error out when just reading the journal

This commit is contained in:
Kent Overstreet 2022-12-01 11:20:40 -05:00
parent 0c98cd7bf6
commit f82cd58008
9 changed files with 314 additions and 131 deletions

View File

@ -1 +1 @@
eabde7cb370040434991209cc3644c45079900da
f1c9030ccbf6d7b5c46f08f92ee878bfc9f6ee6b

View File

@ -721,7 +721,106 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
return 0;
}
/*
* This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for
* extents style btrees, but works on non-extents btrees:
*/
struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
{
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
if (bkey_err(k))
return k;
if (k.k->type) {
return k;
} else {
struct btree_iter iter2;
struct bpos next;
bch2_trans_copy_iter(&iter2, iter);
k = bch2_btree_iter_peek_upto(&iter2,
bkey_min(bkey_min(end,
iter->path->l[0].b->key.k.p),
POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)));
next = iter2.pos;
bch2_trans_iter_exit(iter->trans, &iter2);
BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
if (bkey_err(k))
return k;
bkey_init(hole);
hole->p = iter->pos;
bch2_key_resize(hole, next.offset - iter->pos.offset);
return (struct bkey_s_c) { hole, NULL };
}
}
/*
 * Move @bucket forward to a position that can hold a real bucket.
 *
 * Returns true if @bucket already refers to an existing bucket, or was
 * advanced to the first bucket of its own or a later device.  Returns false
 * when no further device was found.
 */
static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
{
	struct bch_dev *ca;
	unsigned iter;

	if (bch2_dev_bucket_exists(c, *bucket))
		return true;

	if (bch2_dev_exists2(c, bucket->inode)) {
		ca = bch_dev_bkey_exists(c, bucket->inode);

		/* Positioned before the device's first bucket: skip up to it */
		if (bucket->offset < ca->mi.first_bucket) {
			bucket->offset = ca->mi.first_bucket;
			return true;
		}

		/*
		 * Device exists but the bucket does not (presumably we ran
		 * past its last bucket): continue from the next device index.
		 */
		bucket->inode++;
		bucket->offset = 0;
	}

	rcu_read_lock();
	iter = bucket->inode;
	ca = __bch2_next_dev(c, &iter, NULL);
	if (ca) {
		/*
		 * The device found may have a higher index than the one we
		 * started searching from; record it, otherwise the caller
		 * would keep retrying a position on a nonexistent device.
		 */
		bucket->inode	= ca->dev_idx;
		bucket->offset	= ca->mi.first_bucket;
	}
	rcu_read_unlock();

	return ca != NULL;
}
struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, struct bkey *hole)
{
struct bch_fs *c = iter->trans->c;
struct bkey_s_c k;
again:
k = bch2_get_key_or_hole(iter, POS_MAX, hole);
if (bkey_err(k))
return k;
if (!k.k->type) {
struct bpos bucket = bkey_start_pos(k.k);
if (!bch2_dev_bucket_exists(c, bucket)) {
if (!next_bucket(c, &bucket))
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter, bucket);
goto again;
}
if (!bch2_dev_bucket_exists(c, k.k->p)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
bch2_key_resize(hole, ca->mi.nbuckets - bucket.offset);
}
}
return k;
}
static int bch2_check_alloc_key(struct btree_trans *trans,
struct bkey_s_c alloc_k,
struct btree_iter *alloc_iter,
struct btree_iter *discard_iter,
struct btree_iter *freespace_iter)
@ -731,20 +830,10 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
unsigned discard_key_type, freespace_key_type;
struct bkey_s_c alloc_k, k;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
alloc_k = bch2_dev_bucket_exists(c, alloc_iter->pos)
? bch2_btree_iter_peek_slot(alloc_iter)
: bch2_btree_iter_peek(alloc_iter);
if (!alloc_k.k)
return 1;
ret = bkey_err(alloc_k);
if (ret)
return ret;
if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
"alloc key for invalid device:bucket %llu:%llu",
alloc_k.k->p.inode, alloc_k.k->p.offset))
@ -827,6 +916,61 @@ fsck_err:
return ret;
}
static int bch2_check_alloc_hole(struct btree_trans *trans,
struct bpos start,
struct bpos *end,
struct btree_iter *freespace_iter)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
ca = bch_dev_bkey_exists(c, start.inode);
if (!ca->mi.freespace_initialized)
return 0;
bch2_btree_iter_set_pos(freespace_iter, start);
k = bch2_btree_iter_peek_slot(freespace_iter);
ret = bkey_err(k);
if (ret)
goto err;
*end = bkey_min(k.k->p, *end);
if (k.k->type != KEY_TYPE_set &&
(c->opts.reconstruct_alloc ||
fsck_err(c, "hole in alloc btree missing in freespace btree\n"
" device %llu buckets %llu-%llu",
freespace_iter->pos.inode,
freespace_iter->pos.offset,
end->offset))) {
struct bkey_i *update =
bch2_trans_kmalloc(trans, sizeof(*update));
ret = PTR_ERR_OR_ZERO(update);
if (ret)
goto err;
bkey_init(&update->k);
update->k.type = KEY_TYPE_set;
update->k.p = freespace_iter->pos;
bch2_key_resize(&update->k,
min_t(u64, U32_MAX, end->offset -
freespace_iter->pos.offset));
ret = bch2_trans_update(trans, freespace_iter, update, 0);
if (ret)
goto err;
}
err:
fsck_err:
printbuf_exit(&buf);
return ret;
}
static int bch2_check_discard_freespace_key(struct btree_trans *trans,
struct btree_iter *iter)
{
@ -886,6 +1030,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter, discard_iter, freespace_iter;
struct bkey hole;
struct bkey_s_c k;
int ret = 0;
@ -897,17 +1042,52 @@ int bch2_check_alloc_info(struct bch_fs *c)
BTREE_ITER_PREFETCH);
bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
BTREE_ITER_PREFETCH);
while (1) {
ret = commit_do(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_check_alloc_key(&trans, &iter,
&discard_iter,
&freespace_iter));
struct bpos next;
bch2_trans_begin(&trans);
k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
if (!k.k)
break;
bch2_btree_iter_advance(&iter);
if (k.k->type) {
next = bpos_nosnap_successor(k.k->p);
ret = bch2_check_alloc_key(&trans,
k, &iter,
&discard_iter,
&freespace_iter);
if (ret)
break;
} else {
next = k.k->p;
ret = bch2_check_alloc_hole(&trans,
bkey_start_pos(k.k),
&next,
&freespace_iter);
if (ret)
goto bkey_err;
}
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret)
goto bkey_err;
bch2_btree_iter_set_pos(&iter, next);
bkey_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &freespace_iter);
bch2_trans_iter_exit(&trans, &discard_iter);
@ -1305,6 +1485,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey hole;
struct bpos end = POS(ca->dev_idx, ca->mi.nbuckets);
struct bch_member *m;
int ret;
@ -1320,12 +1501,13 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
*/
while (1) {
bch2_trans_begin(&trans);
ret = 0;
if (bkey_ge(iter.pos, end))
if (bkey_ge(iter.pos, end)) {
ret = 0;
break;
}
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_get_key_or_hole(&iter, end, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
@ -1347,34 +1529,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
bch2_btree_iter_advance(&iter);
} else {
/*
* When there's a hole, process a whole range of keys
* all at once:
*
* This is similar to how extent btree iterators in
* slots mode will synthesize a whole range - a
* KEY_TYPE_deleted extent.
*
* But alloc keys aren't extents (they have zero size),
* so we're open coding it here:
*/
struct btree_iter iter2;
struct bkey_i *freespace;
struct bpos next;
bch2_trans_copy_iter(&iter2, &iter);
k = bch2_btree_iter_peek_upto(&iter2,
bkey_min(bkey_min(end,
iter.path->l[0].b->key.k.p),
POS(iter.pos.inode, iter.pos.offset + U32_MAX - 1)));
next = iter2.pos;
ret = bkey_err(k);
bch2_trans_iter_exit(&trans, &iter2);
BUG_ON(next.offset >= iter.pos.offset + U32_MAX);
if (ret)
goto bkey_err;
freespace = bch2_trans_kmalloc(&trans, sizeof(*freespace));
ret = PTR_ERR_OR_ZERO(freespace);
@ -1382,10 +1537,9 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
goto bkey_err;
bkey_init(&freespace->k);
freespace->k.type = KEY_TYPE_set;
freespace->k.p = iter.pos;
bch2_key_resize(&freespace->k, next.offset - iter.pos.offset);
freespace->k.type = KEY_TYPE_set;
freespace->k.p = k.k->p;
freespace->k.size = k.k->size;
ret = __bch2_btree_insert(&trans, BTREE_ID_freespace, freespace) ?:
bch2_trans_commit(&trans, NULL, NULL,
@ -1394,7 +1548,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
if (ret)
goto bkey_err;
bch2_btree_iter_set_pos(&iter, next);
bch2_btree_iter_set_pos(&iter, k.k->p);
}
bkey_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))

View File

@ -662,6 +662,32 @@ void bch2_btree_path_level_init(struct btree_trans *trans,
/* Btree path: fixups after btree node updates: */
static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
if (!i->cached &&
i->level == b->c.level &&
i->btree_id == b->c.btree_id &&
bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
i->k->k.p);
if (j_k) {
i->old_k = j_k->k;
i->old_v = &j_k->v;
}
}
}
}
/*
* A btree node is being replaced - update the iterator to point to the new
* node:
@ -685,6 +711,8 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
bch2_btree_path_level_init(trans, path, b);
}
bch2_trans_revalidate_updates_in_node(trans, b);
}
/*
@ -697,6 +725,8 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
trans_for_each_path_with_node(trans, b, path)
__btree_path_level_init(path, b->c.level);
bch2_trans_revalidate_updates_in_node(trans, b);
}
/* Btree path: traverse, set_pos: */
@ -2598,14 +2628,9 @@ static inline void btree_path_list_remove(struct btree_trans *trans,
unsigned i;
EBUG_ON(path->sorted_idx >= trans->nr_sorted);
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
trans->nr_sorted--;
memmove_u64s_down_small(trans->sorted + path->sorted_idx,
trans->sorted + path->sorted_idx + 1,
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
#else
array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
#endif
for (i = path->sorted_idx; i < trans->nr_sorted; i++)
trans->paths[trans->sorted[i]].sorted_idx = i;
@ -2629,15 +2654,7 @@ static inline void btree_path_list_add(struct btree_trans *trans,
trans->traverse_all_idx >= path->sorted_idx)
trans->traverse_all_idx++;
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
trans->sorted + path->sorted_idx,
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
trans->nr_sorted++;
trans->sorted[path->sorted_idx] = path->idx;
#else
array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
#endif
for (i = path->sorted_idx; i < trans->nr_sorted; i++)
trans->paths[trans->sorted[i]].sorted_idx = i;

View File

@ -558,6 +558,22 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
return k;
}
/*
 * Peek the next key up to @end, transparently retrying on transaction
 * restart.
 *
 * The transaction is (re)begun whenever it holds too many iterators or the
 * peek fails with a transaction restart error; any other result - a key, a
 * null key or a different error - is handed back to the caller.
 */
static inline struct bkey_s_c
__bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos end,
					unsigned flags)
{
	struct bkey_s_c k;

	for (;;) {
		if (!btree_trans_too_many_iters(trans)) {
			k = bch2_btree_iter_peek_upto_type(iter, end, flags);

			if (!bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))
				return k;
		}

		bch2_trans_begin(trans);
	}
}
#define lockrestart_do(_trans, _do) \
({ \
u32 _restart_count; \
@ -716,6 +732,15 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
/*
 * for_each_btree_key_upto - iterate over keys of a btree up to a bound
 *
 * Initializes _iter on _btree_id at _start with _flags, then on every pass
 * fetches the next key into _k via __bch2_btree_iter_peek_upto_and_restart()
 * (bounded by _end) and advances the iterator afterwards.
 *
 * The loop stops when the peek returns an error, which is left in _ret, or
 * when no key is returned (_ret is then 0).  The macro does not release
 * _iter itself.
 */
#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = __bch2_btree_iter_peek_upto_and_restart((_trans), \
&(_iter), _end, _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \

View File

@ -24,6 +24,28 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
/*
 * Debug-only sanity check (CONFIG_BCACHEFS_DEBUG): assert that the old key
 * and old value stashed in update @i still match what a fresh lookup
 * returns.  While journal replay is unfinished, a key present in the journal
 * keys overrides the one from the btree path.  needs_whiteout is copied over
 * first so that it does not take part in the comparison.
 */
static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bkey unpacked;
	struct bkey_s_c cur = bch2_btree_path_peek_slot(i->path, &unpacked);

	if (unlikely(trans->journal_replay_not_finished)) {
		struct bkey_i *replay_key =
			bch2_journal_keys_peek_slot(trans->c, i->btree_id,
						    i->level, i->k->k.p);

		if (replay_key)
			cur = bkey_i_to_s_c(replay_key);
	}

	i->old_k.needs_whiteout = cur.k->needs_whiteout;

	BUG_ON(memcmp(&i->old_k, cur.k, sizeof(struct bkey)));
	BUG_ON(i->old_v != cur.v);
#endif
}
static int __must_check
bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
struct bkey_i *, enum btree_update_flags);
@ -341,6 +363,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
struct btree_insert_entry *i;
unsigned new_u64s;
struct bkey_i *new_k;
@ -368,6 +391,10 @@ btree_key_can_insert_cached(struct btree_trans *trans,
return -ENOMEM;
}
trans_for_each_update(trans, i)
if (i->old_v == &ck->k->v)
i->old_v = &new_k->v;
ck->u64s = new_u64s;
ck->k = new_k;
return 0;
@ -383,6 +410,8 @@ static int run_one_mem_trigger(struct btree_trans *trans,
struct bkey_i *new = i->k;
int ret;
verify_update_old_key(trans, i);
if (unlikely(flags & BTREE_TRIGGER_NORUN))
return 0;
@ -420,6 +449,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
struct bkey old_k = i->old_k;
struct bkey_s_c old = { &old_k, i->old_v };
verify_update_old_key(trans, i);
if ((i->flags & BTREE_TRIGGER_NORUN) ||
!(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
return 0;
@ -598,33 +629,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
/*
* Revalidate before calling mem triggers - XXX, ugly:
*
* - successful btree node splits don't cause transaction
* restarts and will have invalidated the pointer to the bkey
* value
* - btree_node_lock_for_insert() -> btree_node_prep_for_write()
* when it has to resort
* - btree_key_can_insert_cached() when it has to reallocate
*
* Ugly because we currently have no way to tell if the
* pointer's been invalidated, which means it's debatable
* whether we should be stashing the old key at all.
*/
i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
i->k->k.p);
if (j_k) {
i->old_k = j_k->k;
i->old_v = &j_k->v;
}
}
}
/*
@ -690,6 +694,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (i->key_cache_already_flushed)
continue;
verify_update_old_key(trans, i);
entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,

View File

@ -31,14 +31,12 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
u64 sectors = 0;
int ret;
for_each_btree_key(trans, iter, BTREE_ID_extents,
SPOS(inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode != inum)
break;
for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
SPOS(inum, 0, snapshot),
POS(inum, U64_MAX),
0, k, ret)
if (bkey_extent_is_allocation(k.k))
sectors += k.k->size;
}
bch2_trans_iter_exit(trans, &iter);
@ -54,11 +52,10 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
u64 subdirs = 0;
int ret;
for_each_btree_key(trans, iter, BTREE_ID_dirents,
SPOS(inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode != inum)
break;
for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
SPOS(inum, 0, snapshot),
POS(inum, U64_MAX),
0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
continue;
@ -66,7 +63,6 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
if (d.v->d_type == DT_DIR)
subdirs++;
}
bch2_trans_iter_exit(trans, &iter);
return ret ?: subdirs;

View File

@ -1188,8 +1188,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
if (!last_seq) {
fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
ret = -1;
goto err;
return 0;
}
bch_info(c, "journal read done, replaying entries %llu-%llu",

View File

@ -1118,6 +1118,13 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
/*
* note: cmd_list_journal needs the blacklist table fully up to date so
* it can asterisk ignored journal entries:
*/
if (c->opts.read_journal_only)
goto out;
genradix_for_each_reverse(&c->journal_entries, iter, i)
if (*i && !(*i)->ignore) {
last_journal_entry = &(*i)->j;
@ -1189,13 +1196,6 @@ use_clean:
}
}
/*
* note: cmd_list_journal needs the blacklist table fully up to date so
* it can asterisk ignored journal entries:
*/
if (c->opts.read_journal_only)
goto out;
ret = bch2_fs_journal_start(&c->journal, journal_seq);
if (ret)
goto err;

View File

@ -582,20 +582,6 @@ static inline void memmove_u64s_down(void *dst, const void *src,
__memmove_u64s_down(dst, src, u64s);
}
/*
 * Unchecked variant of memmove_u64s_down_small(): forwards straight to
 * memcpy_u64s_small() with the same arguments.
 * NOTE(review): presumably @u64s is a count of 64 bit words, and this relies
 * on memcpy_u64s_small() being safe for overlapping regions when dst is
 * below src - confirm against its definition.
 */
static inline void __memmove_u64s_down_small(void *dst, const void *src,
unsigned u64s)
{
memcpy_u64s_small(dst, src, u64s);
}
static inline void memmove_u64s_down_small(void *dst, const void *src,
unsigned u64s)
{
EBUG_ON(dst > src);
__memmove_u64s_down_small(dst, src, u64s);
}
static inline void __memmove_u64s_up_small(void *_dst, const void *_src,
unsigned u64s)
{