Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-07-10 14:17:18 -04:00
parent 73bf371f4c
commit 21ae8a4b71
12 changed files with 355 additions and 103 deletions

View File

@ -1 +1 @@
fb39031ade476044b4d89e6a8f20de8e025be39c
070ec8d07bcab34fde39499a79b9da6f4254ec7c

View File

@ -28,6 +28,7 @@
#include <linux/bit_spinlock.h>
#define BIT(nr) (1UL << (nr))
#define BIT_ULL(nr) (1ULL << (nr))
#include <linux/rhashtable-types.h>
/*

View File

@ -660,12 +660,11 @@ enum bch_write_ref {
#define PASS_FSCK BIT(1)
#define PASS_UNCLEAN BIT(2)
#define PASS_ALWAYS BIT(3)
#define PASS_UPGRADE(v) ((v) << 4)
#define BCH_RECOVERY_PASSES() \
x(alloc_read, PASS_ALWAYS) \
x(stripes_read, PASS_ALWAYS) \
x(initialize_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \
x(initialize_subvolumes, 0) \
x(snapshots_read, PASS_ALWAYS) \
x(check_allocations, PASS_FSCK) \
x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
@ -677,8 +676,8 @@ enum bch_write_ref {
x(check_extents_to_backpointers,PASS_FSCK) \
x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, PASS_UPGRADE(bcachefs_metadata_version_bucket_gens)) \
x(fs_upgrade_for_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \
x(bucket_gens_init, 0) \
x(fs_upgrade_for_subvolumes, 0) \
x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \
@ -690,7 +689,7 @@ enum bch_write_ref {
x(check_root, PASS_FSCK) \
x(check_directory_structure, PASS_FSCK) \
x(check_nlinks, PASS_FSCK) \
x(fix_reflink_p, PASS_UPGRADE(bcachefs_metadata_version_reflink_p_fix)) \
x(fix_reflink_p, 0) \
enum bch_recovery_pass {
#define x(n, when) BCH_RECOVERY_PASS_##n,
@ -1033,6 +1032,8 @@ struct bch_fs {
u64 journal_replay_seq_start;
u64 journal_replay_seq_end;
enum bch_recovery_pass curr_recovery_pass;
/* bitmap of explicitly enabled recovery passes: */
u64 recovery_passes_explicit;
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
@ -1177,12 +1178,6 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
return dev < c->sb.nr_devices && c->devs[dev];
}
static inline bool bch2_version_upgrading_to(const struct bch_fs *c, unsigned new_version)
{
return c->sb.version_upgrade_complete < new_version &&
c->sb.version >= new_version;
}
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }

View File

@ -1148,6 +1148,8 @@ struct bch_snapshot {
__le32 children[2];
__le32 subvol;
__le32 tree;
__le32 depth;
__le32 skip[3];
};
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
@ -1578,32 +1580,60 @@ struct bch_sb_field_journal_seq_blacklist {
#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10)) \
x(inode_btree_change, BCH_VERSION(0, 11)) \
x(snapshot, BCH_VERSION(0, 12)) \
x(inode_backpointers, BCH_VERSION(0, 13)) \
x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \
x(snapshot_2, BCH_VERSION(0, 15)) \
x(reflink_p_fix, BCH_VERSION(0, 16)) \
x(subvol_dirent, BCH_VERSION(0, 17)) \
x(inode_v2, BCH_VERSION(0, 18)) \
x(freespace, BCH_VERSION(0, 19)) \
x(alloc_v4, BCH_VERSION(0, 20)) \
x(new_data_types, BCH_VERSION(0, 21)) \
x(backpointers, BCH_VERSION(0, 22)) \
x(inode_v3, BCH_VERSION(0, 23)) \
x(unwritten_extents, BCH_VERSION(0, 24)) \
x(bucket_gens, BCH_VERSION(0, 25)) \
x(lru_v2, BCH_VERSION(0, 26)) \
x(fragmentation_lru, BCH_VERSION(0, 27)) \
x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \
x(snapshot_trees, BCH_VERSION(0, 29)) \
x(major_minor, BCH_VERSION(1, 0))
/*
 * Marker bit (bit 63) used in a metadata version's upgrade pass mask.
 * bch2_upgrade_recovery_passes() replaces it with the set returned by
 * bch2_fsck_recovery_passes() and clears it from the mask it returns.
 */
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_btree_change, BCH_VERSION(0, 11), \
RECOVERY_PASS_ALL_FSCK) \
x(snapshot, BCH_VERSION(0, 12), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_backpointers, BCH_VERSION(0, 13), \
RECOVERY_PASS_ALL_FSCK) \
x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \
RECOVERY_PASS_ALL_FSCK) \
x(snapshot_2, BCH_VERSION(0, 15), \
BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \
BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \
RECOVERY_PASS_ALL_FSCK) \
x(reflink_p_fix, BCH_VERSION(0, 16), \
BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \
x(subvol_dirent, BCH_VERSION(0, 17), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v2, BCH_VERSION(0, 18), \
RECOVERY_PASS_ALL_FSCK) \
x(freespace, BCH_VERSION(0, 19), \
RECOVERY_PASS_ALL_FSCK) \
x(alloc_v4, BCH_VERSION(0, 20), \
RECOVERY_PASS_ALL_FSCK) \
x(new_data_types, BCH_VERSION(0, 21), \
RECOVERY_PASS_ALL_FSCK) \
x(backpointers, BCH_VERSION(0, 22), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v3, BCH_VERSION(0, 23), \
RECOVERY_PASS_ALL_FSCK) \
x(unwritten_extents, BCH_VERSION(0, 24), \
RECOVERY_PASS_ALL_FSCK) \
x(bucket_gens, BCH_VERSION(0, 25), \
BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
RECOVERY_PASS_ALL_FSCK) \
x(lru_v2, BCH_VERSION(0, 26), \
RECOVERY_PASS_ALL_FSCK) \
x(fragmentation_lru, BCH_VERSION(0, 27), \
RECOVERY_PASS_ALL_FSCK) \
x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
RECOVERY_PASS_ALL_FSCK) \
x(snapshot_trees, BCH_VERSION(0, 29), \
RECOVERY_PASS_ALL_FSCK) \
x(major_minor, BCH_VERSION(1, 0), \
0) \
x(snapshot_skiplists, BCH_VERSION(1, 1), \
BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
#define x(t, n) bcachefs_metadata_version_##t = n,
#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n,
BCH_METADATA_VERSIONS()
#undef x
bcachefs_metadata_version_max

View File

@ -795,6 +795,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
/*
 * Variant of for_each_btree_key_reverse() that commits per key: the loop
 * expression evaluates @_do and, when that yields zero, falls through (GNU
 * "?:") to bch2_trans_commit() with @_disk_res, @_journal_seq and
 * @_commit_flags.
 */
#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
_start, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
_do) \
for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
_start, _end, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\

View File

@ -594,10 +594,21 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
unsigned iter_flags =
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS;
unsigned update_flags = BTREE_TRIGGER_NORUN;
int ret;
/*
* BTREE_UPDATE_KEY_CACHE_RECLAIM disables the key cache lookup/update that
* is normally done to keep the key cache coherent with the underlying
* btree. Nothing besides the allocator is doing updates yet, so we don't
* need key cache coherency for non-alloc btrees; and key cache fills for
* snapshots btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available
* until the snapshots recovery pass has run.
*/
if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED;
else
update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
@ -610,7 +621,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten)
goto out;
ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
ret = bch2_trans_update(trans, &iter, k->k, update_flags);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -1115,6 +1126,7 @@ static void check_version_upgrade(struct bch_fs *c)
unsigned latest_version = bcachefs_metadata_version_current;
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
u64 recovery_passes;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@ -1161,13 +1173,16 @@ static void check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version);
prt_newline(&buf);
prt_str(&buf, "fsck required");
recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
if (recovery_passes) {
prt_str(&buf, "fsck required");
c->recovery_passes_explicit |= recovery_passes;
c->opts.fix_errors = FSCK_OPT_YES;
}
bch_info(c, "%s", buf.buf);
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
mutex_lock(&c->sb_lock);
bch2_sb_upgrade(c, new_version);
mutex_unlock(&c->sb_lock);
@ -1199,21 +1214,30 @@ static struct recovery_pass_fn recovery_passes[] = {
#undef x
};
/*
 * Build a bitmask with bit i set for every entry i of recovery_passes[]
 * whose ->when flags include PASS_FSCK.
 */
u64 bch2_fsck_recovery_passes(void)
{
	u64 mask = 0;
	unsigned idx = 0;

	while (idx < ARRAY_SIZE(recovery_passes)) {
		if (recovery_passes[idx].when & PASS_FSCK)
			mask |= BIT_ULL(idx);
		idx++;
	}

	return mask;
}
/*
 * Decide whether a recovery pass should run.
 *
 * The checks are applied in order: with opts.norecovery set, every pass after
 * snapshots_read is suppressed; otherwise the pass runs if its bit is set in
 * c->recovery_passes_explicit, or if the table entry's ->when flags match
 * (PASS_FSCK together with opts.fsck, PASS_UNCLEAN together with an unclean
 * superblock, PASS_ALWAYS, or a PASS_UPGRADE() version for which
 * bch2_version_upgrading_to() returns true).
 *
 * NOTE(review): the ->when flags are read from the table entry for
 * c->curr_recovery_pass, not for @pass - presumably callers always pass
 * c->curr_recovery_pass; confirm.
 */
static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false;
/* Explicitly requested passes take precedence over the ->when flags below. */
if (c->recovery_passes_explicit & BIT_ULL(pass))
return true;
if ((p->when & PASS_FSCK) && c->opts.fsck)
return true;
if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
return true;
if (p->when & PASS_ALWAYS)
return true;
/* Bits from PASS_UPGRADE(0) upwards encode a metadata version (value << 4). */
if (p->when >= PASS_UPGRADE(0) &&
bch2_version_upgrading_to(c, p->when >> 4))
return true;
return false;
}
@ -1297,7 +1321,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (!c->opts.nochanges)
if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
check_version_upgrade(c);
if (c->opts.fsck && c->opts.norecovery) {

View File

@ -52,6 +52,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct bch_fs *);
u64 bch2_fsck_recovery_passes(void);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);

View File

@ -8,8 +8,41 @@
#include "fs.h"
#include "subvolume.h"
#include <linux/random.h>
static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * Pick the next hop when walking from @id towards @ancestor: the skip[]
 * entries are tried from index 2 down to index 0 and the first one that does
 * not exceed @ancestor is returned; if none qualifies, the parent is returned.
 */
static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
{
	const struct snapshot_t *node = snapshot_t(c, id);
	int slot;

	for (slot = 2; slot >= 0; slot--)
		if (node->skip[slot] <= ancestor)
			return node->skip[slot];

	return node->parent;
}
/*
 * Returns true if walking up from @id via get_ancestor_below() lands exactly
 * on @ancestor. The walk stops once the current ID is zero or is no longer
 * numerically below @ancestor.
 *
 * Asserts (EBUG_ON) that the current recovery pass is past check_snapshots.
 */
bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);

	for (;;) {
		if (!id || id >= ancestor)
			break;
		id = get_ancestor_below(c, id, ancestor);
	}

	return id == ancestor;
}
/*
 * Ancestor check that follows only the ->parent links of the in-memory
 * snapshot table, one step at a time, without consulting skip[].
 */
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
	for (;;) {
		if (!id || id >= ancestor)
			break;
		id = snapshot_t(c, id)->parent;
	}

	return id == ancestor;
}
/* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@ -140,6 +173,25 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
}
if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
prt_printf(err, "skiplist not normalized");
return -BCH_ERR_invalid_bkey;
}
for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
id = le32_to_cpu(s.v->skip[i]);
if (!id != !s.v->parent ||
(s.v->parent &&
id <= k.k->p.offset)) {
prt_printf(err, "bad skiplist node %u)", id);
return -BCH_ERR_invalid_bkey;
}
}
}
return 0;
}
@ -161,10 +213,17 @@ int bch2_mark_snapshot(struct btree_trans *trans,
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
t->parent = le32_to_cpu(s.v->parent);
t->skip[0] = le32_to_cpu(s.v->skip[0]);
t->skip[1] = le32_to_cpu(s.v->skip[1]);
t->skip[2] = le32_to_cpu(s.v->skip[2]);
t->depth = le32_to_cpu(s.v->depth);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
t->tree = le32_to_cpu(s.v->tree);
if (BCH_SNAPSHOT_DELETED(s.v))
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
} else {
t->parent = 0;
t->children[0] = 0;
@ -370,9 +429,9 @@ static int check_snapshot_tree(struct btree_trans *trans,
"snapshot tree points to missing subvolume:\n %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(!bch2_snapshot_is_ancestor(c,
le32_to_cpu(subvol.snapshot),
root_id), c,
fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
le32_to_cpu(subvol.snapshot),
root_id), c,
"snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
@ -441,7 +500,48 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
if (ret)
return ret;
return bch2_snapshot_is_ancestor(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
}
/*
 * Choose a skiplist target starting from @id:
 *  - @id == 0 yields 0;
 *  - a node with no parent yields @id itself;
 *  - otherwise walk up a random number of parent links, drawn with
 *    get_random_u32_below(depth of @id), and return where that ends.
 */
static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
{
	struct snapshot_t *node;
	u32 hops;

	if (!id)
		return 0;

	node = snapshot_t(c, id);
	if (!node->parent)
		return id;

	hops = get_random_u32_below(node->depth);
	return bch2_snapshot_nth_parent(c, id, hops);
}
static int snapshot_rand_ancestor_good(struct btree_trans *trans,
struct bch_snapshot s)
{
struct bch_snapshot a;
unsigned i;
int ret;
for (i = 0; i < 3; i++) {
if (!s.parent != !s.skip[i])
return false;
if (!s.parent)
continue;
ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
if (bch2_err_matches(ret, ENOENT))
return false;
if (ret)
return ret;
if (a.tree != s.tree)
return false;
}
return true;
}
/*
@ -451,14 +551,15 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
*/
static int snapshot_tree_ptr_repair(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_snapshot *s)
struct bkey_s_c k,
struct bch_snapshot *s)
{
struct bch_fs *c = trans->c;
struct btree_iter root_iter;
struct bch_snapshot_tree s_t;
struct bkey_s_c_snapshot root;
struct bkey_i_snapshot *u;
u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id;
u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
int ret;
root = bch2_bkey_get_iter_typed(trans, &root_iter,
@ -484,18 +585,18 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
goto err;
u->v.tree = cpu_to_le32(tree_id);
if (s->k->p.snapshot == root_id)
*s = snapshot_i_to_s_c(u);
if (k.k->p.offset == root_id)
*s = u->v;
}
if (s->k->p.snapshot != root_id) {
u = bch2_bkey_make_mut_typed(trans, iter, &s->s_c, 0, snapshot);
if (k.k->p.offset != root_id) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
u->v.tree = cpu_to_le32(tree_id);
*s = snapshot_i_to_s_c(u);
*s = u->v;
}
err:
bch2_trans_iter_exit(trans, &root_iter);
@ -507,9 +608,14 @@ static int check_snapshot(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot s;
struct bch_snapshot s;
struct bch_subvolume subvol;
struct bch_snapshot v;
struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
struct snapshot_t *parent = parent_id
? snapshot_t(c, parent_id)
: NULL;
struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id;
@ -518,94 +624,119 @@ static int check_snapshot(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_snapshot)
return 0;
s = bkey_s_c_to_snapshot(k);
id = le32_to_cpu(s.v->parent);
memset(&s, 0, sizeof(s));
memcpy(&s, k.v, bkey_val_bytes(k.k));
id = le32_to_cpu(s.parent);
if (id) {
ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot with nonexistent parent:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret)
goto err;
if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
le32_to_cpu(v.children[1]) != s.k->p.offset) {
if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
le32_to_cpu(v.children[1]) != k.k->p.offset) {
bch_err(c, "snapshot parent %u missing pointer to child %llu",
id, s.k->p.offset);
id, k.k->p.offset);
ret = -EINVAL;
goto err;
}
}
for (i = 0; i < 2 && s.v->children[i]; i++) {
id = le32_to_cpu(s.v->children[i]);
for (i = 0; i < 2 && s.children[i]; i++) {
id = le32_to_cpu(s.children[i]);
ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot node %llu has nonexistent child %u",
s.k->p.offset, id);
k.k->p.offset, id);
if (ret)
goto err;
if (le32_to_cpu(v.parent) != s.k->p.offset) {
if (le32_to_cpu(v.parent) != k.k->p.offset) {
bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
id, le32_to_cpu(v.parent), s.k->p.offset);
id, le32_to_cpu(v.parent), k.k->p.offset);
ret = -EINVAL;
goto err;
}
}
should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) &&
!BCH_SNAPSHOT_DELETED(s.v);
should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
id = le32_to_cpu(s.v->subvol);
id = le32_to_cpu(s.subvol);
ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret)
goto err;
if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
s.k->p.offset);
k.k->p.offset);
ret = -EINVAL;
goto err;
}
} else {
if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u));
if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
bkey_reassemble(&u->k_i, s.s_c);
u->v.subvol = 0;
ret = bch2_trans_update(trans, iter, &u->k_i, 0);
if (ret)
goto err;
s = snapshot_i_to_s_c(u);
s = u->v;
}
}
ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree));
ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
if (ret < 0)
goto err;
if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, &s);
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
if (ret)
goto err;
}
ret = 0;
if (BCH_SNAPSHOT_DELETED(s.v))
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c,
"snapshot with incorrect depth fields, should be %u:\n %s",
parent->depth + 1,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0);
s = u->v;
}
ret = snapshot_rand_ancestor_good(trans, s);
if (ret < 0)
goto err;
if (fsck_err_on(!ret, c, "snapshot with bad rand_ancestor field:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
u->v.skip[i] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent_id));
bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int);
s = u->v;
}
ret = 0;
err:
fsck_err:
printbuf_exit(&buf);
@ -618,9 +749,13 @@ int bch2_check_snapshots(struct bch_fs *c)
struct bkey_s_c k;
int ret;
/*
* We iterate backwards as checking/fixing the depth field requires that
* the parent's depth already be correct:
*/
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
BTREE_ID_snapshots, POS_MIN,
for_each_btree_key_reverse_commit(&trans, iter,
BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k)));
@ -847,10 +982,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
u32 *snapshot_subvols,
unsigned nr_snapids)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i_snapshot *n;
struct bkey_s_c k;
unsigned i;
unsigned i, j;
u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@ -880,6 +1017,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
n->v.parent = cpu_to_le32(parent);
n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
n->v.tree = cpu_to_le32(tree);
n->v.depth = cpu_to_le32(depth);
for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
n->v.skip[j] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent));
bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int);
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,

View File

@ -37,11 +37,36 @@ static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
return genradix_ptr(&c->snapshots, U32_MAX - id);
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
/*
 * Return the parent ID recorded in the in-memory snapshot table for @id,
 * without any depth consistency check.
 */
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
	const struct snapshot_t *node = snapshot_t(c, id);

	return node->parent;
}
/*
 * Return the parent ID recorded in the in-memory snapshot table for @id.
 *
 * With CONFIG_BCACHEFS_DEBUG, additionally panics if @id has a parent whose
 * depth is not exactly one less than @id's depth.
 */
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
	u32 parent_id = snapshot_t(c, id)->parent;

#ifdef CONFIG_BCACHEFS_DEBUG
	if (parent_id) {
		u32 child_depth = snapshot_t(c, id)->depth;
		u32 parent_depth = snapshot_t(c, parent_id)->depth;

		if (child_depth != parent_depth + 1)
			panic("id %u depth=%u parent %u depth=%u\n",
			      id, child_depth,
			      parent_id, parent_depth);
	}
#endif
	return parent_id;
}
/*
 * Follow bch2_snapshot_parent() @n times starting at @id and return the
 * resulting ID; @n == 0 returns @id unchanged.
 */
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
	u32 hops;

	for (hops = n; hops; hops--)
		id = bch2_snapshot_parent(c, id);

	return id;
}
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{
u32 parent;
@ -84,13 +109,7 @@ static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
return 0;
}
/*
 * Inline ancestor check: step from @id through bch2_snapshot_parent() while
 * the current ID is non-zero and numerically below @ancestor, then report
 * whether the walk ended on @ancestor.
 */
static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	for (;;) {
		if (!id || id >= ancestor)
			break;
		id = bch2_snapshot_parent(c, id);
	}

	return id == ancestor;
}
bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{

View File

@ -8,6 +8,8 @@ typedef DARRAY(u32) snapshot_id_list;
struct snapshot_t {
u32 parent;
u32 skip[3];
u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;

View File

@ -4,6 +4,7 @@
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "counters.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
@ -12,24 +13,29 @@
#include "journal_io.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "recovery.h"
#include "replicas.h"
#include "quota.h"
#include "super-io.h"
#include "super.h"
#include "trace.h"
#include "vstructs.h"
#include "counters.h"
#include <linux/backing-dev.h>
#include <linux/sort.h>
struct bch2_metadata_version_str {
struct bch2_metadata_version {
u16 version;
const char *name;
u64 recovery_passes;
};
static const struct bch2_metadata_version_str bch2_metadata_versions[] = {
#define x(n, v) { .version = v, .name = #n },
static const struct bch2_metadata_version bch2_metadata_versions[] = {
#define x(n, v, _recovery_passes) { \
.version = v, \
.name = #n, \
.recovery_passes = _recovery_passes, \
},
BCH_METADATA_VERSIONS()
#undef x
};
@ -61,6 +67,24 @@ unsigned bch2_latest_compatible_version(unsigned v)
return v;
}
u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
unsigned old_version,
unsigned new_version)
{
u64 ret = 0;
for (const struct bch2_metadata_version *i = bch2_metadata_versions;
i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
i++)
if (i->version > old_version && i->version <= new_version) {
if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
ret |= bch2_fsck_recovery_passes();
ret |= i->recovery_passes;
}
return ret &= ~RECOVERY_PASS_ALL_FSCK;
}
const char * const bch2_sb_fields[] = {
#define x(name, nr) #name,
BCH_SB_FIELDS()

View File

@ -18,6 +18,10 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, unsigned);
unsigned bch2_latest_compatible_version(unsigned);
u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
unsigned,
unsigned);
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned);