Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-07-10 14:17:18 -04:00
parent 73bf371f4c
commit 21ae8a4b71
12 changed files with 355 additions and 103 deletions

View File

@ -1 +1 @@
fb39031ade476044b4d89e6a8f20de8e025be39c 070ec8d07bcab34fde39499a79b9da6f4254ec7c

View File

@ -28,6 +28,7 @@
#include <linux/bit_spinlock.h> #include <linux/bit_spinlock.h>
#define BIT(nr) (1UL << (nr)) #define BIT(nr) (1UL << (nr))
#define BIT_ULL(nr) (1ULL << (nr))
#include <linux/rhashtable-types.h> #include <linux/rhashtable-types.h>
/* /*

View File

@ -660,12 +660,11 @@ enum bch_write_ref {
#define PASS_FSCK BIT(1) #define PASS_FSCK BIT(1)
#define PASS_UNCLEAN BIT(2) #define PASS_UNCLEAN BIT(2)
#define PASS_ALWAYS BIT(3) #define PASS_ALWAYS BIT(3)
#define PASS_UPGRADE(v) ((v) << 4)
#define BCH_RECOVERY_PASSES() \ #define BCH_RECOVERY_PASSES() \
x(alloc_read, PASS_ALWAYS) \ x(alloc_read, PASS_ALWAYS) \
x(stripes_read, PASS_ALWAYS) \ x(stripes_read, PASS_ALWAYS) \
x(initialize_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ x(initialize_subvolumes, 0) \
x(snapshots_read, PASS_ALWAYS) \ x(snapshots_read, PASS_ALWAYS) \
x(check_allocations, PASS_FSCK) \ x(check_allocations, PASS_FSCK) \
x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
@ -677,8 +676,8 @@ enum bch_write_ref {
x(check_extents_to_backpointers,PASS_FSCK) \ x(check_extents_to_backpointers,PASS_FSCK) \
x(check_alloc_to_lru_refs, PASS_FSCK) \ x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, PASS_UPGRADE(bcachefs_metadata_version_bucket_gens)) \ x(bucket_gens_init, 0) \
x(fs_upgrade_for_subvolumes, PASS_UPGRADE(bcachefs_metadata_version_snapshot_2)) \ x(fs_upgrade_for_subvolumes, 0) \
x(check_snapshot_trees, PASS_FSCK) \ x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \ x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \ x(check_subvols, PASS_FSCK) \
@ -690,7 +689,7 @@ enum bch_write_ref {
x(check_root, PASS_FSCK) \ x(check_root, PASS_FSCK) \
x(check_directory_structure, PASS_FSCK) \ x(check_directory_structure, PASS_FSCK) \
x(check_nlinks, PASS_FSCK) \ x(check_nlinks, PASS_FSCK) \
x(fix_reflink_p, PASS_UPGRADE(bcachefs_metadata_version_reflink_p_fix)) \ x(fix_reflink_p, 0) \
enum bch_recovery_pass { enum bch_recovery_pass {
#define x(n, when) BCH_RECOVERY_PASS_##n, #define x(n, when) BCH_RECOVERY_PASS_##n,
@ -1033,6 +1032,8 @@ struct bch_fs {
u64 journal_replay_seq_start; u64 journal_replay_seq_start;
u64 journal_replay_seq_end; u64 journal_replay_seq_end;
enum bch_recovery_pass curr_recovery_pass; enum bch_recovery_pass curr_recovery_pass;
/* bitmap of explicitly enabled recovery passes: */
u64 recovery_passes_explicit;
/* DEBUG JUNK */ /* DEBUG JUNK */
struct dentry *fs_debug_dir; struct dentry *fs_debug_dir;
@ -1177,12 +1178,6 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
return dev < c->sb.nr_devices && c->devs[dev]; return dev < c->sb.nr_devices && c->devs[dev];
} }
/*
 * Report whether an upgrade across @new_version is still pending: the
 * superblock version has reached @new_version, but the recorded
 * version_upgrade_complete is still below it.
 */
static inline bool bch2_version_upgrading_to(const struct bch_fs *c, unsigned new_version)
{
	if (c->sb.version < new_version)
		return false;

	return c->sb.version_upgrade_complete < new_version;
}
#define BKEY_PADDED_ONSTACK(key, pad) \ #define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; } struct { struct bkey_i key; __u64 key ## _pad[pad]; }

View File

@ -1148,6 +1148,8 @@ struct bch_snapshot {
__le32 children[2]; __le32 children[2];
__le32 subvol; __le32 subvol;
__le32 tree; __le32 tree;
__le32 depth;
__le32 skip[3];
}; };
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
@ -1578,32 +1580,60 @@ struct bch_sb_field_journal_seq_blacklist {
#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
#define BCH_METADATA_VERSIONS() \ #define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
x(bkey_renumber, BCH_VERSION(0, 10)) \
x(inode_btree_change, BCH_VERSION(0, 11)) \ #define BCH_METADATA_VERSIONS() \
x(snapshot, BCH_VERSION(0, 12)) \ x(bkey_renumber, BCH_VERSION(0, 10), \
x(inode_backpointers, BCH_VERSION(0, 13)) \ RECOVERY_PASS_ALL_FSCK) \
x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ x(inode_btree_change, BCH_VERSION(0, 11), \
x(snapshot_2, BCH_VERSION(0, 15)) \ RECOVERY_PASS_ALL_FSCK) \
x(reflink_p_fix, BCH_VERSION(0, 16)) \ x(snapshot, BCH_VERSION(0, 12), \
x(subvol_dirent, BCH_VERSION(0, 17)) \ RECOVERY_PASS_ALL_FSCK) \
x(inode_v2, BCH_VERSION(0, 18)) \ x(inode_backpointers, BCH_VERSION(0, 13), \
x(freespace, BCH_VERSION(0, 19)) \ RECOVERY_PASS_ALL_FSCK) \
x(alloc_v4, BCH_VERSION(0, 20)) \ x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \
x(new_data_types, BCH_VERSION(0, 21)) \ RECOVERY_PASS_ALL_FSCK) \
x(backpointers, BCH_VERSION(0, 22)) \ x(snapshot_2, BCH_VERSION(0, 15), \
x(inode_v3, BCH_VERSION(0, 23)) \ BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \
x(unwritten_extents, BCH_VERSION(0, 24)) \ BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \
x(bucket_gens, BCH_VERSION(0, 25)) \ RECOVERY_PASS_ALL_FSCK) \
x(lru_v2, BCH_VERSION(0, 26)) \ x(reflink_p_fix, BCH_VERSION(0, 16), \
x(fragmentation_lru, BCH_VERSION(0, 27)) \ BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \
x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ x(subvol_dirent, BCH_VERSION(0, 17), \
x(snapshot_trees, BCH_VERSION(0, 29)) \ RECOVERY_PASS_ALL_FSCK) \
x(major_minor, BCH_VERSION(1, 0)) x(inode_v2, BCH_VERSION(0, 18), \
RECOVERY_PASS_ALL_FSCK) \
x(freespace, BCH_VERSION(0, 19), \
RECOVERY_PASS_ALL_FSCK) \
x(alloc_v4, BCH_VERSION(0, 20), \
RECOVERY_PASS_ALL_FSCK) \
x(new_data_types, BCH_VERSION(0, 21), \
RECOVERY_PASS_ALL_FSCK) \
x(backpointers, BCH_VERSION(0, 22), \
RECOVERY_PASS_ALL_FSCK) \
x(inode_v3, BCH_VERSION(0, 23), \
RECOVERY_PASS_ALL_FSCK) \
x(unwritten_extents, BCH_VERSION(0, 24), \
RECOVERY_PASS_ALL_FSCK) \
x(bucket_gens, BCH_VERSION(0, 25), \
BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
RECOVERY_PASS_ALL_FSCK) \
x(lru_v2, BCH_VERSION(0, 26), \
RECOVERY_PASS_ALL_FSCK) \
x(fragmentation_lru, BCH_VERSION(0, 27), \
RECOVERY_PASS_ALL_FSCK) \
x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
RECOVERY_PASS_ALL_FSCK) \
x(snapshot_trees, BCH_VERSION(0, 29), \
RECOVERY_PASS_ALL_FSCK) \
x(major_minor, BCH_VERSION(1, 0), \
0) \
x(snapshot_skiplists, BCH_VERSION(1, 1), \
BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
enum bcachefs_metadata_version { enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9, bcachefs_metadata_version_min = 9,
#define x(t, n) bcachefs_metadata_version_##t = n, #define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n,
BCH_METADATA_VERSIONS() BCH_METADATA_VERSIONS()
#undef x #undef x
bcachefs_metadata_version_max bcachefs_metadata_version_max

View File

@ -795,6 +795,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags))) (_journal_seq), (_commit_flags)))
/*
 * Reverse-iteration counterpart of for_each_btree_key_commit(): expands to
 * for_each_btree_key_reverse() with a per-key body of
 * "(_do) ?: bch2_trans_commit(...)", i.e. bch2_trans_commit() is only
 * evaluated when _do evaluates to zero; a nonzero _do result is used as-is.
 */
#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
_start, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\
_do) \
for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ #define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
_start, _end, _iter_flags, _k, \ _start, _end, _iter_flags, _k, \
_disk_res, _journal_seq, _commit_flags,\ _disk_res, _journal_seq, _commit_flags,\

View File

@ -594,10 +594,21 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
unsigned iter_flags = unsigned iter_flags =
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS; BTREE_ITER_NOT_EXTENTS;
unsigned update_flags = BTREE_TRIGGER_NORUN;
int ret; int ret;
/*
* BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
* keep the key cache coherent with the underlying btree. Nothing
* besides the allocator is doing updates yet so we don't need key cache
* coherency for non-alloc btrees, and key cache fills for snapshots
* btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until
* the snapshots recovery pass runs.
*/
if (!k->level && k->btree_id == BTREE_ID_alloc) if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED; iter_flags |= BTREE_ITER_CACHED;
else
update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level, BTREE_MAX_DEPTH, k->level,
@ -610,7 +621,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten) if (k->overwritten)
goto out; goto out;
ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN); ret = bch2_trans_update(trans, &iter, k->k, update_flags);
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
@ -1115,6 +1126,7 @@ static void check_version_upgrade(struct bch_fs *c)
unsigned latest_version = bcachefs_metadata_version_current; unsigned latest_version = bcachefs_metadata_version_current;
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0; unsigned new_version = 0;
u64 recovery_passes;
if (old_version < bcachefs_metadata_required_upgrade_below) { if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@ -1161,13 +1173,16 @@ static void check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version); bch2_version_to_text(&buf, new_version);
prt_newline(&buf); prt_newline(&buf);
prt_str(&buf, "fsck required"); recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
if (recovery_passes) {
prt_str(&buf, "fsck required");
c->recovery_passes_explicit |= recovery_passes;
c->opts.fix_errors = FSCK_OPT_YES;
}
bch_info(c, "%s", buf.buf); bch_info(c, "%s", buf.buf);
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
bch2_sb_upgrade(c, new_version); bch2_sb_upgrade(c, new_version);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
@ -1199,21 +1214,30 @@ static struct recovery_pass_fn recovery_passes[] = {
#undef x #undef x
}; };
/*
 * Build a bitmask of the recovery passes flagged PASS_FSCK: bit i is set when
 * recovery_passes[i].when includes PASS_FSCK.
 */
u64 bch2_fsck_recovery_passes(void)
{
	u64 fsck_mask = 0;
	unsigned idx = 0;

	while (idx < ARRAY_SIZE(recovery_passes)) {
		if (recovery_passes[idx].when & PASS_FSCK)
			fsck_mask |= BIT_ULL(idx);
		idx++;
	}

	return fsck_mask;
}
static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{ {
struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass; struct recovery_pass_fn *p = recovery_passes + c->curr_recovery_pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false; return false;
if (c->recovery_passes_explicit & BIT_ULL(pass))
return true;
if ((p->when & PASS_FSCK) && c->opts.fsck) if ((p->when & PASS_FSCK) && c->opts.fsck)
return true; return true;
if ((p->when & PASS_UNCLEAN) && !c->sb.clean) if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
return true; return true;
if (p->when & PASS_ALWAYS) if (p->when & PASS_ALWAYS)
return true; return true;
if (p->when >= PASS_UPGRADE(0) &&
bch2_version_upgrading_to(c, p->when >> 4))
return true;
return false; return false;
} }
@ -1297,7 +1321,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err; goto err;
} }
if (!c->opts.nochanges) if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
check_version_upgrade(c); check_version_upgrade(c);
if (c->opts.fsck && c->opts.norecovery) { if (c->opts.fsck && c->opts.norecovery) {

View File

@ -52,6 +52,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
void bch2_journal_keys_free(struct journal_keys *); void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct bch_fs *); void bch2_journal_entries_free(struct bch_fs *);
u64 bch2_fsck_recovery_passes(void);
int bch2_fs_recovery(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *); int bch2_fs_initialize(struct bch_fs *);

View File

@ -8,8 +8,41 @@
#include "fs.h" #include "fs.h"
#include "subvolume.h" #include "subvolume.h"
#include <linux/random.h>
static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * Choose the next node to step to when walking from @id towards @ancestor:
 * the skip[] entries are tried from the highest index down, and the first one
 * that is <= @ancestor is returned; if none qualifies, the parent is returned.
 */
static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
{
	struct snapshot_t *node = snapshot_t(c, id);
	int lvl = (int) (sizeof(node->skip) / sizeof(node->skip[0])) - 1;

	for (; lvl >= 0; --lvl)
		if (node->skip[lvl] <= ancestor)
			return node->skip[lvl];

	return node->parent;
}
/*
 * Return true if @ancestor is reached when walking upwards from @id.
 *
 * Each step goes through get_ancestor_below(); the walk stops as soon as the
 * current id is 0 or is no longer below @ancestor. The EBUG_ON fires if this
 * is called while the current recovery pass is still at or before
 * check_snapshots.
 */
bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);

	for (;;) {
		if (!id || id >= ancestor)
			break;
		id = get_ancestor_below(c, id, ancestor);
	}

	return id == ancestor;
}
/*
 * Ancestor test that follows only the ->parent links, one level per step, and
 * never reads the skip[] entries.
 * NOTE(review): presumably intended for use before the skiplist fields have
 * been checked - confirm against callers.
 */
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
	for (;;) {
		if (!id || id >= ancestor)
			break;
		id = snapshot_t(c, id)->parent;
	}

	return id == ancestor;
}
/* Snapshot tree: */ /* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@ -140,6 +173,25 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k,
} }
} }
if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) {
prt_printf(err, "skiplist not normalized");
return -BCH_ERR_invalid_bkey;
}
for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
id = le32_to_cpu(s.v->skip[i]);
if (!id != !s.v->parent ||
(s.v->parent &&
id <= k.k->p.offset)) {
prt_printf(err, "bad skiplist node %u)", id);
return -BCH_ERR_invalid_bkey;
}
}
}
return 0; return 0;
} }
@ -161,10 +213,17 @@ int bch2_mark_snapshot(struct btree_trans *trans,
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
t->parent = le32_to_cpu(s.v->parent); t->parent = le32_to_cpu(s.v->parent);
t->skip[0] = le32_to_cpu(s.v->skip[0]);
t->skip[1] = le32_to_cpu(s.v->skip[1]);
t->skip[2] = le32_to_cpu(s.v->skip[2]);
t->depth = le32_to_cpu(s.v->depth);
t->children[0] = le32_to_cpu(s.v->children[0]); t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]); t->children[1] = le32_to_cpu(s.v->children[1]);
t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
t->tree = le32_to_cpu(s.v->tree); t->tree = le32_to_cpu(s.v->tree);
if (BCH_SNAPSHOT_DELETED(s.v))
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
} else { } else {
t->parent = 0; t->parent = 0;
t->children[0] = 0; t->children[0] = 0;
@ -370,9 +429,9 @@ static int check_snapshot_tree(struct btree_trans *trans,
"snapshot tree points to missing subvolume:\n %s", "snapshot tree points to missing subvolume:\n %s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(!bch2_snapshot_is_ancestor(c, fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
le32_to_cpu(subvol.snapshot), le32_to_cpu(subvol.snapshot),
root_id), c, root_id), c,
"snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
@ -441,7 +500,48 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
return bch2_snapshot_is_ancestor(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
}
/*
 * Pick a value for a skiplist slot, starting from node @id:
 *  - id 0 yields 0,
 *  - a node without a parent yields itself,
 *  - otherwise the result is @id moved up by a random number of parent steps,
 *    the count being drawn with get_random_u32_below(depth of @id).
 */
static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
{
	struct snapshot_t *node;

	if (id == 0)
		return 0;

	node = snapshot_t(c, id);

	return node->parent
		? bch2_snapshot_nth_parent(c, id, get_random_u32_below(node->depth))
		: id;
}
static int snapshot_rand_ancestor_good(struct btree_trans *trans,
struct bch_snapshot s)
{
struct bch_snapshot a;
unsigned i;
int ret;
for (i = 0; i < 3; i++) {
if (!s.parent != !s.skip[i])
return false;
if (!s.parent)
continue;
ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a);
if (bch2_err_matches(ret, ENOENT))
return false;
if (ret)
return ret;
if (a.tree != s.tree)
return false;
}
return true;
} }
/* /*
@ -451,14 +551,15 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
*/ */
static int snapshot_tree_ptr_repair(struct btree_trans *trans, static int snapshot_tree_ptr_repair(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c_snapshot *s) struct bkey_s_c k,
struct bch_snapshot *s)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter root_iter; struct btree_iter root_iter;
struct bch_snapshot_tree s_t; struct bch_snapshot_tree s_t;
struct bkey_s_c_snapshot root; struct bkey_s_c_snapshot root;
struct bkey_i_snapshot *u; struct bkey_i_snapshot *u;
u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id; u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
int ret; int ret;
root = bch2_bkey_get_iter_typed(trans, &root_iter, root = bch2_bkey_get_iter_typed(trans, &root_iter,
@ -484,18 +585,18 @@ static int snapshot_tree_ptr_repair(struct btree_trans *trans,
goto err; goto err;
u->v.tree = cpu_to_le32(tree_id); u->v.tree = cpu_to_le32(tree_id);
if (s->k->p.snapshot == root_id) if (k.k->p.offset == root_id)
*s = snapshot_i_to_s_c(u); *s = u->v;
} }
if (s->k->p.snapshot != root_id) { if (k.k->p.offset != root_id) {
u = bch2_bkey_make_mut_typed(trans, iter, &s->s_c, 0, snapshot); u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); ret = PTR_ERR_OR_ZERO(u);
if (ret) if (ret)
goto err; goto err;
u->v.tree = cpu_to_le32(tree_id); u->v.tree = cpu_to_le32(tree_id);
*s = snapshot_i_to_s_c(u); *s = u->v;
} }
err: err:
bch2_trans_iter_exit(trans, &root_iter); bch2_trans_iter_exit(trans, &root_iter);
@ -507,9 +608,14 @@ static int check_snapshot(struct btree_trans *trans,
struct bkey_s_c k) struct bkey_s_c k)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot s; struct bch_snapshot s;
struct bch_subvolume subvol; struct bch_subvolume subvol;
struct bch_snapshot v; struct bch_snapshot v;
struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
struct snapshot_t *parent = parent_id
? snapshot_t(c, parent_id)
: NULL;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
bool should_have_subvol; bool should_have_subvol;
u32 i, id; u32 i, id;
@ -518,94 +624,119 @@ static int check_snapshot(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_snapshot) if (k.k->type != KEY_TYPE_snapshot)
return 0; return 0;
s = bkey_s_c_to_snapshot(k); memset(&s, 0, sizeof(s));
id = le32_to_cpu(s.v->parent); memcpy(&s, k.v, bkey_val_bytes(k.k));
id = le32_to_cpu(s.parent);
if (id) { if (id) {
ret = snapshot_lookup(trans, id, &v); ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT)) if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot with nonexistent parent:\n %s", bch_err(c, "snapshot with nonexistent parent:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret) if (ret)
goto err; goto err;
if (le32_to_cpu(v.children[0]) != s.k->p.offset && if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
le32_to_cpu(v.children[1]) != s.k->p.offset) { le32_to_cpu(v.children[1]) != k.k->p.offset) {
bch_err(c, "snapshot parent %u missing pointer to child %llu", bch_err(c, "snapshot parent %u missing pointer to child %llu",
id, s.k->p.offset); id, k.k->p.offset);
ret = -EINVAL; ret = -EINVAL;
goto err; goto err;
} }
} }
for (i = 0; i < 2 && s.v->children[i]; i++) { for (i = 0; i < 2 && s.children[i]; i++) {
id = le32_to_cpu(s.v->children[i]); id = le32_to_cpu(s.children[i]);
ret = snapshot_lookup(trans, id, &v); ret = snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT)) if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot node %llu has nonexistent child %u", bch_err(c, "snapshot node %llu has nonexistent child %u",
s.k->p.offset, id); k.k->p.offset, id);
if (ret) if (ret)
goto err; goto err;
if (le32_to_cpu(v.parent) != s.k->p.offset) { if (le32_to_cpu(v.parent) != k.k->p.offset) {
bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
id, le32_to_cpu(v.parent), s.k->p.offset); id, le32_to_cpu(v.parent), k.k->p.offset);
ret = -EINVAL; ret = -EINVAL;
goto err; goto err;
} }
} }
should_have_subvol = BCH_SNAPSHOT_SUBVOL(s.v) && should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(s.v); !BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) { if (should_have_subvol) {
id = le32_to_cpu(s.v->subvol); id = le32_to_cpu(s.subvol);
ret = bch2_subvolume_get(trans, id, 0, false, &subvol); ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
if (bch2_err_matches(ret, ENOENT)) if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s", bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf)); (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret) if (ret)
goto err; goto err;
if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) { if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
s.k->p.offset); k.k->p.offset);
ret = -EINVAL; ret = -EINVAL;
goto err; goto err;
} }
} else { } else {
if (fsck_err_on(s.v->subvol, c, "snapshot should not point to subvol:\n %s", if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct bkey_i_snapshot *u = bch2_trans_kmalloc(trans, sizeof(*u)); u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); ret = PTR_ERR_OR_ZERO(u);
if (ret) if (ret)
goto err; goto err;
bkey_reassemble(&u->k_i, s.s_c);
u->v.subvol = 0; u->v.subvol = 0;
ret = bch2_trans_update(trans, iter, &u->k_i, 0); s = u->v;
if (ret)
goto err;
s = snapshot_i_to_s_c(u);
} }
} }
ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree)); ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
if (ret < 0) if (ret < 0)
goto err; goto err;
if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s",
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, &s); ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
if (ret) if (ret)
goto err; goto err;
} }
ret = 0; ret = 0;
if (BCH_SNAPSHOT_DELETED(s.v)) if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c,
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); "snapshot with incorrect depth fields, should be %u:\n %s",
parent->depth + 1,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0);
s = u->v;
}
ret = snapshot_rand_ancestor_good(trans, s);
if (ret < 0)
goto err;
if (fsck_err_on(!ret, c, "snapshot with bad rand_ancestor field:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
u->v.skip[i] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent_id));
bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_int);
s = u->v;
}
ret = 0;
err: err:
fsck_err: fsck_err:
printbuf_exit(&buf); printbuf_exit(&buf);
@ -618,9 +749,13 @@ int bch2_check_snapshots(struct bch_fs *c)
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
/*
* We iterate backwards as checking/fixing the depth field requires that
* the parent's depth already be correct:
*/
ret = bch2_trans_run(c, ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter, for_each_btree_key_reverse_commit(&trans, iter,
BTREE_ID_snapshots, POS_MIN, BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_PREFETCH, k, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k))); check_snapshot(&trans, &iter, k)));
@ -847,10 +982,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
u32 *snapshot_subvols, u32 *snapshot_subvols,
unsigned nr_snapids) unsigned nr_snapids)
{ {
struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
struct bkey_i_snapshot *n; struct bkey_i_snapshot *n;
struct bkey_s_c k; struct bkey_s_c k;
unsigned i; unsigned i, j;
u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@ -880,6 +1017,12 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
n->v.parent = cpu_to_le32(parent); n->v.parent = cpu_to_le32(parent);
n->v.subvol = cpu_to_le32(snapshot_subvols[i]); n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
n->v.tree = cpu_to_le32(tree); n->v.tree = cpu_to_le32(tree);
n->v.depth = cpu_to_le32(depth);
for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
n->v.skip[j] = cpu_to_le32(snapshot_rand_ancestor_get(c, parent));
bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_int);
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,

View File

@ -37,11 +37,36 @@ static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
return genradix_ptr(&c->snapshots, U32_MAX - id); return genradix_ptr(&c->snapshots, U32_MAX - id);
} }
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{ {
return snapshot_t(c, id)->parent; return snapshot_t(c, id)->parent;
} }
/*
 * Return the parent id of snapshot @id.
 *
 * With CONFIG_BCACHEFS_DEBUG, additionally panic if the node has a parent
 * whose depth is not exactly one less than the node's own depth.
 */
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
	u32 parent = snapshot_t(c, id)->parent;

#ifdef CONFIG_BCACHEFS_DEBUG
	if (parent &&
	    snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
		panic("id %u depth=%u parent %u depth=%u\n",
		      id, snapshot_t(c, id)->depth,
		      parent, snapshot_t(c, parent)->depth);
#endif

	return parent;
}
/*
 * Apply bch2_snapshot_parent() @n times starting from @id and return the
 * resulting id; @n == 0 returns @id unchanged.
 */
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
	for (; n; n--)
		id = bch2_snapshot_parent(c, id);

	return id;
}
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{ {
u32 parent; u32 parent;
@ -84,13 +109,7 @@ static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
return 0; return 0;
} }
static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
{
while (id && id < ancestor)
id = bch2_snapshot_parent(c, id);
return id == ancestor;
}
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{ {

View File

@ -8,6 +8,8 @@ typedef DARRAY(u32) snapshot_id_list;
struct snapshot_t { struct snapshot_t {
u32 parent; u32 parent;
u32 skip[3];
u32 depth;
u32 children[2]; u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */ u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree; u32 tree;

View File

@ -4,6 +4,7 @@
#include "btree_update_interior.h" #include "btree_update_interior.h"
#include "buckets.h" #include "buckets.h"
#include "checksum.h" #include "checksum.h"
#include "counters.h"
#include "disk_groups.h" #include "disk_groups.h"
#include "ec.h" #include "ec.h"
#include "error.h" #include "error.h"
@ -12,24 +13,29 @@
#include "journal_io.h" #include "journal_io.h"
#include "journal_sb.h" #include "journal_sb.h"
#include "journal_seq_blacklist.h" #include "journal_seq_blacklist.h"
#include "recovery.h"
#include "replicas.h" #include "replicas.h"
#include "quota.h" #include "quota.h"
#include "super-io.h" #include "super-io.h"
#include "super.h" #include "super.h"
#include "trace.h" #include "trace.h"
#include "vstructs.h" #include "vstructs.h"
#include "counters.h"
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/sort.h> #include <linux/sort.h>
struct bch2_metadata_version_str { struct bch2_metadata_version {
u16 version; u16 version;
const char *name; const char *name;
u64 recovery_passes;
}; };
static const struct bch2_metadata_version_str bch2_metadata_versions[] = { static const struct bch2_metadata_version bch2_metadata_versions[] = {
#define x(n, v) { .version = v, .name = #n }, #define x(n, v, _recovery_passes) { \
.version = v, \
.name = #n, \
.recovery_passes = _recovery_passes, \
},
BCH_METADATA_VERSIONS() BCH_METADATA_VERSIONS()
#undef x #undef x
}; };
@ -61,6 +67,24 @@ unsigned bch2_latest_compatible_version(unsigned v)
return v; return v;
} }
u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
unsigned old_version,
unsigned new_version)
{
u64 ret = 0;
for (const struct bch2_metadata_version *i = bch2_metadata_versions;
i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
i++)
if (i->version > old_version && i->version <= new_version) {
if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
ret |= bch2_fsck_recovery_passes();
ret |= i->recovery_passes;
}
return ret &= ~RECOVERY_PASS_ALL_FSCK;
}
const char * const bch2_sb_fields[] = { const char * const bch2_sb_fields[] = {
#define x(name, nr) #name, #define x(name, nr) #name,
BCH_SB_FIELDS() BCH_SB_FIELDS()

View File

@ -18,6 +18,10 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, unsigned); void bch2_version_to_text(struct printbuf *, unsigned);
unsigned bch2_latest_compatible_version(unsigned); unsigned bch2_latest_compatible_version(unsigned);
u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
unsigned,
unsigned);
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned); enum bch_sb_field_type, unsigned);