429 lines
14 KiB
Diff
429 lines
14 KiB
Diff
From 7c410e21d8bb472455e57fdbf174c6accb3d9a78 Mon Sep 17 00:00:00 2001
|
|
From: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Date: Thu, 12 Dec 2024 03:03:58 -0500
|
|
Subject: [PATCH 205/233] bcachefs: Snapshot deletion no longer uses
|
|
snapshot_t->equiv
|
|
Content-Type: text/plain; charset="utf-8"
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
Switch to generating a private list of interior nodes to delete, instead
|
|
of using the equivalence class in the global data structure.
|
|
|
|
This eliminates possible races with snapshot creation, and is much
|
|
cleaner - it'll let us delete a lot of janky code for calculating and
|
|
maintaining the equivalence classes.
|
|
|
|
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
|
|
Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
|
|
---
|
|
fs/bcachefs/snapshot.c | 268 ++++++++++++++++++++---------------------
|
|
1 file changed, 133 insertions(+), 135 deletions(-)
|
|
|
|
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
|
|
index ca7e4e975a60..0d60251946f1 100644
|
|
--- a/fs/bcachefs/snapshot.c
|
|
+++ b/fs/bcachefs/snapshot.c
|
|
@@ -1418,44 +1418,74 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
|
|
* that key to snapshot leaf nodes, where we can mutate it
|
|
*/
|
|
|
|
-static int delete_dead_snapshots_process_key(struct btree_trans *trans,
|
|
- struct btree_iter *iter,
|
|
- struct bkey_s_c k,
|
|
- snapshot_id_list *deleted)
|
|
+struct snapshot_interior_delete {
|
|
+ u32 id;
|
|
+ u32 live_child;
|
|
+};
|
|
+typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
|
|
+
|
|
+static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
|
|
{
|
|
- int ret = bch2_check_key_has_snapshot(trans, iter, k);
|
|
- if (ret)
|
|
- return ret < 0 ? ret : 0;
|
|
+ darray_for_each(*l, i)
|
|
+ if (i->id == id)
|
|
+ return i->live_child;
|
|
+ return 0;
|
|
+}
|
|
|
|
- struct bch_fs *c = trans->c;
|
|
- u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
|
|
- if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
|
|
+static unsigned __live_child(struct snapshot_table *t, u32 id,
|
|
+ snapshot_id_list *delete_leaves,
|
|
+ interior_delete_list *delete_interior)
|
|
+{
|
|
+ struct snapshot_t *s = __snapshot_t(t, id);
|
|
+ if (!s)
|
|
return 0;
|
|
|
|
- if (snapshot_list_has_id(deleted, k.k->p.snapshot))
|
|
+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
|
|
+ if (s->children[i] &&
|
|
+ !snapshot_list_has_id(delete_leaves, s->children[i]) &&
|
|
+ !interior_delete_has_id(delete_interior, s->children[i]))
|
|
+ return s->children[i];
|
|
+
|
|
+ for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
|
|
+ u32 live_child = s->children[i]
|
|
+ ? __live_child(t, s->children[i], delete_leaves, delete_interior)
|
|
+ : 0;
|
|
+ if (live_child)
|
|
+ return live_child;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static unsigned live_child(struct bch_fs *c, u32 id,
|
|
+ snapshot_id_list *delete_leaves,
|
|
+ interior_delete_list *delete_interior)
|
|
+{
|
|
+ rcu_read_lock();
|
|
+ u32 ret = __live_child(rcu_dereference(c->snapshots), id,
|
|
+ delete_leaves, delete_interior);
|
|
+ rcu_read_unlock();
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int delete_dead_snapshots_process_key(struct btree_trans *trans,
|
|
+ struct btree_iter *iter,
|
|
+ struct bkey_s_c k,
|
|
+ snapshot_id_list *delete_leaves,
|
|
+ interior_delete_list *delete_interior)
|
|
+{
|
|
+ if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
|
|
return bch2_btree_delete_at(trans, iter,
|
|
BTREE_UPDATE_internal_snapshot_node);
|
|
|
|
- /*
|
|
- * When we have a linear chain of snapshot nodes, we consider
|
|
- * those to form an equivalence class: we're going to collapse
|
|
- * them all down to a single node, and keep the leaf-most node -
|
|
- * which has the same id as the equivalence class id.
|
|
- *
|
|
- * If there are multiple keys in different snapshots at the same
|
|
- * position, we're only going to keep the one in the newest
|
|
- * snapshot (we delete the others above) - the rest have been
|
|
- * overwritten and are redundant, and for the key we're going to keep we
|
|
- * need to move it to the equivalance class ID if it's not there
|
|
- * already.
|
|
- */
|
|
- if (equiv != k.k->p.snapshot) {
|
|
+ u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
|
|
+ if (live_child) {
|
|
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
|
|
int ret = PTR_ERR_OR_ZERO(new);
|
|
if (ret)
|
|
return ret;
|
|
|
|
- new->k.p.snapshot = equiv;
|
|
+ new->k.p.snapshot = live_child;
|
|
|
|
struct btree_iter dst_iter;
|
|
struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
|
|
@@ -1479,55 +1509,62 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
|
|
return 0;
|
|
}
|
|
|
|
-static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
|
|
+/*
|
|
+ * For a given snapshot, if it doesn't have a subvolume that points to it, and
|
|
+ * it doesn't have child snapshot nodes - it's now redundant and we can mark it
|
|
+ * as deleted.
|
|
+ */
|
|
+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
|
|
+ snapshot_id_list *delete_leaves,
|
|
+ interior_delete_list *delete_interior)
|
|
{
|
|
- struct bkey_s_c_snapshot snap;
|
|
- u32 children[2];
|
|
- int ret;
|
|
-
|
|
if (k.k->type != KEY_TYPE_snapshot)
|
|
return 0;
|
|
|
|
- snap = bkey_s_c_to_snapshot(k);
|
|
- if (BCH_SNAPSHOT_DELETED(snap.v) ||
|
|
- BCH_SNAPSHOT_SUBVOL(snap.v))
|
|
+ struct bch_fs *c = trans->c;
|
|
+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
|
|
+ unsigned live_children = 0;
|
|
+
|
|
+ if (BCH_SNAPSHOT_SUBVOL(s.v))
|
|
return 0;
|
|
|
|
- children[0] = le32_to_cpu(snap.v->children[0]);
|
|
- children[1] = le32_to_cpu(snap.v->children[1]);
|
|
+ for (unsigned i = 0; i < 2; i++) {
|
|
+ u32 child = le32_to_cpu(s.v->children[i]);
|
|
|
|
- ret = bch2_snapshot_live(trans, children[0]) ?:
|
|
- bch2_snapshot_live(trans, children[1]);
|
|
- if (ret < 0)
|
|
- return ret;
|
|
- return !ret;
|
|
-}
|
|
+ live_children += child &&
|
|
+ !snapshot_list_has_id(delete_leaves, child);
|
|
+ }
|
|
|
|
-/*
|
|
- * For a given snapshot, if it doesn't have a subvolume that points to it, and
|
|
- * it doesn't have child snapshot nodes - it's now redundant and we can mark it
|
|
- * as deleted.
|
|
- */
|
|
-static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
|
|
-{
|
|
- int ret = bch2_snapshot_needs_delete(trans, k);
|
|
+ if (live_children == 0) {
|
|
+ return snapshot_list_add(c, delete_leaves, s.k->p.offset);
|
|
+ } else if (live_children == 1) {
|
|
+ struct snapshot_interior_delete d = {
|
|
+ .id = s.k->p.offset,
|
|
+ .live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
|
|
+ };
|
|
+
|
|
+ if (!d.live_child) {
|
|
+ bch_err(c, "error finding live child of snapshot %u", d.id);
|
|
+ return -EINVAL;
|
|
+ }
|
|
|
|
- return ret <= 0
|
|
- ? ret
|
|
- : bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
|
|
+ return darray_push(delete_interior, d);
|
|
+ } else {
|
|
+ return 0;
|
|
+ }
|
|
}
|
|
|
|
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
|
|
- snapshot_id_list *skip)
|
|
+ interior_delete_list *skip)
|
|
{
|
|
rcu_read_lock();
|
|
- while (snapshot_list_has_id(skip, id))
|
|
+ while (interior_delete_has_id(skip, id))
|
|
id = __bch2_snapshot_parent(c, id);
|
|
|
|
while (n--) {
|
|
do {
|
|
id = __bch2_snapshot_parent(c, id);
|
|
- } while (snapshot_list_has_id(skip, id));
|
|
+ } while (interior_delete_has_id(skip, id));
|
|
}
|
|
rcu_read_unlock();
|
|
|
|
@@ -1536,7 +1573,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
|
|
|
|
static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
|
|
struct btree_iter *iter, struct bkey_s_c k,
|
|
- snapshot_id_list *deleted)
|
|
+ interior_delete_list *deleted)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
u32 nr_deleted_ancestors = 0;
|
|
@@ -1546,7 +1583,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
|
|
if (k.k->type != KEY_TYPE_snapshot)
|
|
return 0;
|
|
|
|
- if (snapshot_list_has_id(deleted, k.k->p.offset))
|
|
+ if (interior_delete_has_id(deleted, k.k->p.offset))
|
|
return 0;
|
|
|
|
s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
|
|
@@ -1555,7 +1592,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
|
|
return ret;
|
|
|
|
darray_for_each(*deleted, i)
|
|
- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
|
|
+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);
|
|
|
|
if (!nr_deleted_ancestors)
|
|
return 0;
|
|
@@ -1573,7 +1610,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
|
|
for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
|
|
u32 id = le32_to_cpu(s->v.skip[j]);
|
|
|
|
- if (snapshot_list_has_id(deleted, id)) {
|
|
+ if (interior_delete_has_id(deleted, id)) {
|
|
id = bch2_snapshot_nth_parent_skip(c,
|
|
parent,
|
|
depth > 1
|
|
@@ -1592,46 +1629,25 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
|
|
|
|
int bch2_delete_dead_snapshots(struct bch_fs *c)
|
|
{
|
|
- struct btree_trans *trans;
|
|
- snapshot_id_list deleted = { 0 };
|
|
- snapshot_id_list deleted_interior = { 0 };
|
|
- int ret = 0;
|
|
-
|
|
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
|
|
return 0;
|
|
|
|
- trans = bch2_trans_get(c);
|
|
+ struct btree_trans *trans = bch2_trans_get(c);
|
|
+ snapshot_id_list delete_leaves = {};
|
|
+ interior_delete_list delete_interior = {};
|
|
+ int ret = 0;
|
|
|
|
/*
|
|
* For every snapshot node: If we have no live children and it's not
|
|
* pointed to by a subvolume, delete it:
|
|
*/
|
|
- ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
|
|
- POS_MIN, 0, k,
|
|
- NULL, NULL, 0,
|
|
- bch2_delete_redundant_snapshot(trans, k));
|
|
- bch_err_msg(c, ret, "deleting redundant snapshots");
|
|
- if (ret)
|
|
- goto err;
|
|
-
|
|
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
|
|
- POS_MIN, 0, k,
|
|
- bch2_snapshot_set_equiv(trans, k));
|
|
- bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
|
|
+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
|
|
+ check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
|
|
+ bch_err_msg(c, ret, "walking snapshots");
|
|
if (ret)
|
|
goto err;
|
|
|
|
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
|
|
- POS_MIN, 0, k, ({
|
|
- if (k.k->type != KEY_TYPE_snapshot)
|
|
- continue;
|
|
-
|
|
- BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
|
|
- ? snapshot_list_add(c, &deleted, k.k->p.offset)
|
|
- : 0;
|
|
- }));
|
|
- bch_err_msg(c, ret, "walking snapshots");
|
|
- if (ret)
|
|
+ if (!delete_leaves.nr && !delete_interior.nr)
|
|
goto err;
|
|
|
|
for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
|
|
@@ -1644,7 +1660,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
|
btree, POS_MIN,
|
|
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
|
|
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
|
|
- delete_dead_snapshots_process_key(trans, &iter, k, &deleted));
|
|
+ delete_dead_snapshots_process_key(trans, &iter, k,
|
|
+ &delete_leaves,
|
|
+ &delete_interior));
|
|
|
|
bch2_disk_reservation_put(c, &res);
|
|
|
|
@@ -1653,22 +1671,13 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
|
goto err;
|
|
}
|
|
|
|
- bch2_trans_unlock(trans);
|
|
- down_write(&c->snapshot_create_lock);
|
|
-
|
|
- ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
|
|
- POS_MIN, 0, k, ({
|
|
- u32 snapshot = k.k->p.offset;
|
|
- u32 equiv = bch2_snapshot_equiv(c, snapshot);
|
|
-
|
|
- equiv != snapshot
|
|
- ? snapshot_list_add(c, &deleted_interior, snapshot)
|
|
- : 0;
|
|
- }));
|
|
-
|
|
- bch_err_msg(c, ret, "walking snapshots");
|
|
- if (ret)
|
|
- goto err_create_lock;
|
|
+ darray_for_each(delete_leaves, i) {
|
|
+ ret = commit_do(trans, NULL, NULL, 0,
|
|
+ bch2_snapshot_node_delete(trans, *i));
|
|
+ bch_err_msg(c, ret, "deleting snapshot %u", *i);
|
|
+ if (ret)
|
|
+ goto err;
|
|
+ }
|
|
|
|
/*
|
|
* Fixing children of deleted snapshots can't be done completely
|
|
@@ -1678,30 +1687,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
|
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
|
|
BTREE_ITER_intent, k,
|
|
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
|
- bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
|
|
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
|
|
if (ret)
|
|
- goto err_create_lock;
|
|
-
|
|
- darray_for_each(deleted, i) {
|
|
- ret = commit_do(trans, NULL, NULL, 0,
|
|
- bch2_snapshot_node_delete(trans, *i));
|
|
- bch_err_msg(c, ret, "deleting snapshot %u", *i);
|
|
- if (ret)
|
|
- goto err_create_lock;
|
|
- }
|
|
+ goto err;
|
|
|
|
- darray_for_each(deleted_interior, i) {
|
|
+ darray_for_each(delete_interior, i) {
|
|
ret = commit_do(trans, NULL, NULL, 0,
|
|
- bch2_snapshot_node_delete(trans, *i));
|
|
- bch_err_msg(c, ret, "deleting snapshot %u", *i);
|
|
+ bch2_snapshot_node_delete(trans, i->id));
|
|
+ bch_err_msg(c, ret, "deleting snapshot %u", i->id);
|
|
if (ret)
|
|
- goto err_create_lock;
|
|
+ goto err;
|
|
}
|
|
-err_create_lock:
|
|
- up_write(&c->snapshot_create_lock);
|
|
err:
|
|
- darray_exit(&deleted_interior);
|
|
- darray_exit(&deleted);
|
|
+ darray_exit(&delete_interior);
|
|
+ darray_exit(&delete_leaves);
|
|
bch2_trans_put(trans);
|
|
bch_err_fn(c, ret);
|
|
return ret;
|
|
@@ -1754,24 +1753,23 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
|
|
return ret;
|
|
}
|
|
|
|
-static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
|
|
+static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
|
|
{
|
|
- struct bch_fs *c = trans->c;
|
|
- struct bkey_s_c_snapshot snap;
|
|
- int ret = 0;
|
|
+ /* If there's one child, it's redundant and keys will be moved to the child */
|
|
+ return !!snap.v->children[0] + !!snap.v->children[1] == 1;
|
|
+}
|
|
|
|
+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
|
|
+{
|
|
if (k.k->type != KEY_TYPE_snapshot)
|
|
return 0;
|
|
|
|
- snap = bkey_s_c_to_snapshot(k);
|
|
+ struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
|
|
if (BCH_SNAPSHOT_DELETED(snap.v) ||
|
|
- bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
|
|
- (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
|
|
- set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
|
|
- return 0;
|
|
- }
|
|
+ interior_snapshot_needs_delete(snap))
|
|
+ set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
|
|
|
|
- return ret;
|
|
+ return 0;
|
|
}
|
|
|
|
int bch2_snapshots_read(struct bch_fs *c)
|
|
--
|
|
2.45.2
|
|
|