Update bcachefs sources to 4ec293b5ea95 bcachefs: Fix check_should_delete_snapshot()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-12-07 00:56:08 -05:00
parent 61c6cf57bd
commit 41f1fc3e9d
12 changed files with 326 additions and 217 deletions

View File

@ -1 +1 @@
af2c94ff96a44454a785878c6674fcf210c5a426 4ec293b5ea95fc73a346100d44758bec4cc03983

View File

@ -32,6 +32,7 @@ typedef unsigned gfp_t;
#define GFP_NOFS 0 #define GFP_NOFS 0
#define GFP_NOIO 0 #define GFP_NOIO 0
#define GFP_NOWAIT 0 #define GFP_NOWAIT 0
#define __GFP_RECLAIM 0
#define __GFP_FS 0 #define __GFP_FS 0
#define __GFP_IO 0 #define __GFP_IO 0
#define __GFP_NOWARN 0 #define __GFP_NOWARN 0

View File

@ -18,10 +18,15 @@
#include "sb/counters.h" #include "sb/counters.h"
#include <linux/module.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/swap.h> #include <linux/swap.h>
/*
 * Tunable (mode 0644) consulted by btree_node_data_alloc(): when it is set and
 * the caller opts in, allocations whose gfp mask includes __GFP_RECLAIM use
 * __vmalloc() directly and only the remaining ones try kmalloc().
 */
bool bch2_mm_avoid_compaction = true;
module_param_named(mm_avoid_compaction, bch2_mm_avoid_compaction, bool, 0644);
/* The description must be keyed by the parameter's own name to be attached to it. */
MODULE_PARM_DESC(mm_avoid_compaction, "Allocate btree node buffers with vmalloc instead of high-order pages when the allocation may block");
const char * const bch2_btree_node_flags[] = { const char * const bch2_btree_node_flags[] = {
"typebit", "typebit",
"typebit", "typebit",
@ -90,7 +95,20 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
six_unlock_intent(&b->c.lock); six_unlock_intent(&b->c.lock);
} }
void __btree_node_data_free(struct btree *b) static void __btree_node_data_free(struct btree *b)
{
kvfree(b->data);
b->data = NULL;
#ifdef __KERNEL__
kvfree(b->aux_data);
#else
if (b->aux_data)
munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
b->aux_data = NULL;
}
void bch2_btree_node_data_free_locked(struct btree *b)
{ {
BUG_ON(!list_empty(&b->list)); BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b)); BUG_ON(btree_node_hashed(b));
@ -108,23 +126,15 @@ void __btree_node_data_free(struct btree *b)
EBUG_ON(btree_node_write_in_flight(b)); EBUG_ON(btree_node_write_in_flight(b));
clear_btree_node_just_written(b); clear_btree_node_just_written(b);
__btree_node_data_free(b);
kvfree(b->data);
b->data = NULL;
#ifdef __KERNEL__
kvfree(b->aux_data);
#else
munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
b->aux_data = NULL;
} }
static void btree_node_data_free(struct bch_fs_btree_cache *bc, struct btree *b) static void bch2_btree_node_data_free(struct bch_fs_btree_cache *bc, struct btree *b)
{ {
BUG_ON(list_empty(&b->list)); BUG_ON(list_empty(&b->list));
list_del_init(&b->list); list_del_init(&b->list);
__btree_node_data_free(b); bch2_btree_node_data_free_locked(b);
--bc->nr_freeable; --bc->nr_freeable;
btree_node_to_freedlist(bc, b); btree_node_to_freedlist(bc, b);
@ -147,28 +157,44 @@ static const struct rhashtable_params bch_btree_cache_params = {
.automatic_shrinking = true, .automatic_shrinking = true,
}; };
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp,
bool avoid_compaction)
{ {
BUG_ON(b->data || b->aux_data);
gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE; gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE;
b->data = kvmalloc(btree_buf_bytes(b), gfp); if (!b->data) {
if (!b->data) if (avoid_compaction && bch2_mm_avoid_compaction) {
return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); /*
#ifdef __KERNEL__ * Cursed hack: mm doesn't know how to limit the amount of time
b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); * we spend blocked on compaction, even if we specified a
#else * vmalloc fallback.
b->aux_data = mmap(NULL, btree_aux_data_bytes(b), *
PROT_READ|PROT_WRITE|PROT_EXEC, * So we have to do that ourselves: only try for a high order
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); * page allocation if we're GFP_NOWAIT, otherwise straight to
if (b->aux_data == MAP_FAILED) * vmalloc.
b->aux_data = NULL; */
#endif b->data = gfp & __GFP_RECLAIM
? __vmalloc(btree_buf_bytes(b), gfp)
: kmalloc(btree_buf_bytes(b), gfp);
} else {
b->data = kvmalloc(btree_buf_bytes(b), gfp);
}
if (!b->data)
return bch_err_throw(c, ENOMEM_btree_node_mem_alloc);
}
if (!b->aux_data) { if (!b->aux_data) {
kvfree(b->data); #ifdef __KERNEL__
b->data = NULL; b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); #else
b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
if (b->aux_data == MAP_FAILED)
b->aux_data = NULL;
#endif
if (!b->aux_data)
return bch_err_throw(c, ENOMEM_btree_node_mem_alloc);
} }
return 0; return 0;
@ -176,9 +202,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
{ {
struct btree *b; struct btree *b = kzalloc(sizeof(struct btree), gfp);
b = kzalloc(sizeof(struct btree), gfp);
if (!b) if (!b)
return NULL; return NULL;
@ -195,7 +219,8 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
if (!b) if (!b)
return NULL; return NULL;
if (btree_node_data_alloc(c, b, GFP_KERNEL)) { if (btree_node_data_alloc(c, b, GFP_KERNEL, false)) {
__btree_node_data_free(b);
kfree(b); kfree(b);
return NULL; return NULL;
} }
@ -262,6 +287,9 @@ void __bch2_btree_node_hash_remove(struct bch_fs_btree_cache *bc, struct btree *
if (b->c.btree_id < BTREE_ID_NR) if (b->c.btree_id < BTREE_ID_NR)
--bc->nr_by_btree[b->c.btree_id]; --bc->nr_by_btree[b->c.btree_id];
--bc->live[btree_node_pinned(b)].nr; --bc->live[btree_node_pinned(b)].nr;
bc->nr_vmalloc -= is_vmalloc_addr(b->data);
list_del_init(&b->list); list_del_init(&b->list);
} }
@ -279,6 +307,8 @@ int __bch2_btree_node_hash_insert(struct bch_fs_btree_cache *bc, struct btree *b
b->hash_val = btree_ptr_hash_val(&b->key); b->hash_val = btree_ptr_hash_val(&b->key);
try(rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params)); try(rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params));
bc->nr_vmalloc += is_vmalloc_addr(b->data);
if (b->c.btree_id < BTREE_ID_NR) if (b->c.btree_id < BTREE_ID_NR)
bc->nr_by_btree[b->c.btree_id]++; bc->nr_by_btree[b->c.btree_id]++;
@ -502,7 +532,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
goto out; goto out;
if (!btree_node_reclaim(c, b)) { if (!btree_node_reclaim(c, b)) {
btree_node_data_free(bc, b); bch2_btree_node_data_free(bc, b);
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock); six_unlock_intent(&b->c.lock);
freed++; freed++;
@ -519,7 +549,7 @@ restart:
--touched; --touched;
} else if (!btree_node_reclaim(c, b)) { } else if (!btree_node_reclaim(c, b)) {
__bch2_btree_node_hash_remove(bc, b); __bch2_btree_node_hash_remove(bc, b);
__btree_node_data_free(b); bch2_btree_node_data_free_locked(b);
btree_node_to_freedlist(bc, b); btree_node_to_freedlist(bc, b);
freed++; freed++;
@ -606,7 +636,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
BUG_ON(btree_node_read_in_flight(b) || BUG_ON(btree_node_read_in_flight(b) ||
btree_node_write_in_flight(b)); btree_node_write_in_flight(b));
btree_node_data_free(bc, b); bch2_btree_node_data_free(bc, b);
cond_resched(); cond_resched();
} }
@ -830,10 +860,12 @@ got_node:
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
if (btree_node_data_alloc(c, b, GFP_NOWAIT)) { if (btree_node_data_alloc(c, b, GFP_NOWAIT, true)) {
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN, true)) {
__btree_node_data_free(b);
goto err; goto err;
}
} }
got_mem: got_mem:
@ -1371,7 +1403,7 @@ wait_on_io:
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
bch2_btree_node_hash_remove(bc, b); bch2_btree_node_hash_remove(bc, b);
btree_node_data_free(bc, b); bch2_btree_node_data_free(bc, b);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
out: out:
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
@ -1484,6 +1516,7 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs_btree_ca
prt_btree_cache_line(out, c, "live:", bc->live[0].nr); prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
prt_btree_cache_line(out, c, "vmalloc:", bc->nr_vmalloc);
prt_btree_cache_line(out, c, "reserve:", bc->nr_reserve); prt_btree_cache_line(out, c, "reserve:", bc->nr_reserve);
prt_btree_cache_line(out, c, "freed:", bc->nr_freeable); prt_btree_cache_line(out, c, "freed:", bc->nr_freeable);
prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty)); prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));

View File

@ -30,7 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *); void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *); int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
void __btree_node_data_free(struct btree *); void bch2_btree_node_data_free_locked(struct btree *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);

View File

@ -261,7 +261,7 @@ static int read_btree_nodes_worker(void *p)
} }
err: err:
if (b) if (b)
__btree_node_data_free(b); bch2_btree_node_data_free_locked(b);
kfree(b); kfree(b);
bio_put(bio); bio_put(bio);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);

View File

@ -95,6 +95,7 @@ void bch2_btree_node_wait_on_write(struct btree *b)
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
} }
__printf(7, 0)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca,
struct btree *b, struct bset *i, struct bkey_packed *k, struct btree *b, struct bset *i, struct bkey_packed *k,
const char *fmt, va_list args) const char *fmt, va_list args)

View File

@ -200,6 +200,7 @@ struct bch_fs_btree_cache {
struct list_head freed_nonpcpu; struct list_head freed_nonpcpu;
struct btree_cache_list live[2]; struct btree_cache_list live[2];
size_t nr_vmalloc;
size_t nr_freeable; size_t nr_freeable;
size_t nr_reserve; size_t nr_reserve;
size_t nr_by_btree[BTREE_ID_NR]; size_t nr_by_btree[BTREE_ID_NR];

View File

@ -519,19 +519,19 @@ static inline void prt_printf_reversed(struct printbuf *out, const char *fmt, ..
printbuf_reverse_from(out, orig_pos); printbuf_reverse_from(out, orig_pos);
} }
static int __bch2_inum_to_path(struct btree_trans *trans, DEFINE_DARRAY(subvol_inum);
u32 subvol, u64 inum, u32 snapshot,
struct printbuf *path) static int bch2_inum_to_path_reversed(struct btree_trans *trans,
u32 subvol, u64 inum, u32 snapshot,
struct printbuf *path)
{ {
unsigned orig_pos = path->pos; struct bch_fs *c = trans->c;
int ret = 0; int ret = 0;
DARRAY(subvol_inum) inums = {}; CLASS(darray_subvol_inum, inums)();
if (!snapshot) { if (!snapshot) {
if (subvol) { if (subvol) {
ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot); ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
if (ret)
goto disconnected;
} else { } else {
struct bkey_s_c k; struct bkey_s_c k;
for_each_btree_key_max_norestart(trans, iter, for_each_btree_key_max_norestart(trans, iter,
@ -544,14 +544,12 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
break; break;
} }
} }
if (ret) if (!ret && !snapshot)
return ret; ret = bch_err_throw(c, ENOENT_snapshot);
if (!snapshot)
goto disconnected;
} }
} }
while (true) { while (!ret) {
subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum }; subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum };
if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) { if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) {
@ -559,22 +557,20 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
break; break;
} }
ret = darray_push(&inums, n); try(darray_push(&inums, n));
if (ret)
goto err;
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;
ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
if (ret) if (ret)
goto disconnected; break;
if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL && if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL &&
inode.bi_inum == BCACHEFS_ROOT_INO) inode.bi_inum == BCACHEFS_ROOT_INO)
break; break;
if (!inode.bi_dir && !inode.bi_dir_offset) { if (!inode.bi_dir && !inode.bi_dir_offset) {
ret = bch_err_throw(trans->c, ENOENT_inode_no_backpointer); ret = bch_err_throw(c, ENOENT_inode_no_backpointer);
goto disconnected; break;
} }
inum = inode.bi_dir; inum = inode.bi_dir;
@ -582,7 +578,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
subvol = inode.bi_parent_subvol; subvol = inode.bi_parent_subvol;
ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot); ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot);
if (ret) if (ret)
goto disconnected; break;
} }
CLASS(btree_iter, d_iter)(trans, BTREE_ID_dirents, CLASS(btree_iter, d_iter)(trans, BTREE_ID_dirents,
@ -590,7 +586,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
struct bkey_s_c_dirent d = bch2_bkey_get_typed(&d_iter, dirent); struct bkey_s_c_dirent d = bch2_bkey_get_typed(&d_iter, dirent);
ret = bkey_err(d.s_c); ret = bkey_err(d.s_c);
if (ret) if (ret)
goto disconnected; break;
struct qstr dirent_name = bch2_dirent_get_name(d); struct qstr dirent_name = bch2_dirent_get_name(d);
@ -599,25 +595,26 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
prt_char(path, '/'); prt_char(path, '/');
} }
if (orig_pos == path->pos) if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
prt_char(path, '/'); prt_printf_reversed(path, "(%s: disconnected at %llu.%u)",
out: bch2_err_str(ret), inum, snapshot);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret;
goto err; }
ret = path->allocation_failure ? -ENOMEM : 0; static int __bch2_inum_to_path(struct btree_trans *trans,
if (ret) u32 subvol, u64 inum, u32 snapshot,
goto err; struct printbuf *path)
{
printbuf_reverse_from(path, orig_pos); unsigned orig_pos = path->pos;
darray_exit(&inums); int ret = bch2_inum_to_path_reversed(trans, subvol, inum, snapshot, path);
return 0; if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
err: path->pos = orig_pos; /* Don't leave garbage output */
darray_exit(&inums); else {
if (!ret && orig_pos == path->pos)
prt_char(path, '/');
printbuf_reverse_from(path, orig_pos);
}
return ret; return ret;
disconnected:
prt_printf_reversed(path, "(disconnected at %llu.%u)", inum, snapshot);
goto out;
} }
int bch2_inum_to_path(struct btree_trans *trans, int bch2_inum_to_path(struct btree_trans *trans,

View File

@ -34,25 +34,22 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
return 0; return 0;
} }
u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root, static u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
snapshot_id_list *skip) snapshot_id_list *skip)
{ {
guard(rcu)(); guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots.table); struct snapshot_table *t = rcu_dereference(c->snapshots.table);
while (true) { while (true) {
u32 id = snapshot_root, subvol = 0; u32 subvol = 0;
while (id && __bch2_snapshot_exists(t, id)) { __for_each_snapshot_child(t, snapshot_root, NULL, id) {
if (!(skip && snapshot_list_has_id(skip, id))) { if (skip && snapshot_list_has_id(skip, id))
u32 s = __snapshot_t(t, id)->subvol; continue;
if (s && (!subvol || s < subvol)) u32 s = __snapshot_t(t, id)->subvol;
subvol = s; if (s && (!subvol || s < subvol))
} subvol = s;
id = bch2_snapshot_tree_next(t, id);
if (id == snapshot_root)
break;
} }
if (subvol || !skip) if (subvol || !skip)

View File

@ -183,7 +183,7 @@ struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *c, u32 id)
return __snapshot_t_mut(c, id); return __snapshot_t_mut(c, id);
} }
void bch2_snapshot_to_text(struct printbuf *out, struct bch_snapshot *s) void bch2_snapshot_to_text(struct printbuf *out, const struct bch_snapshot *s)
{ {
if (BCH_SNAPSHOT_SUBVOL(s)) if (BCH_SNAPSHOT_SUBVOL(s))
prt_str(out, "subvol "); prt_str(out, "subvol ");
@ -271,6 +271,63 @@ fsck_err:
return ret; return ret;
} }
/*
 * Emit one line for snapshot @id: the id followed by a tab, then either the
 * error string from the lookup or the snapshot's flag names and subvolume.
 *
 * A failed lookup is reported in @out rather than propagated; the return value
 * is always 0.
 */
static int snapshot_get_print(struct printbuf *out, struct btree_trans *trans, u32 id)
{
	struct bch_snapshot snap;

	prt_printf(out, "%u \t", id);

	int err = lockrestart_do(trans, bch2_snapshot_lookup(trans, id, &snap));
	if (err) {
		/* Show why the node could not be read and move on */
		prt_str(out, bch2_err_str(err));
		prt_newline(out);
		return 0;
	}

	if (BCH_SNAPSHOT_SUBVOL(&snap))
		prt_str(out, "subvol ");
	if (BCH_SNAPSHOT_WILL_DELETE(&snap))
		prt_str(out, "will_delete ");
	if (BCH_SNAPSHOT_DELETED(&snap))
		prt_str(out, "deleted ");
	if (BCH_SNAPSHOT_NO_KEYS(&snap))
		prt_str(out, "no_keys ");

	prt_printf(out, "subvol %u", le32_to_cpu(snap.subvol));
	prt_newline(out);
	return 0;
}
/*
 * Walk the nodes visited by __for_each_snapshot_child() from @start, under RCU,
 * and return the largest depth value reported during the walk.
 */
static unsigned snapshot_tree_max_depth(struct bch_fs *c, u32 start)
{
	unsigned cur = 0, deepest = 0;

	guard(rcu)();
	struct snapshot_table *t = rcu_dereference(c->snapshots.table);

	__for_each_snapshot_child(t, start, &cur, id) {
		if (cur > deepest)
			deepest = cur;
	}

	return deepest;
}
/*
 * Print the snapshot nodes visited by for_each_snapshot_child() from @start,
 * one per line, indenting two columns per level of depth.
 *
 * A tabstop is pushed far enough right to clear the deepest indentation, so
 * the text after each id lines up.
 */
static int bch2_snapshot_tree_keys_to_text(struct printbuf *out, struct btree_trans *trans, u32 start)
{
	struct bch_fs *c = trans->c;
	unsigned deepest = snapshot_tree_max_depth(c, start);

	printbuf_tabstop_push(out, out->indent + 12 + 2 * deepest);

	unsigned depth = 0, last_depth = 0;
	for_each_snapshot_child(c, start, &depth, id) {
		/* Signed change in depth since the previously printed node */
		int delta = depth - last_depth;

		if (delta <= 0)
			printbuf_indent_sub(out, -delta * 2);
		else
			printbuf_indent_add(out, delta * 2);
		last_depth = depth;

		try(snapshot_get_print(out, trans, id));
	}

	/* Undo whatever indentation the last printed node left behind */
	printbuf_indent_sub(out, last_depth * 2);
	return 0;
}
static int __bch2_mark_snapshot(struct btree_trans *trans, static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level, enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new, struct bkey_s_c old, struct bkey_s_c new,
@ -352,24 +409,38 @@ static u32 bch2_snapshot_right_child(struct snapshot_table *t, u32 id)
return bch2_snapshot_child(t, id, 1); return bch2_snapshot_child(t, id, 1);
} }
/*
 * Advance one step in a walk over a snapshot tree.
 *
 * If @id has a left child, that child is returned. Otherwise the walk climbs
 * through parents until it finds one whose right child exists and is not the
 * node it just came from, and returns that child. Returns 0 when no parent
 * remains.
 *
 * @depth, if non-NULL, is incremented for every step down and decremented for
 * every step up. Callers that do not track depth may pass NULL.
 */
u32 __bch2_snapshot_tree_next(struct snapshot_table *t, u32 id, unsigned *depth)
{
	/*
	 * Scratch counter for the NULL case: same type as *depth, and
	 * initialized so the increments below never read an indeterminate value.
	 */
	unsigned _depth = 0;
	if (!depth)
		depth = &_depth;

	u32 n = bch2_snapshot_left_child(t, id);
	if (n) {
		(*depth)++;
		return n;
	}

	u32 parent;
	while ((parent = __bch2_snapshot_parent(t, id))) {
		(*depth)--;
		n = bch2_snapshot_right_child(t, parent);
		if (n && n != id) {
			(*depth)++;
			return n;
		}
		id = parent;
	}

	return 0;
}
/*
 * Wrapper around __bch2_snapshot_tree_next() that looks up the filesystem's
 * snapshot table under RCU for the duration of the call.
 */
u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id, unsigned *depth)
{
	guard(rcu)();
	struct snapshot_table *t = rcu_dereference(c->snapshots.table);

	return __bch2_snapshot_tree_next(t, id, depth);
}
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s) struct bch_snapshot *s)
{ {
@ -404,6 +475,53 @@ int __bch2_get_snapshot_overwrites(struct btree_trans *trans,
return ret; return ret;
} }
static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d, bool full)
{
size_t limit = !full ? 10 : SIZE_MAX;
prt_printf(out, "deleting from trees");
darray_for_each_max(d->deleting_from_trees, i, limit)
prt_printf(out, " %u", *i);
if (d->deleting_from_trees.nr > limit)
prt_str(out, " (many)");
prt_newline(out);
prt_printf(out, "deleting leaves");
darray_for_each_max(d->delete_leaves, i, limit)
prt_printf(out, " %u", *i);
if (d->delete_leaves.nr > limit)
prt_str(out, " (many)");
prt_newline(out);
prt_printf(out, "interior");
darray_for_each_max(d->delete_interior, i, limit)
prt_printf(out, " %u->%u", i->id, i->live_child);
if (d->delete_interior.nr > limit)
prt_str(out, " (many)");
prt_newline(out);
}
void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
struct snapshot_delete *d = &c->snapshots.delete;
if (!d->running) {
prt_str(out, "(not running)");
return;
}
scoped_guard(mutex, &d->progress_lock) {
prt_printf(out, "Snapshot deletion v%u\n", d->version);
prt_str(out, "Progress: ");
bch2_progress_to_text(out, &d->progress);
prt_newline(out);
bch2_snapshot_delete_nodes_to_text(out, d, false);
}
}
/* /*
* Mark a snapshot as deleted, for future cleanup: * Mark a snapshot as deleted, for future cleanup:
*/ */
@ -446,11 +564,9 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
swap(s->children[0], s->children[1]); swap(s->children[0], s->children[1]);
} }
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id, bool delete_interior)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
u32 parent_id, child_id;
unsigned i;
struct bkey_i_snapshot *s = struct bkey_i_snapshot *s =
bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot); bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, id), 0, snapshot);
@ -462,10 +578,17 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
return ret; return ret;
BUG_ON(BCH_SNAPSHOT_DELETED(&s->v)); BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
BUG_ON(s->v.children[1]);
parent_id = le32_to_cpu(s->v.parent); if (s->v.children[1]) {
child_id = le32_to_cpu(s->v.children[0]); CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "deleting node with two children:\n");
bch2_snapshot_tree_keys_to_text(&msg.m, trans, id);
bch2_snapshot_delete_nodes_to_text(&msg.m, &c->snapshots.delete, true);
return -EINVAL;
}
u32 parent_id = le32_to_cpu(s->v.parent);
u32 child_id = le32_to_cpu(s->v.children[0]);
if (parent_id) { if (parent_id) {
struct bkey_i_snapshot *parent = struct bkey_i_snapshot *parent =
@ -478,6 +601,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
return ret; return ret;
/* find entry in parent->children for node being deleted */ /* find entry in parent->children for node being deleted */
unsigned i;
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
if (le32_to_cpu(parent->v.children[i]) == id) if (le32_to_cpu(parent->v.children[i]) == id)
break; break;
@ -493,6 +617,15 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
} }
if (child_id) { if (child_id) {
if (!delete_interior) {
CLASS(bch_log_msg, msg)(c);
prt_printf(&msg.m, "deleting interior node %llu with child %u at runtime:\n",
s->k.p.offset, child_id);
bch2_snapshot_tree_keys_to_text(&msg.m, trans, id);
bch2_snapshot_delete_nodes_to_text(&msg.m, &c->snapshots.delete, true);
return -EINVAL;
}
struct bkey_i_snapshot *child = struct bkey_i_snapshot *child =
bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, child_id), bch2_bkey_get_mut_typed(trans, BTREE_ID_snapshots, POS(0, child_id),
0, snapshot); 0, snapshot);
@ -503,12 +636,6 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
return ret; return ret;
child->v.parent = cpu_to_le32(parent_id); child->v.parent = cpu_to_le32(parent_id);
if (!child->v.parent) {
child->v.skip[0] = 0;
child->v.skip[1] = 0;
child->v.skip[2] = 0;
}
} }
if (!parent_id) { if (!parent_id) {
@ -517,9 +644,6 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
* snapshot_tree entry to point to the new root, or delete it if * snapshot_tree entry to point to the new root, or delete it if
* this is the last snapshot ID in this tree: * this is the last snapshot ID in this tree:
*/ */
BUG_ON(s->v.children[1]);
struct bkey_i_snapshot_tree *s_t = errptr_try(bch2_bkey_get_mut_typed(trans, struct bkey_i_snapshot_tree *s_t = errptr_try(bch2_bkey_get_mut_typed(trans,
BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)), BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
0, snapshot_tree)); 0, snapshot_tree));
@ -687,25 +811,6 @@ static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
return i ? i->live_child : 0; return i ? i->live_child : 0;
} }
static unsigned live_child(struct bch_fs *c, u32 start)
{
struct snapshot_delete *d = &c->snapshots.delete;
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
for (u32 id = bch2_snapshot_tree_next(t, start);
id && id != start;
id = bch2_snapshot_tree_next(t, id))
if (bch2_snapshot_is_leaf(c, id) &&
bch2_snapshot_exists(c, id) &&
!snapshot_list_has_id(&d->delete_leaves, id) &&
!interior_delete_has_id(&d->delete_interior, id))
return id;
return 0;
}
static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id) static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
{ {
return snapshot_list_has_id(&d->delete_leaves, id) || return snapshot_list_has_id(&d->delete_leaves, id) ||
@ -895,40 +1000,42 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
return 0; return 0;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshots.delete;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v) || if (BCH_SNAPSHOT_SUBVOL(s.v) ||
BCH_SNAPSHOT_NO_KEYS(s.v) ||
BCH_SNAPSHOT_DELETED(s.v)) BCH_SNAPSHOT_DELETED(s.v))
return 0; return 0;
struct snapshot_delete *d = &c->snapshots.delete;
guard(mutex)(&d->progress_lock); guard(mutex)(&d->progress_lock);
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
live_children += child && u32 live_child = 0, nr_live_children = 0;
!snapshot_list_has_id(&d->delete_leaves, child); for (unsigned i = 0; i < 2; i++) {
u32 id = le32_to_cpu(s.v->children[i]);
if (id && !snapshot_list_has_id(&d->delete_leaves, id)) {
nr_live_children++;
live_child = interior_delete_has_id(&d->delete_interior, id) ?: id;
}
} }
u32 tree = bch2_snapshot_tree(c, s.k->p.offset); if (nr_live_children == 2 ||
(nr_live_children == 1 && BCH_SNAPSHOT_NO_KEYS(s.v)))
return 0;
if (live_children == 0) { try(snapshot_list_add_nodup(c, &d->deleting_from_trees,
try(snapshot_list_add_nodup(c, &d->deleting_from_trees, tree)); bch2_snapshot_tree(c, s.k->p.offset)));
if (!nr_live_children) {
try(snapshot_list_add(c, &d->delete_leaves, s.k->p.offset)); try(snapshot_list_add(c, &d->delete_leaves, s.k->p.offset));
} else if (live_children == 1) { } else {
struct snapshot_interior_delete n = { struct snapshot_interior_delete n = {
.id = s.k->p.offset, .id = s.k->p.offset,
.live_child = live_child(c, s.k->p.offset), .live_child = live_child,
}; };
if (!n.live_child) { if (n.id == n.live_child) {
bch_err(c, "error finding live child of snapshot %u", n.id); bch_err(c, "error finding live descendent of %llu", s.k->p.offset);
return -EINVAL; return -EINVAL;
} else {
try(snapshot_list_add_nodup(c, &d->deleting_from_trees, tree));
try(darray_push(&d->delete_interior, n));
} }
} }
@ -1009,33 +1116,6 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &s->k_i, 0); return bch2_trans_update(trans, iter, &s->k_i, 0);
} }
static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
{
prt_printf(out, "deleting from trees");
darray_for_each_max(d->deleting_from_trees, i, 10)
prt_printf(out, " %u", *i);
if (d->deleting_from_trees.nr > 10)
prt_str(out, " (many)");
prt_newline(out);
prt_printf(out, "deleting leaves");
darray_for_each_max(d->delete_leaves, i, 10)
prt_printf(out, " %u", *i);
if (d->delete_leaves.nr > 10)
prt_str(out, " (many)");
prt_newline(out);
prt_printf(out, "interior");
darray_for_each_max(d->delete_interior, i, 10)
prt_printf(out, " %u->%u", i->id, i->live_child);
if (d->delete_interior.nr > 10)
prt_str(out, " (many)");
prt_newline(out);
}
static int delete_dead_snapshots_locked(struct bch_fs *c) static int delete_dead_snapshots_locked(struct bch_fs *c)
{ {
CLASS(btree_trans, trans)(c); CLASS(btree_trans, trans)(c);
@ -1052,7 +1132,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
return 0; return 0;
CLASS(printbuf, buf)(); CLASS(printbuf, buf)();
bch2_snapshot_delete_nodes_to_text(&buf, d); bch2_snapshot_delete_nodes_to_text(&buf, d, false);
try(commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf))); try(commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)));
try(!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2) try(!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
@ -1061,7 +1141,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
darray_for_each(d->delete_leaves, i) darray_for_each(d->delete_leaves, i)
try(commit_do(trans, NULL, NULL, 0, try(commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i))); bch2_snapshot_node_delete(trans, *i, false)));
darray_for_each(d->delete_interior, i) darray_for_each(d->delete_interior, i)
try(commit_do(trans, NULL, NULL, 0, try(commit_do(trans, NULL, NULL, 0,
@ -1132,24 +1212,6 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots); enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
} }
void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
struct snapshot_delete *d = &c->snapshots.delete;
if (!d->running) {
prt_str(out, "(not running)");
return;
}
scoped_guard(mutex, &d->progress_lock) {
prt_printf(out, "Snapshot deletion v%u\n", d->version);
prt_str(out, "Progress: ");
bch2_progress_to_text(out, &d->progress);
prt_newline(out);
bch2_snapshot_delete_nodes_to_text(out, d);
}
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
enum btree_id id, enum btree_id id,
struct bpos pos) struct bpos pos)
@ -1175,20 +1237,29 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k, static int bch2_get_dead_interior_snapshots(struct btree_trans *trans, struct bkey_s_c k,
interior_delete_list *delete) interior_delete_list *delete)
{ {
struct bch_fs *c = trans->c; if (k.k->type != KEY_TYPE_snapshot)
return 0;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_NO_KEYS(s.v)) {
u32 live_child = 0, nr_live_children = 0;
for (unsigned i = 0; i < 2; i++) {
u32 id = le32_to_cpu(s.v->children[i]);
if (id) {
nr_live_children++;
live_child = interior_delete_has_id(delete, id) ?: id;
}
}
if (nr_live_children != 1)
return 0;
if (k.k->type == KEY_TYPE_snapshot &&
BCH_SNAPSHOT_NO_KEYS(bkey_s_c_to_snapshot(k).v)) {
struct snapshot_interior_delete n = { struct snapshot_interior_delete n = {
.id = k.k->p.offset, .id = k.k->p.offset,
.live_child = live_child(c, k.k->p.offset), .live_child = live_child,
}; };
if (!n.live_child) {
bch_err(c, "error finding live child of snapshot %u", n.id);
return -EINVAL;
}
return darray_push(delete, n); return darray_push(delete, n);
} }
@ -1216,7 +1287,7 @@ int bch2_delete_dead_interior_snapshots(struct bch_fs *c)
darray_for_each(delete, i) { darray_for_each(delete, i) {
int ret = commit_do(trans, NULL, NULL, 0, int ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, i->id)); bch2_snapshot_node_delete(trans, i->id, true));
if (!bch2_err_matches(ret, EROFS)) if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "deleting snapshot %u", i->id); bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret) if (ret)

View File

@ -16,7 +16,7 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *);
int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *);
void bch2_snapshot_to_text(struct printbuf *, struct bch_snapshot *); void bch2_snapshot_to_text(struct printbuf *, const struct bch_snapshot *);
void bch2_snapshot_key_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_snapshot_key_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context); struct bkey_validate_context);
@ -110,7 +110,6 @@ static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
return id; return id;
} }
u32 bch2_snapshot_oldest_subvol(struct bch_fs *, u32, snapshot_id_list *);
u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32); u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
@ -232,7 +231,18 @@ static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, s
return 0; return 0;
} }
u32 bch2_snapshot_tree_next(struct snapshot_table *, u32); u32 __bch2_snapshot_tree_next(struct snapshot_table *, u32, unsigned *);
u32 bch2_snapshot_tree_next(struct bch_fs *, u32, unsigned *);
/*
 * Iterate @_id over snapshot ids, beginning at @_start and advancing with
 * __bch2_snapshot_tree_next() on table @_t; @_depth is passed through to it.
 * The loop ends when the id becomes 0 or exceeds @_start.
 *
 * @_start is parenthesized so that an expression argument cannot re-associate
 * with the comparison; it is evaluated on every iteration, so it should be
 * free of side effects.
 */
#define __for_each_snapshot_child(_t, _start, _depth, _id)		\
	for (u32 _id = (_start);					\
	     _id && _id <= (_start);					\
	     _id = __bch2_snapshot_tree_next(_t, _id, _depth))
/*
 * Iterate @_id over snapshot ids, beginning at @_start and advancing with
 * bch2_snapshot_tree_next() on filesystem @_c; @_depth is passed through to
 * it. The loop ends when the id becomes 0 or exceeds @_start.
 *
 * @_start is parenthesized so that an expression argument cannot re-associate
 * with the comparison; it is evaluated on every iteration, so it should be
 * free of side effects.
 */
#define for_each_snapshot_child(_c, _start, _depth, _id)		\
	for (u32 _id = (_start);					\
	     _id && _id <= (_start);					\
	     _id = bch2_snapshot_tree_next(_c, _id, _depth))
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s); struct bch_snapshot *s);

View File

@ -66,18 +66,16 @@ DEFINE_CLASS(_type, _type, darray_exit(&(_T)), (_type) {}, void)
#define DEFINE_DARRAY_CLASS_FREE_ITEM(_type, _free) \ #define DEFINE_DARRAY_CLASS_FREE_ITEM(_type, _free) \
DEFINE_CLASS(_type, _type, darray_exit_free_item(&(_T), _free), (_type) {}, void) DEFINE_CLASS(_type, _type, darray_exit_free_item(&(_T), _free), (_type) {}, void)
#define DEFINE_DARRAY(_type) \ #define DEFINE_DARRAY_NAMED(_name, _type) \
typedef DARRAY(_type) darray_##_type; \ typedef DARRAY(_type) _name; \
DEFINE_DARRAY_CLASS(darray_##_type) DEFINE_DARRAY_CLASS(_name)
#define DEFINE_DARRAY(_type) DEFINE_DARRAY_NAMED(darray_##_type, _type)
#define DEFINE_DARRAY_PREALLOCATED(_type, _nr) \ #define DEFINE_DARRAY_PREALLOCATED(_type, _nr) \
typedef DARRAY_PREALLOCATED(_type, _nr) darray_##_type; \ typedef DARRAY_PREALLOCATED(_type, _nr) darray_##_type; \
DEFINE_DARRAY_CLASS(darray_##_type) DEFINE_DARRAY_CLASS(darray_##_type)
#define DEFINE_DARRAY_NAMED(_name, _type) \
typedef DARRAY(_type) _name; \
DEFINE_DARRAY_CLASS(_name)
#define DEFINE_DARRAY_NAMED_FREE_ITEM(_name, _type, _free) \ #define DEFINE_DARRAY_NAMED_FREE_ITEM(_name, _type, _free) \
typedef DARRAY(_type) _name; \ typedef DARRAY(_type) _name; \
DEFINE_DARRAY_CLASS_FREE_ITEM(_name, _free) DEFINE_DARRAY_CLASS_FREE_ITEM(_name, _free)