From 6f8750c30eabbd5ccd87e10fc5f0d94278ed95c8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Oct 2021 16:27:01 -0400 Subject: [PATCH] Update bcachefs sources to b1899a0bd9 bcachefs: Move bch2_evict_subvolume_inodes() to fs.c --- .bcachefs_revision | 2 +- libbcachefs/bcachefs.h | 4 + libbcachefs/bcachefs_format.h | 1 + libbcachefs/buckets.c | 11 ++- libbcachefs/fs-common.c | 30 ++----- libbcachefs/fs-common.h | 2 +- libbcachefs/fs-ioctl.c | 2 +- libbcachefs/fs.c | 51 +++++++++++- libbcachefs/fs.h | 6 +- libbcachefs/fsck.c | 20 ++++- libbcachefs/inode.c | 6 +- libbcachefs/recovery.c | 20 ++--- libbcachefs/subvolume.c | 145 ++++++++++++++++++++++++++++------ libbcachefs/subvolume.h | 15 +++- libbcachefs/subvolume_types.h | 11 +++ libbcachefs/super.c | 7 -- 16 files changed, 250 insertions(+), 83 deletions(-) create mode 100644 libbcachefs/subvolume_types.h diff --git a/.bcachefs_revision b/.bcachefs_revision index faa5b410..49d95383 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -4c58f10d4897ff15849e6c028a9f665e03b8d4c0 +b1899a0bd9af8040b592cfdfe2df3c0c1869b3bb diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 0efb1aaa..0c13fa19 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -352,6 +352,7 @@ enum bch_time_stats { #include "quota_types.h" #include "rebalance_types.h" #include "replicas_types.h" +#include "subvolume_types.h" #include "super_types.h" /* Number of nodes btree coalesce will try to coalesce at once */ @@ -656,6 +657,9 @@ struct bch_fs { struct bch_snapshot_table __rcu *snapshot_table; struct mutex snapshot_table_lock; struct work_struct snapshot_delete_work; + struct work_struct snapshot_wait_for_pagecache_and_delete_work; + struct snapshot_id_list snapshots_unlinked; + struct mutex snapshots_unlinked_lock; /* BTREE CACHE */ struct bio_set btree_bio; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 0a78d0f1..9b1be714 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -970,6 +970,7 @@ LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) * can delete it (or whether it should just be rm -rf'd) */ LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) +LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) /* Snapshots */ diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 45215d0a..40084edd 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1711,6 +1711,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_i *n; __le64 *refcount; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; + char buf[200]; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, *idx), @@ -1730,17 +1731,19 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, refcount = bkey_refcount(n); if (!refcount) { + bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); bch2_fs_inconsistent(c, - "%llu:%llu len %u points to nonexistent indirect extent %llu", - p.k->p.inode, p.k->p.offset, p.k->size, *idx); + "nonexistent indirect extent at %llu while marking\n %s", + *idx, buf); ret = -EIO; goto err; } if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { + bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); bch2_fs_inconsistent(c, - "%llu:%llu len %u idx %llu indirect extent refcount underflow", - p.k->p.inode, p.k->p.offset, p.k->size, *idx); + "indirect extent refcount underflow at %llu while marking\n %s", + *idx, buf); ret = -EIO; goto err; } diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index c49de741..5f3429e9 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -239,7 +239,7 @@ int bch2_unlink_trans(struct btree_trans *trans, struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *inode_u, const struct qstr *name, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; @@ -267,35 +267,19 @@ int bch2_unlink_trans(struct btree_trans *trans, if (ret) goto err; - if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) { + if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { ret = bch2_empty_dir_trans(trans, inum); if (ret) goto err; } - if (deleting_snapshot < 0 && - inode_u->bi_subvol) { - struct bch_subvolume s; - - ret = bch2_subvolume_get(trans, inode_u->bi_subvol, true, - BTREE_ITER_CACHED| - BTREE_ITER_WITH_UPDATES, - &s); - if (ret) - goto err; - - if (BCH_SUBVOLUME_SNAP(&s)) - deleting_snapshot = 1; + if (deleting_snapshot && !inode_u->bi_subvol) { + ret = -ENOENT; + goto err; } - if (deleting_snapshot == 1) { - if (!inode_u->bi_subvol) { - ret = -ENOENT; - goto err; - } - - ret = bch2_subvolume_delete(trans, inode_u->bi_subvol, - deleting_snapshot); + if (deleting_snapshot || inode_u->bi_subvol) { + ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); if (ret) goto err; diff --git a/libbcachefs/fs-common.h b/libbcachefs/fs-common.h index 9bb0a967..dde23785 100644 --- a/libbcachefs/fs-common.h +++ b/libbcachefs/fs-common.h @@ -26,7 +26,7 @@ int bch2_link_trans(struct btree_trans *, int bch2_unlink_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, - const struct qstr *, int); + const struct qstr *, bool); int bch2_rename_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index 513f7a7a..9f329a62 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -441,7 +441,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, dir = path.dentry->d_parent->d_inode; - ret = __bch2_unlink(dir, path.dentry, 1); + ret = __bch2_unlink(dir, path.dentry, true); if (!ret) { fsnotify_rmdir(dir, path.dentry); d_delete(path.dentry); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 45a2af3f..7a778c62 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -489,7 +489,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, } int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); @@ -526,7 +526,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - return __bch2_unlink(vdir, dentry, -1); + return __bch2_unlink(vdir, dentry, false); } static int bch2_symlink(struct user_namespace *mnt_userns, @@ -1310,6 +1310,53 @@ static void bch2_evict_inode(struct inode *vinode) } } +void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) +{ + struct super_block *sb = c->vfs_sb; + struct inode *inode; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + spin_unlock(&sb->s_inode_list_lock); +again: + cond_resched(); + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + if (!(inode->i_state & I_DONTCACHE)) { + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + + spin_lock(&inode->i_lock); + if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && + !(inode->i_state & I_FREEING)) { + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); + goto again; + } + + spin_unlock(&inode->i_lock); + } + spin_unlock(&sb->s_inode_list_lock); +} + static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index 48fc504e..bf62e80f 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/fs.h @@ -184,13 +184,17 @@ int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, int bch2_setattr_nonsize(struct user_namespace *, struct bch_inode_info *, struct iattr *); -int __bch2_unlink(struct inode *, struct dentry *, int); +int __bch2_unlink(struct inode *, struct dentry *, bool); + +void bch2_evict_subvolume_inodes(struct bch_fs *, struct snapshot_id_list *); void bch2_vfs_exit(void); int bch2_vfs_init(void); #else +static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) {} static inline void bch2_vfs_exit(void) {} static inline int bch2_vfs_init(void) { return 0; } diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 197b9079..6b3eecde 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -256,7 +256,7 @@ retry: /* Subvolume root? */ if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1); + ret = bch2_subvolume_delete(trans, inode_u.bi_subvol); if (ret) goto err; } @@ -992,12 +992,28 @@ static int check_subvols(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; int ret; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_subvolume) + continue; + + subvol = bkey_s_c_to_subvolume(k); + + if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW, + bch2_subvolume_delete(&trans, iter.pos.offset)); + if (ret) { + bch_err(c, "error deleting subvolume %llu: %i", + iter.pos.offset, ret); + break; + } + } } bch2_trans_iter_exit(&trans, &iter); @@ -2258,7 +2274,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter) u->v.front_pad = 0; u->v.back_pad = 0; - return bch2_trans_update(trans, iter, &u->k_i, 0); + return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN); } static int fix_reflink_p(struct bch_fs *c) diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 9130d571..462c1f43 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -709,11 +709,7 @@ retry: bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); /* Subvolume root? */ - if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(&trans, inode_u.bi_subvol, -1); - if (ret) - goto err; - } + BUG_ON(inode_u.bi_subvol); bkey_inode_generation_init(&delete.k_i); delete.k.p = iter.pos; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 6bf9c48a..da9c3ea5 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1081,15 +1081,17 @@ int bch2_fs_recovery(struct bch_fs *c) set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); } - if (c->sb.version < bcachefs_metadata_version_inode_backpointers) { - bch_info(c, "version prior to inode backpointers, upgrade and fsck required"); - c->opts.version_upgrade = true; - c->opts.fsck = true; - c->opts.fix_errors = FSCK_OPT_YES; - } else if (c->sb.version < bcachefs_metadata_version_subvol_dirent) { - bch_info(c, "filesystem version is prior to subvol_dirent - upgrading"); - c->opts.version_upgrade = true; - c->opts.fsck = true; + if (!c->opts.nochanges) { + if (c->sb.version < bcachefs_metadata_version_inode_backpointers) { + bch_info(c, "version prior to inode backpointers, upgrade and fsck required"); + c->opts.version_upgrade = true; + c->opts.fsck = true; + c->opts.fix_errors = FSCK_OPT_YES; + } else if (c->sb.version < bcachefs_metadata_version_subvol_dirent) { + bch_info(c, "filesystem version is prior to subvol_dirent - upgrading"); + c->opts.version_upgrade = true; + c->opts.fsck = true; + } } ret = bch2_blacklist_table_initialize(c); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 9bd8d61c..4d385c9e 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -4,6 +4,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "error.h" +#include "fs.h" #include "subvolume.h" /* Snapshot tree: */ @@ -541,23 +542,6 @@ err: return ret; } -/* List of snapshot IDs that are being deleted: */ -struct snapshot_id_list { - u32 nr; - u32 size; - u32 *d; -}; - -static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) -{ - unsigned i; - - for (i = 0; i < s->nr; i++) - if (id == s->d[i]) - return true; - return false; -} - static int snapshot_id_add(struct snapshot_id_list *s, u32 id) { BUG_ON(snapshot_list_has_id(s, id)); @@ -819,9 +803,11 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, return ret; } -/* XXX: mark snapshot id for deletion, walk btree and delete: */ -int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, - int deleting_snapshot) +/* + * Delete subvolume, mark snapshot ID as deleted, queue up snapshot + * deletion/cleanup: + */ +int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c k; @@ -849,12 +835,6 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - if (deleting_snapshot >= 0 && - deleting_snapshot != BCH_SUBVOLUME_SNAP(subvol.v)) { - ret = -ENOENT; - goto err; - } - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); ret = PTR_ERR_OR_ZERO(delete); if (ret) @@ -880,6 +860,116 @@ err: return ret; } +void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, + snapshot_wait_for_pagecache_and_delete_work); + struct snapshot_id_list s; + u32 *id; + int ret = 0; + + while (!ret) { + mutex_lock(&c->snapshots_unlinked_lock); + s = c->snapshots_unlinked; + memset(&c->snapshots_unlinked, 0, sizeof(c->snapshots_unlinked)); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (!s.nr) + break; + + bch2_evict_subvolume_inodes(c, &s); + + for (id = s.d; id < s.d + s.nr; id++) { + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_subvolume_delete(&trans, *id)); + if (ret) { + bch_err(c, "error %i deleting subvolume %u", ret, *id); + break; + } + } + + kfree(s.d); + } + + percpu_ref_put(&c->writes); +} + +struct subvolume_unlink_hook { + struct btree_trans_commit_hook h; + u32 subvol; +}; + +int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, + struct btree_trans_commit_hook *_h) +{ + struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); + struct bch_fs *c = trans->c; + int ret = 0; + + mutex_lock(&c->snapshots_unlinked_lock); + if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) + ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (ret) + return ret; + + if (unlikely(!percpu_ref_tryget(&c->writes))) + return -EROFS; + + if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) + percpu_ref_put(&c->writes); + return 0; +} + +int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_subvolume *n; + struct subvolume_unlink_hook *h; + int ret = 0; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, + POS(0, subvolid), + BTREE_ITER_CACHED| + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_subvolume) { + bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); + ret = -EIO; + goto err; + } + + n = bch2_trans_kmalloc(trans, sizeof(*n)); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + goto err; + + bkey_reassemble(&n->k_i, k); + SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); + + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); + if (ret) + goto err; + + h = bch2_trans_kmalloc(trans, sizeof(*h)); + ret = PTR_ERR_OR_ZERO(h); + if (ret) + goto err; + + h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; + h->subvol = subvolid; + bch2_trans_commit_hook(trans, &h->h); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + int bch2_subvolume_create(struct btree_trans *trans, u64 inode, u32 src_subvolid, u32 *new_subvolid, @@ -977,5 +1067,8 @@ err: int bch2_fs_subvolumes_init(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); + INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, + bch2_subvolume_wait_for_pagecache_and_delete); + mutex_init(&c->snapshots_unlinked_lock); return 0; } diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index f98c8c0d..b5067dc6 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_SUBVOLUME_H #define _BCACHEFS_SUBVOLUME_H +#include "subvolume_types.h" + void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); const char *bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c); @@ -92,6 +94,16 @@ static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, return 0; } +static inline bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) +{ + unsigned i; + + for (i = 0; i < s->nr; i++) + if (id == s->d[i]) + return true; + return false; +} + int bch2_fs_snapshots_check(struct bch_fs *); void bch2_fs_snapshots_exit(struct bch_fs *); int bch2_fs_snapshots_start(struct bch_fs *); @@ -108,7 +120,8 @@ int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -int bch2_subvolume_delete(struct btree_trans *, u32, int); +int bch2_subvolume_delete(struct btree_trans *, u32); +int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, u32 *, u32 *, bool); diff --git a/libbcachefs/subvolume_types.h b/libbcachefs/subvolume_types.h new file mode 100644 index 00000000..9410b958 --- /dev/null +++ b/libbcachefs/subvolume_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUBVOLUME_TYPES_H +#define _BCACHEFS_SUBVOLUME_TYPES_H + +struct snapshot_id_list { + u32 nr; + u32 size; + u32 *d; +}; + +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 486a0199..e0c93cb5 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -405,13 +405,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) if (ret) goto err; - /* - * We need to write out a journal entry before we start doing btree - * updates, to ensure that on unclean shutdown new journal blacklist - * entries are created: - */ - bch2_journal_meta(&c->journal); - clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); for_each_rw_member(ca, c, i)