diff --git a/.bcachefs_revision b/.bcachefs_revision index 2279c3ab..47478b73 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e82e65627960a46945b78a5e5e946b23b8f08972 +0415b63b198ccf8bf5eee3af73accd60e94ad63a diff --git a/Makefile b/Makefile index 04d466ea..9859b020 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ deb: all .PHONE: update-bcachefs-sources update-bcachefs-sources: - git rm -rf libbcachefs + git rm -rf --ignore-unmatch libbcachefs cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/ cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/ echo `cd $(LINUX_DIR); git rev-parse HEAD` > .bcachefs_revision diff --git a/cmd_migrate.c b/cmd_migrate.c index 0e7a882a..e79f2e06 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -130,14 +130,14 @@ static void create_dirent(struct bch_fs *c, struct bch_hash_info parent_hash_info = bch2_hash_info_init(c, parent); struct qstr qname = { { { .len = strlen(name), } }, .name = name }; - int ret = bch2_dirent_create(c, parent->inum, &parent_hash_info, + int ret = bch2_dirent_create(c, parent->bi_inum, &parent_hash_info, mode_to_type(mode), &qname, inum, NULL, BCH_HASH_SET_MUST_CREATE); if (ret) die("error creating file: %s", strerror(-ret)); if (S_ISDIR(mode)) - parent->i_nlink++; + parent->bi_nlink++; } static void create_link(struct bch_fs *c, @@ -149,7 +149,7 @@ static void create_link(struct bch_fs *c, if (ret) die("error looking up hardlink: %s", strerror(-ret)); - inode.i_nlink++; + inode.bi_nlink++; update_inode(c, &inode); create_dirent(c, parent, name, inum, mode); @@ -171,7 +171,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c, if (ret) die("error creating file: %s", strerror(-ret)); - create_dirent(c, parent, name, new_inode.inum, mode); + create_dirent(c, parent, name, new_inode.bi_inum, mode); return new_inode; } @@ -207,9 +207,9 @@ static const struct xattr_handler *xattr_resolve_name(const char **name) static void copy_times(struct bch_fs *c, struct 
bch_inode_unpacked *dst, struct stat *src) { - dst->i_atime = timespec_to_bch2_time(c, src->st_atim); - dst->i_mtime = timespec_to_bch2_time(c, src->st_mtim); - dst->i_ctime = timespec_to_bch2_time(c, src->st_ctim); + dst->bi_atime = timespec_to_bch2_time(c, src->st_atim); + dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim); + dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim); } static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, @@ -236,7 +236,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, const struct xattr_handler *h = xattr_resolve_name(&attr); - int ret = __bch2_xattr_set(c, dst->inum, &hash_info, attr, + int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr, val, val_size, 0, h->flags, NULL); if (ret < 0) die("error creating xattr: %s", strerror(-ret)); @@ -266,11 +266,11 @@ static void write_data(struct bch_fs *c, die("error reserving space in new filesystem: %s", strerror(-ret)); bch2_write_op_init(&op, c, res, c->write_points, - POS(dst_inode->inum, dst_offset >> 9), NULL, 0); + POS(dst_inode->bi_inum, dst_offset >> 9), NULL, 0); closure_call(&op.cl, bch2_write, NULL, &cl); closure_sync(&cl); - dst_inode->i_sectors += len >> 9; + dst_inode->bi_sectors += len >> 9; } static char buf[1 << 20] __aligned(PAGE_SIZE); @@ -316,7 +316,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, length); e = bkey_extent_init(&k.k); - e->k.p.inode = dst->inum; + e->k.p.inode = dst->bi_inum; e->k.p.offset = logical + sectors; e->k.size = sectors; extent_ptr_append(e, (struct bch_extent_ptr) { @@ -340,7 +340,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, bch2_disk_reservation_put(c, &res); - dst->i_sectors += sectors; + dst->bi_sectors += sectors; logical += sectors; physical += sectors; length -= sectors; @@ -453,7 +453,7 @@ static void copy_dir(struct copy_fs_state *s, stat.st_mode, stat.st_rdev); if (dst_inum) - *dst_inum = inode.inum; + *dst_inum = 
inode.bi_inum; copy_times(c, &inode, &stat); copy_xattrs(c, &inode, d->d_name); @@ -467,14 +467,14 @@ static void copy_dir(struct copy_fs_state *s, close(fd); break; case DT_REG: - inode.i_size = stat.st_size; + inode.bi_size = stat.st_size; fd = xopen(d->d_name, O_RDONLY|O_NOATIME); copy_file(c, &inode, fd, child_path, &s->extents); close(fd); break; case DT_LNK: - inode.i_size = stat.st_size; + inode.bi_size = stat.st_size; copy_link(c, &inode, d->d_name); break; @@ -555,7 +555,7 @@ static void reserve_old_fs_space(struct bch_fs *c, dst = create_file(c, root_inode, "old_migrated_filesystem", 0, 0, S_IFREG|0400, 0); - dst.i_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9; + dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9; ranges_sort_merge(extents); diff --git a/libbcachefs.c b/libbcachefs.c index 90cc54fb..56851a04 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -189,6 +189,8 @@ struct bch_sb *bch2_format(struct format_opts opts, SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH); SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, ilog2(opts.encoded_extent_max)); + SET_BCH_SB_POSIX_ACL(sb, 1); + struct timespec now; if (clock_gettime(CLOCK_REALTIME, &now)) die("error getting current time: %m"); diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 690f4b5b..2632d21c 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -1,4 +1,4 @@ -#ifndef NO_BCACHEFS_FS +#ifdef CONFIG_BCACHEFS_POSIX_ACL #include "bcachefs.h" @@ -8,8 +8,9 @@ #include #include -#include "xattr.h" #include "acl.h" +#include "fs.h" +#include "xattr.h" /* * Convert from filesystem to in-memory representation. 
@@ -134,9 +135,10 @@ fail: return ERR_PTR(-EINVAL); } -struct posix_acl *bch2_get_acl(struct inode *inode, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int name_index; char *value = NULL; struct posix_acl *acl; @@ -169,14 +171,15 @@ struct posix_acl *bch2_get_acl(struct inode *inode, int type) kfree(value); if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); + set_cached_acl(&inode->v, type, acl); return acl; } -int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int name_index; void *value = NULL; size_t size = 0; @@ -186,12 +189,13 @@ int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type) case ACL_TYPE_ACCESS: name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &inode->i_mode); + ret = posix_acl_equiv_mode(acl, &inode->v.i_mode); if (ret < 0) return ret; else { - inode->i_ctime = current_fs_time(inode->i_sb); - mark_inode_dirty(inode); + inode->v.i_ctime = + current_fs_time(inode->v.i_sb); + mark_inode_dirty(&inode->v); if (ret == 0) acl = NULL; } @@ -200,7 +204,7 @@ int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type) case ACL_TYPE_DEFAULT: name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->v.i_mode)) return acl ? 
-EACCES : 0; break; @@ -222,9 +226,9 @@ int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = -E2BIG; if (!ret) - set_cached_acl(inode, type, acl); + set_cached_acl(&inode->v, type, acl); return ret; } -#endif /* NO_BCACHEFS_FS */ +#endif /* CONFIG_BCACHEFS_POSIX_ACL */ diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h index 539bca7e..b721330e 100644 --- a/libbcachefs/acl.h +++ b/libbcachefs/acl.h @@ -1,7 +1,7 @@ #ifndef _BCACHEFS_ACL_H #define _BCACHEFS_ACL_H -#ifndef NO_BCACHEFS_FS +#ifdef CONFIG_BCACHEFS_POSIX_ACL #define BCH_ACL_VERSION 0x0001 @@ -54,6 +54,13 @@ struct posix_acl; extern struct posix_acl *bch2_get_acl(struct inode *, int); extern int bch2_set_acl(struct inode *, struct posix_acl *, int); -#endif /* NO_BCACHEFS_FS */ +#else + +static inline int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + return 0; +} + +#endif /* CONFIG_BCACHEFS_POSIX_ACL */ #endif /* _BCACHEFS_ACL_H */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index dce8714b..1828bfdf 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -367,7 +367,7 @@ struct bch_dev { uuid_le uuid; char name[BDEVNAME_SIZE]; - struct bcache_superblock disk_sb; + struct bch_sb_handle disk_sb; int sb_write_error; struct bch_devs_mask self; @@ -445,6 +445,7 @@ struct bch_dev { * won't automatically reattach). 
*/ enum { + BCH_FS_ALLOC_READ_DONE, BCH_FS_INITIAL_GC_DONE, BCH_FS_EMERGENCY_RO, BCH_FS_WRITE_DISABLE_COMPLETE, @@ -517,14 +518,11 @@ struct bch_fs { uuid_le uuid; uuid_le user_uuid; - u16 block_size; - u16 btree_node_size; u16 encoded_extent_max; u8 nr_devices; u8 clean; - u8 str_hash_type; u8 encryption_type; u64 time_base_lo; @@ -796,7 +794,7 @@ static inline unsigned bucket_bytes(const struct bch_dev *ca) static inline unsigned block_bytes(const struct bch_fs *c) { - return c->sb.block_size << 9; + return c->opts.block_size << 9; } #endif /* _BCACHEFS_H */ diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 463789d6..16a1edd1 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -9,45 +9,29 @@ #include #include -#define LE32_BITMASK(name, type, field, offset, end) \ +#define LE_BITMASK(_bits, name, type, field, offset, end) \ static const unsigned name##_OFFSET = offset; \ static const unsigned name##_BITS = (end - offset); \ -static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ +static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \ \ static inline __u64 name(const type *k) \ { \ - return (__le32_to_cpu(k->field) >> offset) & \ + return (__le##_bits##_to_cpu(k->field) >> offset) & \ ~(~0ULL << (end - offset)); \ } \ \ static inline void SET_##name(type *k, __u64 v) \ { \ - __u64 new = __le32_to_cpu(k->field); \ + __u##_bits new = __le##_bits##_to_cpu(k->field); \ \ new &= ~(~(~0ULL << (end - offset)) << offset); \ new |= (v & ~(~0ULL << (end - offset))) << offset; \ - k->field = __cpu_to_le32(new); \ + k->field = __cpu_to_le##_bits(new); \ } -#define LE64_BITMASK(name, type, field, offset, end) \ -static const unsigned name##_OFFSET = offset; \ -static const unsigned name##_BITS = (end - offset); \ -static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ - \ -static inline __u64 name(const type *k) \ -{ \ - return (__le64_to_cpu(k->field) >> offset) & \ - ~(~0ULL << (end 
- offset)); \ -} \ - \ -static inline void SET_##name(type *k, __u64 v) \ -{ \ - __u64 new = __le64_to_cpu(k->field); \ - \ - new &= ~(~(~0ULL << (end - offset)) << offset); \ - new |= (v & ~(~0ULL << (end - offset))) << offset; \ - k->field = __cpu_to_le64(new); \ -} +#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) +#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) +#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) struct bkey_format { __u8 key_u64s; @@ -592,9 +576,9 @@ enum bch_inode_types { struct bch_inode { struct bch_val v; - __le64 i_hash_seed; - __le32 i_flags; - __le16 i_mode; + __le64 bi_hash_seed; + __le32 bi_flags; + __le16 bi_mode; __u8 fields[0]; } __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(inode, BCH_INODE_FS); @@ -602,24 +586,23 @@ BKEY_VAL_TYPE(inode, BCH_INODE_FS); struct bch_inode_generation { struct bch_val v; - __le32 i_generation; + __le32 bi_generation; __le32 pad; } __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION); - #define BCH_INODE_FIELDS() \ - BCH_INODE_FIELD(i_atime, 64) \ - BCH_INODE_FIELD(i_ctime, 64) \ - BCH_INODE_FIELD(i_mtime, 64) \ - BCH_INODE_FIELD(i_otime, 64) \ - BCH_INODE_FIELD(i_size, 64) \ - BCH_INODE_FIELD(i_sectors, 64) \ - BCH_INODE_FIELD(i_uid, 32) \ - BCH_INODE_FIELD(i_gid, 32) \ - BCH_INODE_FIELD(i_nlink, 32) \ - BCH_INODE_FIELD(i_generation, 32) \ - BCH_INODE_FIELD(i_dev, 32) + BCH_INODE_FIELD(bi_atime, 64) \ + BCH_INODE_FIELD(bi_ctime, 64) \ + BCH_INODE_FIELD(bi_mtime, 64) \ + BCH_INODE_FIELD(bi_otime, 64) \ + BCH_INODE_FIELD(bi_size, 64) \ + BCH_INODE_FIELD(bi_sectors, 64) \ + BCH_INODE_FIELD(bi_uid, 32) \ + BCH_INODE_FIELD(bi_gid, 32) \ + BCH_INODE_FIELD(bi_nlink, 32) \ + BCH_INODE_FIELD(bi_generation, 32) \ + BCH_INODE_FIELD(bi_dev, 32) enum { /* @@ -650,8 +633,8 @@ enum { #define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) #define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS) 
-LE32_BITMASK(INODE_STR_HASH, struct bch_inode, i_flags, 20, 24); -LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, i_flags, 24, 32); +LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); +LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); struct bch_inode_blockdev { struct bch_val v; @@ -960,6 +943,8 @@ struct bch_sb { * algorithm in use, if/when we get more than one */ +LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); + LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); @@ -976,7 +961,7 @@ LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); -/* 56-64 unused, was REPLICAS_HAVE */ +LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index ce10a4a9..5e836acd 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -51,7 +51,7 @@ static inline bool btree_node_hashed(struct btree *b) static inline size_t btree_bytes(struct bch_fs *c) { - return c->sb.btree_node_size << 9; + return c->opts.btree_node_size << 9; } static inline size_t btree_max_u64s(struct bch_fs *c) @@ -71,7 +71,7 @@ static inline size_t btree_pages(struct bch_fs *c) static inline unsigned btree_blocks(struct bch_fs *c) { - return c->sb.btree_node_size >> c->block_bits; + return c->opts.btree_node_size >> c->block_bits; } #define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4) diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 2bd2887a..302546f2 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ 
-116,7 +116,7 @@ static u8 bch2_btree_mark_key(struct bch_fs *c, enum bkey_type type, { switch (type) { case BKEY_TYPE_BTREE: - bch2_gc_mark_key(c, k, c->sb.btree_node_size, true, flags); + bch2_gc_mark_key(c, k, c->opts.btree_node_size, true, flags); return 0; case BKEY_TYPE_EXTENTS: bch2_gc_mark_key(c, k, k.k->size, false, flags); @@ -386,7 +386,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) for_each_pending_btree_node_free(c, as, d) if (d->index_update_done) __bch2_mark_key(c, bkey_i_to_s_c(&d->key), - c->sb.btree_node_size, true, + c->opts.btree_node_size, true, &stats, 0, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE); /* diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 0eb27eae..e0735afa 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -916,7 +916,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b, return 0; } - if (b->written + sectors > c->sb.btree_node_size) { + if (b->written + sectors > c->opts.btree_node_size) { btree_node_error(c, b, "bset past end of btree node"); i->u64s = 0; return 0; @@ -1034,7 +1034,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b) if (bch2_meta_read_fault("btree")) goto err; - while (b->written < c->sb.btree_node_size) { + while (b->written < c->opts.btree_node_size) { unsigned sectors, whiteout_u64s = 0; if (!b->written) { @@ -1528,7 +1528,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, BUG_ON(!list_empty(&b->write_blocked)); BUG_ON((b->will_make_reachable != NULL) != !b->written); - BUG_ON(b->written >= c->sb.btree_node_size); + BUG_ON(b->written >= c->opts.btree_node_size); BUG_ON(bset_written(b, btree_bset_last(b))); BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); @@ -1612,7 +1612,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); - BUG_ON(b->written + 
sectors_to_write > c->sb.btree_node_size); + BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 98e85627..922a4863 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -181,7 +181,7 @@ found: */ replicas = bch2_extent_nr_dirty_ptrs(k); if (replicas) - stats->s[replicas - 1].data[S_META] -= c->sb.btree_node_size; + stats->s[replicas - 1].data[S_META] -= c->opts.btree_node_size; /* * We're dropping @k from the btree, but it's still live until the @@ -208,7 +208,7 @@ found: struct bch_fs_usage tmp = { 0 }; bch2_mark_key(c, bkey_i_to_s_c(&d->key), - -c->sb.btree_node_size, true, b + -c->opts.btree_node_size, true, b ? gc_pos_btree_node(b) : gc_pos_btree_root(as->btree_id), &tmp, 0); @@ -285,7 +285,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c, BUG_ON(!pending->index_update_done); bch2_mark_key(c, bkey_i_to_s_c(&pending->key), - -c->sb.btree_node_size, true, + -c->opts.btree_node_size, true, gc_phase(GC_PHASE_PENDING_DELETE), &stats, 0); /* @@ -337,7 +337,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, retry: /* alloc_sectors is weird, I suppose */ bkey_extent_init(&tmp.k); - tmp.k.k.size = c->sb.btree_node_size, + tmp.k.k.size = c->opts.btree_node_size, ob = bch2_alloc_sectors(c, &c->btree_write_point, bkey_i_to_extent(&tmp.k), @@ -347,7 +347,7 @@ retry: if (IS_ERR(ob)) return ERR_CAST(ob); - if (tmp.k.k.size < c->sb.btree_node_size) { + if (tmp.k.k.size < c->opts.btree_node_size) { bch2_open_bucket_put(c, ob); goto retry; } @@ -491,7 +491,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, struct btree_reserve *reserve; struct btree *b; struct disk_reservation disk_res = { 0, 0 }; - unsigned sectors = nr_nodes * c->sb.btree_node_size; + unsigned sectors = nr_nodes * 
c->opts.btree_node_size; int ret, disk_res_flags = BCH_DISK_RESERVATION_GC_LOCK_HELD| BCH_DISK_RESERVATION_METADATA; @@ -1035,7 +1035,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) __bch2_btree_set_root_inmem(c, b); bch2_mark_key(c, bkey_i_to_s_c(&b->key), - c->sb.btree_node_size, true, + c->opts.btree_node_size, true, gc_pos_btree_root(b->btree_id), &stats, 0); @@ -1120,7 +1120,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b if (bkey_extent_is_data(&insert->k)) bch2_mark_key(c, bkey_i_to_s_c(insert), - c->sb.btree_node_size, true, + c->opts.btree_node_size, true, gc_pos_btree_node(b), &stats, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && @@ -1901,7 +1901,7 @@ retry: bch2_btree_node_lock_write(b, &iter); bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i), - c->sb.btree_node_size, true, + c->opts.btree_node_size, true, gc_pos_btree_root(b->btree_id), &stats, 0); bch2_btree_node_free_index(as, NULL, diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index 8f75963b..e129b24e 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -263,7 +263,7 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, unsigned used = bset_byte_offset(b, vstruct_end(i)) / sizeof(u64) + b->whiteout_u64s + b->uncompacted_whiteout_u64s; - unsigned total = c->sb.btree_node_size << 6; + unsigned total = c->opts.btree_node_size << 6; EBUG_ON(used > total); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 9be11217..fbc31012 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -469,6 +469,7 @@ static void bch2_mark_pointer(struct bch_fs *c, * checked the gen */ if (gen_after(new.gen, ptr->gen)) { + BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags)); EBUG_ON(!ptr->cached && test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); return; diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c 
index f2883e1f..01bdc867 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -417,9 +417,9 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) } #endif -static int bch2_decrypt_sb_key(struct bch_fs *c, - struct bch_sb_field_crypt *crypt, - struct bch_key *key) +int bch2_decrypt_sb_key(struct bch_fs *c, + struct bch_sb_field_crypt *crypt, + struct bch_key *key) { struct bch_encrypted_key sb_key = crypt->key; struct bch_key user_key; diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h index 15d15b92..e8f6ef41 100644 --- a/libbcachefs/checksum.h +++ b/libbcachefs/checksum.h @@ -40,6 +40,9 @@ struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, void bch2_encrypt_bio(struct bch_fs *, unsigned, struct nonce, struct bio *); +int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, + struct bch_key *); + int bch2_disable_encryption(struct bch_fs *); int bch2_enable_encryption(struct bch_fs *, bool); diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index fba36c82..c8a03c7f 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -423,7 +423,7 @@ void bch2_bio_compress(struct bch_fs *c, /* If it's only one block, don't bother trying to compress: */ if (*compression_type != BCH_COMPRESSION_NONE && - bio_sectors(src) > c->sb.block_size && + bio_sectors(src) > c->opts.block_size && !__bio_compress(c, dst, dst_len, src, src_len, compression_type)) goto out; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 056715bc..a900d397 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -192,25 +192,23 @@ static void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -static struct bpos bch2_dirent_pos(struct bch_inode_info *ei, +static struct bpos bch2_dirent_pos(struct bch_inode_info *inode, const struct qstr *name) { - return POS(ei->vfs_inode.i_ino, bch2_dirent_hash(&ei->str_hash, name)); + return POS(inode->v.i_ino, bch2_dirent_hash(&inode->ei_str_hash, 
name)); } int bch2_dirent_rename(struct bch_fs *c, - struct inode *src_dir, const struct qstr *src_name, - struct inode *dst_dir, const struct qstr *dst_name, - u64 *journal_seq, enum bch_rename_mode mode) + struct bch_inode_info *src_dir, const struct qstr *src_name, + struct bch_inode_info *dst_dir, const struct qstr *dst_name, + u64 *journal_seq, enum bch_rename_mode mode) { - struct bch_inode_info *src_ei = to_bch_ei(src_dir); - struct bch_inode_info *dst_ei = to_bch_ei(dst_dir); struct btree_iter src_iter, dst_iter, whiteout_iter; struct bkey_s_c old_src, old_dst; struct bkey delete; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; - struct bpos src_pos = bch2_dirent_pos(src_ei, src_name); - struct bpos dst_pos = bch2_dirent_pos(dst_ei, dst_name); + struct bpos src_pos = bch2_dirent_pos(src_dir, src_name); + struct bpos dst_pos = bch2_dirent_pos(dst_dir, dst_name); bool need_whiteout; int ret = -ENOMEM; @@ -241,13 +239,13 @@ retry: * in bch_hash_set) - we never move existing dirents to different slot: */ old_src = bch2_hash_lookup_at(bch2_dirent_hash_desc, - &src_ei->str_hash, + &src_dir->ei_str_hash, &src_iter, src_name); if ((ret = btree_iter_err(old_src))) goto err; ret = bch2_hash_needs_whiteout(bch2_dirent_hash_desc, - &src_ei->str_hash, + &src_dir->ei_str_hash, &whiteout_iter, &src_iter); if (ret < 0) goto err; @@ -261,7 +259,7 @@ retry: old_dst = mode == BCH_RENAME ? 
bch2_hash_hole_at(bch2_dirent_hash_desc, &dst_iter) : bch2_hash_lookup_at(bch2_dirent_hash_desc, - &dst_ei->str_hash, + &dst_dir->ei_str_hash, &dst_iter, dst_name); if ((ret = btree_iter_err(old_dst))) goto err; @@ -395,7 +393,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) int bch2_readdir(struct bch_fs *c, struct file *file, struct dir_context *ctx) { - struct inode *inode = file_inode(file); + struct bch_inode_info *inode = file_bch_inode(file); struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; @@ -404,29 +402,21 @@ int bch2_readdir(struct bch_fs *c, struct file *file, if (!dir_emit_dots(file, ctx)) return 0; - pr_debug("listing for %lu from %llu", inode->i_ino, ctx->pos); - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, - POS(inode->i_ino, ctx->pos), 0, k) { + POS(inode->v.i_ino, ctx->pos), 0, k) { if (k.k->type != BCH_DIRENT) continue; dirent = bkey_s_c_to_dirent(k); - pr_debug("saw %llu:%llu (%s) -> %llu", - k.k->p.inode, k.k->p.offset, - dirent.v->d_name, dirent.v->d_inum); - - if (bkey_cmp(k.k->p, POS(inode->i_ino, ctx->pos)) < 0) + if (bkey_cmp(k.k->p, POS(inode->v.i_ino, ctx->pos)) < 0) continue; - if (k.k->p.inode > inode->i_ino) + if (k.k->p.inode > inode->v.i_ino) break; len = bch2_dirent_name_bytes(dirent); - pr_debug("emitting %s", dirent.v->d_name); - /* * XXX: dir_emit() can fault and block, while we're holding * locks diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index 9fe3d8f6..98405b5b 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -11,6 +11,7 @@ struct file; struct dir_context; struct bch_fs; struct bch_hash_info; +struct bch_inode_info; unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); int bch2_dirent_create(struct bch_fs *c, u64, const struct bch_hash_info *, @@ -25,8 +26,8 @@ enum bch_rename_mode { }; int bch2_dirent_rename(struct bch_fs *, - struct inode *, const struct qstr *, - struct inode *, const struct qstr *, + struct bch_inode_info *, const struct qstr *, + struct 
bch_inode_info *, const struct qstr *, u64 *, enum bch_rename_mode); u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 9936d0ff..7c641bda 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -575,7 +575,7 @@ static const char *bch2_btree_ptr_invalid(const struct bch_fs *c, extent_for_each_ptr_crc(e, ptr, crc) { reason = extent_ptr_invalid(c, e, ptr, - c->sb.btree_node_size, + c->opts.btree_node_size, true); if (reason) return reason; @@ -610,6 +610,9 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, g = PTR_BUCKET(ca, ptr); replicas++; + if (!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags)) + continue; + err = "stale"; if (ptr_stale(ca, ptr)) goto err; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 6828221a..5eb62f9d 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -62,41 +62,41 @@ static int write_invalidate_inode_pages_range(struct address_space *mapping, /* i_size updates: */ -static int inode_set_size(struct bch_inode_info *ei, +static int inode_set_size(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { loff_t *new_i_size = p; - lockdep_assert_held(&ei->update_lock); + lockdep_assert_held(&inode->ei_update_lock); - bi->i_size = *new_i_size; + bi->bi_size = *new_i_size; - if (atomic_long_read(&ei->i_size_dirty_count)) - bi->i_flags |= BCH_INODE_I_SIZE_DIRTY; + if (atomic_long_read(&inode->ei_size_dirty_count)) + bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; else - bi->i_flags &= ~BCH_INODE_I_SIZE_DIRTY; + bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; return 0; } static int __must_check bch2_write_inode_size(struct bch_fs *c, - struct bch_inode_info *ei, - loff_t new_size) + struct bch_inode_info *inode, + loff_t new_size) { - return __bch2_write_inode(c, ei, inode_set_size, &new_size); + return __bch2_write_inode(c, inode, inode_set_size, &new_size); } -static inline void i_size_dirty_put(struct 
bch_inode_info *ei) +static inline void i_size_dirty_put(struct bch_inode_info *inode) { - atomic_long_dec_bug(&ei->i_size_dirty_count); + atomic_long_dec_bug(&inode->ei_size_dirty_count); } -static inline void i_size_dirty_get(struct bch_inode_info *ei) +static inline void i_size_dirty_get(struct bch_inode_info *inode) { - lockdep_assert_held(&ei->vfs_inode.i_rwsem); + lockdep_assert_held(&inode->v.i_rwsem); - atomic_long_inc(&ei->i_size_dirty_count); + atomic_long_inc(&inode->ei_size_dirty_count); } /* i_sectors accounting: */ @@ -114,63 +114,63 @@ i_sectors_hook_fn(struct extent_insert_hook *hook, int sign = bkey_extent_is_allocation(&insert->k) - (k.k && bkey_extent_is_allocation(k.k)); - EBUG_ON(!(h->ei->i_flags & BCH_INODE_I_SECTORS_DIRTY)); - EBUG_ON(!atomic_long_read(&h->ei->i_sectors_dirty_count)); + EBUG_ON(!(h->inode->ei_flags & BCH_INODE_I_SECTORS_DIRTY)); + EBUG_ON(!atomic_long_read(&h->inode->ei_sectors_dirty_count)); h->sectors += sectors * sign; return BTREE_HOOK_DO_INSERT; } -static int inode_set_i_sectors_dirty(struct bch_inode_info *ei, - struct bch_inode_unpacked *bi, void *p) +static int inode_set_i_sectors_dirty(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, void *p) { - BUG_ON(bi->i_flags & BCH_INODE_I_SECTORS_DIRTY); + BUG_ON(bi->bi_flags & BCH_INODE_I_SECTORS_DIRTY); - bi->i_flags |= BCH_INODE_I_SECTORS_DIRTY; + bi->bi_flags |= BCH_INODE_I_SECTORS_DIRTY; return 0; } -static int inode_clear_i_sectors_dirty(struct bch_inode_info *ei, +static int inode_clear_i_sectors_dirty(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { - BUG_ON(!(bi->i_flags & BCH_INODE_I_SECTORS_DIRTY)); + BUG_ON(!(bi->bi_flags & BCH_INODE_I_SECTORS_DIRTY)); - bi->i_sectors = atomic64_read(&ei->i_sectors); - bi->i_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + bi->bi_sectors = atomic64_read(&inode->ei_sectors); + bi->bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; return 0; } -static void i_sectors_dirty_put(struct bch_inode_info *ei, +static void 
i_sectors_dirty_put(struct bch_fs *c, + struct bch_inode_info *inode, struct i_sectors_hook *h) { - struct inode *inode = &ei->vfs_inode; - if (h->sectors) { - spin_lock(&inode->i_lock); - inode->i_blocks += h->sectors; - spin_unlock(&inode->i_lock); + spin_lock(&inode->v.i_lock); + inode->v.i_blocks += h->sectors; + spin_unlock(&inode->v.i_lock); - atomic64_add(h->sectors, &ei->i_sectors); - EBUG_ON(atomic64_read(&ei->i_sectors) < 0); + atomic64_add(h->sectors, &inode->ei_sectors); + EBUG_ON(atomic64_read(&inode->ei_sectors) < 0); } - EBUG_ON(atomic_long_read(&ei->i_sectors_dirty_count) <= 0); + EBUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count) <= 0); - mutex_lock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); - if (atomic_long_dec_and_test(&ei->i_sectors_dirty_count)) { - struct bch_fs *c = ei->vfs_inode.i_sb->s_fs_info; - int ret = __bch2_write_inode(c, ei, inode_clear_i_sectors_dirty, NULL); + if (atomic_long_dec_and_test(&inode->ei_sectors_dirty_count)) { + int ret = __bch2_write_inode(c, inode, + inode_clear_i_sectors_dirty, NULL); ret = ret; } - mutex_unlock(&ei->update_lock); + mutex_unlock(&inode->ei_update_lock); } -static int __must_check i_sectors_dirty_get(struct bch_inode_info *ei, +static int __must_check i_sectors_dirty_get(struct bch_fs *c, + struct bch_inode_info *inode, struct i_sectors_hook *h) { int ret = 0; @@ -178,24 +178,22 @@ static int __must_check i_sectors_dirty_get(struct bch_inode_info *ei, h->hook.fn = i_sectors_hook_fn; h->sectors = 0; #ifdef CONFIG_BCACHEFS_DEBUG - h->ei = ei; + h->inode = inode; #endif - if (atomic_long_inc_not_zero(&ei->i_sectors_dirty_count)) + if (atomic_long_inc_not_zero(&inode->ei_sectors_dirty_count)) return 0; - mutex_lock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); - if (!(ei->i_flags & BCH_INODE_I_SECTORS_DIRTY)) { - struct bch_fs *c = ei->vfs_inode.i_sb->s_fs_info; - - ret = __bch2_write_inode(c, ei, inode_set_i_sectors_dirty, NULL); - } + if (!(inode->ei_flags & 
BCH_INODE_I_SECTORS_DIRTY)) + ret = __bch2_write_inode(c, inode, inode_set_i_sectors_dirty, + NULL); if (!ret) - atomic_long_inc(&ei->i_sectors_dirty_count); + atomic_long_inc(&inode->ei_sectors_dirty_count); - mutex_unlock(&ei->update_lock); + mutex_unlock(&inode->ei_update_lock); return ret; } @@ -219,8 +217,7 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook, { struct bchfs_extent_trans_hook *h = container_of(hook, struct bchfs_extent_trans_hook, hook); - struct bch_inode_info *ei = h->op->ei; - struct inode *inode = &ei->vfs_inode; + struct bch_inode_info *inode = h->op->inode; int sign = bkey_extent_is_allocation(&insert->k) - (k.k && bkey_extent_is_allocation(k.k)); s64 sectors = (s64) (next_pos.offset - committed_pos.offset) * sign; @@ -229,22 +226,22 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook, BUG_ON((next_pos.offset << 9) > round_up(offset, PAGE_SIZE)); - /* XXX: ei->i_size locking */ - if (offset > ei->i_size) { - BUG_ON(ei->i_flags & BCH_INODE_I_SIZE_DIRTY); + /* XXX: inode->i_size locking */ + if (offset > inode->ei_size) { + BUG_ON(inode->ei_flags & BCH_INODE_I_SIZE_DIRTY); if (!h->need_inode_update) { h->need_inode_update = true; return BTREE_HOOK_RESTART_TRANS; } - h->inode_u.i_size = offset; + h->inode_u.bi_size = offset; do_pack = true; - ei->i_size = offset; + inode->ei_size = offset; if (h->op->is_dio) - i_size_write(inode, offset); + i_size_write(&inode->v, offset); } if (sectors) { @@ -253,17 +250,17 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook, return BTREE_HOOK_RESTART_TRANS; } - h->inode_u.i_sectors += sectors; + h->inode_u.bi_sectors += sectors; do_pack = true; - atomic64_add(sectors, &ei->i_sectors); + atomic64_add(sectors, &inode->ei_sectors); h->op->sectors_added += sectors; if (h->op->is_dio) { - spin_lock(&inode->i_lock); - inode->i_blocks += sectors; - spin_unlock(&inode->i_lock); + spin_lock(&inode->v.i_lock); + inode->v.i_blocks += sectors; + spin_unlock(&inode->v.i_lock); } } @@ -283,7 
+280,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop) struct bkey_i *k = bch2_keylist_front(keys); int ret; - BUG_ON(k->k.p.inode != op->ei->vfs_inode.i_ino); + BUG_ON(k->k.p.inode != op->inode->v.i_ino); bch2_btree_iter_init(&extent_iter, wop->c, BTREE_ID_EXTENTS, bkey_start_pos(&bch2_keylist_front(keys)->k), @@ -301,9 +298,9 @@ static int bchfs_write_index_update(struct bch_write_op *wop) if (ret) goto err; - /* XXX: ei->i_size locking */ + /* XXX: inode->i_size locking */ k = bch2_keylist_front(keys); - if (min(k->k.p.offset << 9, op->new_i_size) > op->ei->i_size) + if (min(k->k.p.offset << 9, op->new_i_size) > op->inode->ei_size) hook.need_inode_update = true; if (hook.need_inode_update) { @@ -477,8 +474,8 @@ static int bch2_get_page_reservation(struct bch_fs *c, struct page *page, static void bch2_clear_page_bits(struct page *page) { - struct inode *inode = page->mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(page->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct disk_reservation res = { .sectors = PAGE_SECTORS }; struct bch_page_state s; @@ -489,9 +486,9 @@ static void bch2_clear_page_bits(struct page *page) ClearPagePrivate(page); if (s.dirty_sectors) { - spin_lock(&inode->i_lock); - inode->i_blocks -= s.dirty_sectors; - spin_unlock(&inode->i_lock); + spin_lock(&inode->v.i_lock); + inode->v.i_blocks -= s.dirty_sectors; + spin_unlock(&inode->v.i_lock); } if (s.reserved) @@ -507,11 +504,11 @@ int bch2_set_page_dirty(struct page *page) ); if (old.dirty_sectors != new.dirty_sectors) { - struct inode *inode = page->mapping->host; + struct bch_inode_info *inode = to_bch_ei(page->mapping->host); - spin_lock(&inode->i_lock); - inode->i_blocks += new.dirty_sectors - old.dirty_sectors; - spin_unlock(&inode->i_lock); + spin_lock(&inode->v.i_lock); + inode->v.i_blocks += new.dirty_sectors - old.dirty_sectors; + spin_unlock(&inode->v.i_lock); } return 
__set_page_dirty_nobuffers(page); @@ -698,7 +695,7 @@ static void readpage_bio_extend(struct readpages_iter *iter, } static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, - struct bch_read_bio *rbio, u64 inode, + struct bch_read_bio *rbio, u64 inum, struct readpages_iter *readpages_iter) { struct bio *bio = &rbio->bio; @@ -712,7 +709,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, unsigned bytes; bool is_last; - bch2_btree_iter_set_pos(iter, POS(inode, bio->bi_iter.bi_sector)); + bch2_btree_iter_set_pos(iter, POS(inum, bio->bi_iter.bi_sector)); k = bch2_btree_iter_peek_with_holes(iter); BUG_ON(!k.k); @@ -779,10 +776,10 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, } int bch2_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) + struct list_head *pages, unsigned nr_pages) { - struct inode *inode = mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_iter iter; struct page *page; struct readpages_iter readpages_iter = { @@ -809,7 +806,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping, rbio->bio.bi_end_io = bch2_readpages_end_io; bio_add_page_contig(&rbio->bio, page); - bchfs_read(c, &iter, rbio, inode->i_ino, &readpages_iter); + bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter); } if (current->pagecache_lock != &mapping->add_lock) @@ -819,7 +816,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping, } static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, - u64 inode, struct page *page) + u64 inum, struct page *page) { struct btree_iter iter; @@ -844,20 +841,19 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, bio_add_page_contig(&rbio->bio, page); bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0); - bchfs_read(c, &iter, rbio, 
inode, NULL); + bchfs_read(c, &iter, rbio, inum, NULL); } int bch2_readpage(struct file *file, struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(page->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_read_bio *rbio; rbio = to_rbio(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read)); rbio->bio.bi_end_io = bch2_readpages_end_io; - __bchfs_readpage(c, rbio, inode->i_ino, page); + __bchfs_readpage(c, rbio, inode->v.i_ino, page); return 0; } @@ -921,11 +917,11 @@ static void bch2_writepage_io_done(struct closure *cl) * before calling end_page_writeback: */ if (io->op.sectors_added) { - struct inode *inode = &io->op.ei->vfs_inode; + struct bch_inode_info *inode = io->op.inode; - spin_lock(&inode->i_lock); - inode->i_blocks += io->op.sectors_added; - spin_unlock(&inode->i_lock); + spin_lock(&inode->v.i_lock); + inode->v.i_blocks += io->op.sectors_added; + spin_unlock(&inode->v.i_lock); } bio_for_each_segment_all(bvec, bio, i) @@ -954,10 +950,10 @@ static void bch2_writepage_do_io(struct bch_writepage_state *w) */ static void bch2_writepage_io_alloc(struct bch_fs *c, struct bch_writepage_state *w, - struct bch_inode_info *ei, + struct bch_inode_info *inode, struct page *page) { - u64 inum = ei->vfs_inode.i_ino; + u64 inum = inode->v.i_ino; unsigned nr_replicas = page_state(page)->nr_replicas; EBUG_ON(!nr_replicas); @@ -971,17 +967,17 @@ alloc_io: struct bch_writepage_io, op.op.wbio.bio); closure_init(&w->io->cl, NULL); - w->io->op.ei = ei; + w->io->op.inode = inode; w->io->op.sectors_added = 0; w->io->op.is_dio = false; bch2_write_op_init(&w->io->op.op, c, - (struct disk_reservation) { + (struct disk_reservation) { .nr_replicas = c->opts.data_replicas, - }, - foreground_write_point(c, ei->last_dirtied), - POS(inum, 0), - &ei->journal_seq, - BCH_WRITE_THROTTLE); + }, + foreground_write_point(c, 
inode->ei_last_dirtied), + POS(inum, 0), + &inode->ei_journal_seq, + BCH_WRITE_THROTTLE); w->io->op.op.index_update_fn = bchfs_write_index_update; } @@ -995,18 +991,17 @@ alloc_io: * We shouldn't ever be handed pages for multiple inodes in a single * pass - right? */ - BUG_ON(ei != w->io->op.ei); + BUG_ON(inode != w->io->op.inode); } static int __bch2_writepage(struct bch_fs *c, struct page *page, struct writeback_control *wbc, struct bch_writepage_state *w) { - struct inode *inode = page->mapping->host; - struct bch_inode_info *ei = to_bch_ei(inode); + struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_page_state new, old; unsigned offset; - loff_t i_size = i_size_read(inode); + loff_t i_size = i_size_read(&inode->v); pgoff_t end_index = i_size >> PAGE_SHIFT; EBUG_ON(!PageUptodate(page)); @@ -1031,7 +1026,7 @@ static int __bch2_writepage(struct bch_fs *c, struct page *page, */ zero_user_segment(page, offset, PAGE_SIZE); do_io: - bch2_writepage_io_alloc(c, w, ei, page); + bch2_writepage_io_alloc(c, w, inode, page); /* while page is locked: */ w->io->op.new_i_size = i_size; @@ -1233,8 +1228,8 @@ static void bch2_read_single_page_end_io(struct bio *bio) static int bch2_read_single_page(struct page *page, struct address_space *mapping) { - struct inode *inode = mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_read_bio *rbio; int ret; DECLARE_COMPLETION_ONSTACK(done); @@ -1243,7 +1238,7 @@ static int bch2_read_single_page(struct page *page, rbio->bio.bi_private = &done; rbio->bio.bi_end_io = bch2_read_single_page_end_io; - __bchfs_readpage(c, rbio, inode->i_ino, page); + __bchfs_readpage(c, rbio, inode->v.i_ino, page); wait_for_completion(&done); ret = rbio->bio.bi_error; @@ -1260,14 +1255,14 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page 
**pagep, void **fsdata) { - struct inode *inode = mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; pgoff_t index = pos >> PAGE_SHIFT; unsigned offset = pos & (PAGE_SIZE - 1); struct page *page; int ret = -ENOMEM; - BUG_ON(inode_unhashed(mapping->host)); + BUG_ON(inode_unhashed(&inode->v)); /* Not strictly necessary - same reason as mkwrite(): */ pagecache_add_get(&mapping->add_lock); @@ -1283,13 +1278,13 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, if (len == PAGE_SIZE) goto out; - if (!offset && pos + len >= inode->i_size) { + if (!offset && pos + len >= inode->v.i_size) { zero_user_segment(page, len, PAGE_SIZE); flush_dcache_page(page); goto out; } - if (index > inode->i_size >> PAGE_SHIFT) { + if (index > inode->v.i_size >> PAGE_SHIFT) { zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); flush_dcache_page(page); goto out; @@ -1329,11 +1324,10 @@ int bch2_write_end(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(page->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; - lockdep_assert_held(&inode->i_rwsem); + lockdep_assert_held(&inode->v.i_rwsem); if (unlikely(copied < len && !PageUptodate(page))) { /* @@ -1346,8 +1340,8 @@ int bch2_write_end(struct file *filp, struct address_space *mapping, copied = 0; } - if (pos + copied > inode->i_size) - i_size_write(inode, pos + copied); + if (pos + copied > inode->v.i_size) + i_size_write(&inode->v, pos + copied); if (copied) { if (!PageUptodate(page)) @@ -1355,7 +1349,7 @@ int bch2_write_end(struct file *filp, struct address_space *mapping, if (!PageDirty(page)) set_page_dirty(page); - 
ei->last_dirtied = (unsigned long) current; + inode->ei_last_dirtied = (unsigned long) current; } else { bch2_put_page_reservation(c, page); } @@ -1394,7 +1388,7 @@ static void bch2_direct_IO_read_split_endio(struct bio *bio) } static int bch2_direct_IO_read(struct bch_fs *c, struct kiocb *req, - struct file *file, struct inode *inode, + struct file *file, struct bch_inode_info *inode, struct iov_iter *iter, loff_t offset) { struct dio_read *dio; @@ -1406,7 +1400,7 @@ static int bch2_direct_IO_read(struct bch_fs *c, struct kiocb *req, return -EINVAL; ret = min_t(loff_t, iter->count, - max_t(loff_t, 0, i_size_read(inode) - offset)); + max_t(loff_t, 0, i_size_read(&inode->v) - offset)); iov_iter_truncate(iter, round_up(ret, block_bytes(c))); if (!ret) @@ -1464,7 +1458,7 @@ start: if (iter->count) closure_get(&dio->cl); - bch2_read(c, to_rbio(bio), inode->i_ino); + bch2_read(c, to_rbio(bio), inode->v.i_ino); } if (sync) { @@ -1482,13 +1476,13 @@ static long __bch2_dio_write_complete(struct dio_write *dio) { struct file *file = dio->req->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_inode; + struct bch_inode_info *inode = file_bch_inode(file); long ret = dio->error ?: dio->written; bch2_disk_reservation_put(dio->c, &dio->res); __pagecache_block_put(&mapping->add_lock); - inode_dio_end(inode); + inode_dio_end(&inode->v); if (dio->iovec && dio->iovec != dio->inline_vecs) kfree(dio->iovec); @@ -1525,8 +1519,7 @@ static void bch2_dio_write_done(struct dio_write *dio) static void bch2_do_direct_IO_write(struct dio_write *dio) { struct file *file = dio->req->ki_filp; - struct inode *inode = file->f_inode; - struct bch_inode_info *ei = to_bch_ei(inode); + struct bch_inode_info *inode = file_bch_inode(file); struct bio *bio = &dio->iop.op.wbio.bio; unsigned flags = 0; int ret; @@ -1547,15 +1540,15 @@ static void bch2_do_direct_IO_write(struct dio_write *dio) return; } - dio->iop.ei = ei; + dio->iop.inode = inode; 
dio->iop.sectors_added = 0; dio->iop.is_dio = true; dio->iop.new_i_size = U64_MAX; bch2_write_op_init(&dio->iop.op, dio->c, dio->res, - foreground_write_point(dio->c, (unsigned long) current), - POS(inode->i_ino, (dio->offset + dio->written) >> 9), - &ei->journal_seq, - flags|BCH_WRITE_THROTTLE); + foreground_write_point(dio->c, (unsigned long) current), + POS(inode->v.i_ino, (dio->offset + dio->written) >> 9), + &inode->ei_journal_seq, + flags|BCH_WRITE_THROTTLE); dio->iop.op.index_update_fn = bchfs_write_index_update; dio->res.sectors -= bio_sectors(bio); @@ -1594,8 +1587,9 @@ static void bch2_dio_write_loop_async(struct closure *cl) } } -static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req, - struct file *file, struct inode *inode, +static int bch2_direct_IO_write(struct bch_fs *c, + struct kiocb *req, struct file *file, + struct bch_inode_info *inode, struct iov_iter *iter, loff_t offset) { struct address_space *mapping = file->f_mapping; @@ -1604,7 +1598,7 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req, ssize_t ret; bool sync = is_sync_kiocb(req); - lockdep_assert_held(&inode->i_rwsem); + lockdep_assert_held(&inode->v.i_rwsem); if (unlikely(!iter->count)) return 0; @@ -1626,7 +1620,7 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req, dio->mm = current->mm; closure_init(&dio->cl, NULL); - if (offset + iter->count > inode->i_size) + if (offset + iter->count > inode->v.i_size) sync = true; /* @@ -1644,7 +1638,7 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req, return ret; } - inode_dio_begin(inode); + inode_dio_begin(&inode->v); __pagecache_block_get(&mapping->add_lock); if (sync) { @@ -1685,8 +1679,8 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req, ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter) { struct file *file = req->ki_filp; - struct inode *inode = file->f_inode; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info 
*inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct blk_plug plug; ssize_t ret; @@ -1703,8 +1697,8 @@ static ssize_t bch2_direct_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_inode; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = file->f_mapping; loff_t pos = iocb->ki_pos; ssize_t ret; @@ -1727,12 +1721,11 @@ err: static ssize_t __bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct bch_inode_info *inode = file_bch_inode(file); ssize_t ret; /* We can write back this queue in page reclaim */ - current->backing_dev_info = inode_to_bdi(inode); + current->backing_dev_info = inode_to_bdi(&inode->v); ret = file_remove_privs(file); if (ret) goto out; @@ -1754,16 +1747,15 @@ out: ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct bch_inode_info *inode = file_bch_inode(iocb->ki_filp); bool direct = iocb->ki_flags & IOCB_DIRECT; ssize_t ret; - inode_lock(inode); + inode_lock(&inode->v); ret = generic_write_checks(iocb, from); if (ret > 0) ret = __bch2_write_iter(iocb, from); - inode_unlock(inode); + inode_unlock(&inode->v); if (ret > 0 && !direct) ret = generic_write_sync(iocb, ret); @@ -1775,12 +1767,12 @@ int bch2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct file *file = vmf->vma->vm_file; - struct inode *inode = file_inode(file); - struct address_space *mapping = inode->i_mapping; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct address_space *mapping = inode->v.i_mapping; + struct bch_fs *c = 
inode->v.i_sb->s_fs_info; int ret = VM_FAULT_LOCKED; - sb_start_pagefault(inode->i_sb); + sb_start_pagefault(inode->v.i_sb); file_update_time(file); /* @@ -1794,7 +1786,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf) lock_page(page); if (page->mapping != mapping || - page_offset(page) > i_size_read(inode)) { + page_offset(page) > i_size_read(&inode->v)) { unlock_page(page); ret = VM_FAULT_NOPAGE; goto out; @@ -1812,7 +1804,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf) out: if (current->pagecache_lock != &mapping->add_lock) pagecache_add_put(&mapping->add_lock); - sb_end_pagefault(inode->i_sb); + sb_end_pagefault(inode->v.i_sb); return ret; } @@ -1862,26 +1854,25 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage, int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file->f_mapping->host; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + ret = filemap_write_and_wait_range(inode->v.i_mapping, start, end); if (ret) return ret; if (c->opts.journal_flush_disabled) return 0; - return bch2_journal_flush_seq(&c->journal, ei->journal_seq); + return bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); } static int __bch2_truncate_page(struct address_space *mapping, pgoff_t index, loff_t start, loff_t end) { - struct inode *inode = mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; unsigned start_offset = start & (PAGE_SIZE - 1); unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1; struct page *page; @@ -1893,7 +1884,7 @@ static int __bch2_truncate_page(struct address_space *mapping, return 0; /* Above i_size? 
*/ - if (index << PAGE_SHIFT >= inode->i_size) + if (index << PAGE_SHIFT >= inode->v.i_size) return 0; page = find_lock_page(mapping, index); @@ -1906,10 +1897,10 @@ static int __bch2_truncate_page(struct address_space *mapping, * page */ for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(inode->i_ino, + POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT), 0, k) { if (bkey_cmp(bkey_start_pos(k.k), - POS(inode->i_ino, + POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT)) >= 0) break; @@ -1967,30 +1958,30 @@ static int bch2_truncate_page(struct address_space *mapping, loff_t from) from, from + PAGE_SIZE); } -int bch2_truncate(struct inode *inode, struct iattr *iattr) +int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) { - struct address_space *mapping = inode->i_mapping; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; - bool shrink = iattr->ia_size <= inode->i_size; + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct address_space *mapping = inode->v.i_mapping; + bool shrink = iattr->ia_size <= inode->v.i_size; int ret = 0; - inode_dio_wait(inode); + inode_dio_wait(&inode->v); pagecache_block_get(&mapping->add_lock); - truncate_setsize(inode, iattr->ia_size); + truncate_setsize(&inode->v, iattr->ia_size); /* sync appends.. */ - /* XXX what protects ei->i_size? */ - if (iattr->ia_size > ei->i_size) - ret = filemap_write_and_wait_range(mapping, ei->i_size, S64_MAX); + /* XXX what protects inode->i_size? 
*/ + if (iattr->ia_size > inode->ei_size) + ret = filemap_write_and_wait_range(mapping, + inode->ei_size, S64_MAX); if (ret) goto err_put_pagecache; - mutex_lock(&ei->update_lock); - i_size_dirty_get(ei); - ret = bch2_write_inode_size(c, ei, inode->i_size); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + i_size_dirty_get(inode); + ret = bch2_write_inode_size(c, inode, inode->v.i_size); + mutex_unlock(&inode->ei_update_lock); if (unlikely(ret)) goto err; @@ -2005,71 +1996,73 @@ int bch2_truncate(struct inode *inode, struct iattr *iattr) struct i_sectors_hook i_sectors_hook; int ret; - ret = i_sectors_dirty_get(ei, &i_sectors_hook); + ret = i_sectors_dirty_get(c, inode, &i_sectors_hook); if (unlikely(ret)) goto err; - ret = bch2_truncate_page(inode->i_mapping, iattr->ia_size); + ret = bch2_truncate_page(inode->v.i_mapping, iattr->ia_size); if (unlikely(ret)) { - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); goto err; } - ret = bch2_inode_truncate(c, inode->i_ino, + ret = bch2_inode_truncate(c, inode->v.i_ino, round_up(iattr->ia_size, PAGE_SIZE) >> 9, &i_sectors_hook.hook, - &ei->journal_seq); + &inode->ei_journal_seq); - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); if (unlikely(ret)) goto err; } - mutex_lock(&ei->update_lock); - setattr_copy(inode, iattr); - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); -err: + mutex_lock(&inode->ei_update_lock); + setattr_copy(&inode->v, iattr); + inode->v.i_mtime = inode->v.i_ctime = current_fs_time(inode->v.i_sb); +out: /* clear I_SIZE_DIRTY: */ - i_size_dirty_put(ei); - ret = bch2_write_inode_size(c, ei, inode->i_size); - mutex_unlock(&ei->update_lock); + i_size_dirty_put(inode); + ret = bch2_write_inode_size(c, inode, inode->v.i_size); + mutex_unlock(&inode->ei_update_lock); err_put_pagecache: pagecache_block_put(&mapping->add_lock); return ret; +err: + mutex_lock(&inode->ei_update_lock); + 
goto out; } -static long bch2_fpunch(struct inode *inode, loff_t offset, loff_t len) +static long bch2_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) { - struct address_space *mapping = inode->i_mapping; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; - u64 ino = inode->i_ino; + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct address_space *mapping = inode->v.i_mapping; + u64 ino = inode->v.i_ino; u64 discard_start = round_up(offset, PAGE_SIZE) >> 9; u64 discard_end = round_down(offset + len, PAGE_SIZE) >> 9; int ret = 0; - inode_lock(inode); - inode_dio_wait(inode); + inode_lock(&inode->v); + inode_dio_wait(&inode->v); pagecache_block_get(&mapping->add_lock); - ret = __bch2_truncate_page(inode->i_mapping, - offset >> PAGE_SHIFT, - offset, offset + len); + ret = __bch2_truncate_page(mapping, + offset >> PAGE_SHIFT, + offset, offset + len); if (unlikely(ret)) goto out; if (offset >> PAGE_SHIFT != (offset + len) >> PAGE_SHIFT) { - ret = __bch2_truncate_page(inode->i_mapping, - (offset + len) >> PAGE_SHIFT, - offset, offset + len); + ret = __bch2_truncate_page(mapping, + (offset + len) >> PAGE_SHIFT, + offset, offset + len); if (unlikely(ret)) goto out; } - truncate_pagecache_range(inode, offset, offset + len - 1); + truncate_pagecache_range(&inode->v, offset, offset + len - 1); if (discard_start < discard_end) { struct disk_reservation disk_res; @@ -2078,7 +2071,7 @@ static long bch2_fpunch(struct inode *inode, loff_t offset, loff_t len) BUG_ON(bch2_disk_reservation_get(c, &disk_res, 0, 0)); - ret = i_sectors_dirty_get(ei, &i_sectors_hook); + ret = i_sectors_dirty_get(c, inode, &i_sectors_hook); if (unlikely(ret)) goto out; @@ -2089,23 +2082,23 @@ static long bch2_fpunch(struct inode *inode, loff_t offset, loff_t len) ZERO_VERSION, &disk_res, &i_sectors_hook.hook, - &ei->journal_seq); + &inode->ei_journal_seq); - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, 
&i_sectors_hook); bch2_disk_reservation_put(c, &disk_res); } out: pagecache_block_put(&mapping->add_lock); - inode_unlock(inode); + inode_unlock(&inode->v); return ret; } -static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len) +static long bch2_fcollapse(struct bch_inode_info *inode, + loff_t offset, loff_t len) { - struct address_space *mapping = inode->i_mapping; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct address_space *mapping = inode->v.i_mapping; struct btree_iter src; struct btree_iter dst; BKEY_PADDED(k) copy; @@ -2118,7 +2111,7 @@ static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len) return -EINVAL; bch2_btree_iter_init(&dst, c, BTREE_ID_EXTENTS, - POS(inode->i_ino, offset >> 9), + POS(inode->v.i_ino, offset >> 9), BTREE_ITER_INTENT); /* position will be set from dst iter's position: */ bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN, 0); @@ -2130,30 +2123,29 @@ static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len) * locking for the extents btree itself, because we're using linked * iterators */ - inode_lock(inode); - inode_dio_wait(inode); + inode_lock(&inode->v); + inode_dio_wait(&inode->v); pagecache_block_get(&mapping->add_lock); ret = -EINVAL; - if (offset + len >= inode->i_size) + if (offset + len >= inode->v.i_size) goto err; - if (inode->i_size < len) + if (inode->v.i_size < len) goto err; - new_size = inode->i_size - len; + new_size = inode->v.i_size - len; - ret = write_invalidate_inode_pages_range(inode->i_mapping, - offset, LLONG_MAX); + ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); if (ret) goto err; - ret = i_sectors_dirty_get(ei, &i_sectors_hook); + ret = i_sectors_dirty_get(c, inode, &i_sectors_hook); if (ret) goto err; while (bkey_cmp(dst.pos, - POS(inode->i_ino, + POS(inode->v.i_ino, round_up(new_size, PAGE_SIZE) >> 9)) < 0) { struct 
disk_reservation disk_res; @@ -2183,10 +2175,10 @@ static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len) BUG_ON(ret); ret = bch2_btree_insert_at(c, &disk_res, &i_sectors_hook.hook, - &ei->journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOFAIL, - BTREE_INSERT_ENTRY(&dst, ©.k)); + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(&dst, ©.k)); bch2_disk_reservation_put(c, &disk_res); btree_iter_err: if (ret < 0 && ret != -EINTR) @@ -2198,22 +2190,22 @@ btree_iter_err: bch2_btree_iter_unlock(&src); bch2_btree_iter_unlock(&dst); - ret = bch2_inode_truncate(c, inode->i_ino, + ret = bch2_inode_truncate(c, inode->v.i_ino, round_up(new_size, PAGE_SIZE) >> 9, &i_sectors_hook.hook, - &ei->journal_seq); + &inode->ei_journal_seq); if (ret) goto err_unwind; - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); - mutex_lock(&ei->update_lock); - i_size_write(inode, new_size); - ret = bch2_write_inode_size(c, ei, inode->i_size); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + i_size_write(&inode->v, new_size); + ret = bch2_write_inode_size(c, inode, inode->v.i_size); + mutex_unlock(&inode->ei_update_lock); pagecache_block_put(&mapping->add_lock); - inode_unlock(inode); + inode_unlock(&inode->v); return ret; err_unwind: @@ -2221,21 +2213,20 @@ err_unwind: * XXX: we've left data with multiple pointers... which isn't a _super_ * serious problem... 
*/ - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); err: bch2_btree_iter_unlock(&src); bch2_btree_iter_unlock(&dst); pagecache_block_put(&mapping->add_lock); - inode_unlock(inode); + inode_unlock(&inode->v); return ret; } -static long bch2_fallocate(struct inode *inode, int mode, +static long bch2_fallocate(struct bch_inode_info *inode, int mode, loff_t offset, loff_t len) { - struct address_space *mapping = inode->i_mapping; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct address_space *mapping = inode->v.i_mapping; + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct i_sectors_hook i_sectors_hook; struct btree_iter iter; struct bpos end; @@ -2248,33 +2239,33 @@ static long bch2_fallocate(struct inode *inode, int mode, bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_INTENT); - inode_lock(inode); - inode_dio_wait(inode); + inode_lock(&inode->v); + inode_dio_wait(&inode->v); pagecache_block_get(&mapping->add_lock); if (!(mode & FALLOC_FL_KEEP_SIZE) && - new_size > inode->i_size) { - ret = inode_newsize_ok(inode, new_size); + new_size > inode->v.i_size) { + ret = inode_newsize_ok(&inode->v, new_size); if (ret) goto err; } if (mode & FALLOC_FL_ZERO_RANGE) { - ret = __bch2_truncate_page(inode->i_mapping, - offset >> PAGE_SHIFT, - offset, offset + len); + ret = __bch2_truncate_page(mapping, + offset >> PAGE_SHIFT, + offset, offset + len); if (!ret && offset >> PAGE_SHIFT != (offset + len) >> PAGE_SHIFT) - ret = __bch2_truncate_page(inode->i_mapping, - (offset + len) >> PAGE_SHIFT, - offset, offset + len); + ret = __bch2_truncate_page(mapping, + (offset + len) >> PAGE_SHIFT, + offset, offset + len); if (unlikely(ret)) goto err; - truncate_pagecache_range(inode, offset, offset + len - 1); + truncate_pagecache_range(&inode->v, offset, offset + len - 1); block_start = round_up(offset, PAGE_SIZE); block_end = round_down(offset + len, PAGE_SIZE); @@ 
-2283,10 +2274,10 @@ static long bch2_fallocate(struct inode *inode, int mode, block_end = round_up(offset + len, PAGE_SIZE); } - bch2_btree_iter_set_pos(&iter, POS(inode->i_ino, block_start >> 9)); - end = POS(inode->i_ino, block_end >> 9); + bch2_btree_iter_set_pos(&iter, POS(inode->v.i_ino, block_start >> 9)); + end = POS(inode->v.i_ino, block_end >> 9); - ret = i_sectors_dirty_get(ei, &i_sectors_hook); + ret = i_sectors_dirty_get(c, inode, &i_sectors_hook); if (unlikely(ret)) goto err; @@ -2335,7 +2326,7 @@ static long bch2_fallocate(struct inode *inode, int mode, } ret = bch2_btree_insert_at(c, &disk_res, &i_sectors_hook.hook, - &ei->journal_seq, + &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(&iter, &reservation.k_i)); @@ -2347,50 +2338,51 @@ btree_iter_err: } bch2_btree_iter_unlock(&iter); - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); if (!(mode & FALLOC_FL_KEEP_SIZE) && - new_size > inode->i_size) { - i_size_write(inode, new_size); + new_size > inode->v.i_size) { + i_size_write(&inode->v, new_size); - mutex_lock(&ei->update_lock); - ret = bch2_write_inode_size(c, ei, inode->i_size); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + ret = bch2_write_inode_size(c, inode, inode->v.i_size); + mutex_unlock(&inode->ei_update_lock); } /* blech */ if ((mode & FALLOC_FL_KEEP_SIZE) && (mode & FALLOC_FL_ZERO_RANGE) && - ei->i_size != inode->i_size) { + inode->ei_size != inode->v.i_size) { /* sync appends.. 
*/ - ret = filemap_write_and_wait_range(mapping, ei->i_size, S64_MAX); + ret = filemap_write_and_wait_range(mapping, + inode->ei_size, S64_MAX); if (ret) goto err; - if (ei->i_size != inode->i_size) { - mutex_lock(&ei->update_lock); - ret = bch2_write_inode_size(c, ei, inode->i_size); - mutex_unlock(&ei->update_lock); + if (inode->ei_size != inode->v.i_size) { + mutex_lock(&inode->ei_update_lock); + ret = bch2_write_inode_size(c, inode, inode->v.i_size); + mutex_unlock(&inode->ei_update_lock); } } pagecache_block_put(&mapping->add_lock); - inode_unlock(inode); + inode_unlock(&inode->v); return 0; err_put_sectors_dirty: - i_sectors_dirty_put(ei, &i_sectors_hook); + i_sectors_dirty_put(c, inode, &i_sectors_hook); err: bch2_btree_iter_unlock(&iter); pagecache_block_put(&mapping->add_lock); - inode_unlock(inode); + inode_unlock(&inode->v); return ret; } long bch2_fallocate_dispatch(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file_inode(file); + struct bch_inode_info *inode = file_bch_inode(file); if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) return bch2_fallocate(inode, mode, offset, len); @@ -2412,11 +2404,11 @@ static bool page_is_data(struct page *page) page_state(page)->dirty_sectors); } -static loff_t bch2_next_pagecache_data(struct inode *inode, - loff_t start_offset, - loff_t end_offset) +static loff_t bch2_next_pagecache_data(struct inode *vinode, + loff_t start_offset, + loff_t end_offset) { - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = vinode->i_mapping; struct page *page; pgoff_t index; @@ -2444,20 +2436,20 @@ static loff_t bch2_next_pagecache_data(struct inode *inode, static loff_t bch2_seek_data(struct file *file, u64 offset) { - struct inode *inode = file->f_mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; u64 
isize, next_data = MAX_LFS_FILESIZE; int ret; - isize = i_size_read(inode); + isize = i_size_read(&inode->v); if (offset >= isize) return -ENXIO; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(inode->i_ino, offset >> 9), 0, k) { - if (k.k->p.inode != inode->i_ino) { + POS(inode->v.i_ino, offset >> 9), 0, k) { + if (k.k->p.inode != inode->v.i_ino) { break; } else if (bkey_extent_is_data(k.k)) { next_data = max(offset, bkey_start_offset(k.k) << 9); @@ -2471,7 +2463,8 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) return ret; if (next_data > offset) - next_data = bch2_next_pagecache_data(inode, offset, next_data); + next_data = bch2_next_pagecache_data(&inode->v, + offset, next_data); if (next_data > isize) return -ENXIO; @@ -2494,11 +2487,11 @@ static bool page_slot_is_data(struct address_space *mapping, pgoff_t index) return ret; } -static loff_t bch2_next_pagecache_hole(struct inode *inode, - loff_t start_offset, - loff_t end_offset) +static loff_t bch2_next_pagecache_hole(struct inode *vinode, + loff_t start_offset, + loff_t end_offset) { - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = vinode->i_mapping; pgoff_t index; for (index = start_offset >> PAGE_SHIFT; @@ -2513,26 +2506,26 @@ static loff_t bch2_next_pagecache_hole(struct inode *inode, static loff_t bch2_seek_hole(struct file *file, u64 offset) { - struct inode *inode = file->f_mapping->host; - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_iter iter; struct bkey_s_c k; u64 isize, next_hole = MAX_LFS_FILESIZE; int ret; - isize = i_size_read(inode); + isize = i_size_read(&inode->v); if (offset >= isize) return -ENXIO; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(inode->i_ino, offset >> 9), + POS(inode->v.i_ino, offset >> 9), BTREE_ITER_WITH_HOLES, k) { - if (k.k->p.inode != inode->i_ino) { - next_hole = 
bch2_next_pagecache_hole(inode, + if (k.k->p.inode != inode->v.i_ino) { + next_hole = bch2_next_pagecache_hole(&inode->v, offset, MAX_LFS_FILESIZE); break; } else if (!bkey_extent_is_data(k.k)) { - next_hole = bch2_next_pagecache_hole(inode, + next_hole = bch2_next_pagecache_hole(&inode->v, max(offset, bkey_start_offset(k.k) << 9), k.k->p.offset << 9); diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h index 9fdcb6b6..dfdc9b52 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/fs-io.h @@ -24,7 +24,7 @@ ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); int bch2_fsync(struct file *, loff_t, loff_t, int); -int bch2_truncate(struct inode *, struct iattr *); +int bch2_truncate(struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); loff_t bch2_llseek(struct file *, loff_t, int); @@ -38,55 +38,55 @@ int bch2_migrate_page(struct address_space *, struct page *, struct i_sectors_hook { struct extent_insert_hook hook; s64 sectors; - struct bch_inode_info *ei; + struct bch_inode_info *inode; }; struct bchfs_write_op { - struct bch_inode_info *ei; - s64 sectors_added; - bool is_dio; - u64 new_i_size; + struct bch_inode_info *inode; + s64 sectors_added; + bool is_dio; + u64 new_i_size; /* must be last: */ - struct bch_write_op op; + struct bch_write_op op; }; struct bch_writepage_io { - struct closure cl; + struct closure cl; /* must be last: */ - struct bchfs_write_op op; + struct bchfs_write_op op; }; extern struct bio_set *bch2_writepage_bioset; struct dio_write { - struct closure cl; - struct kiocb *req; - struct bch_fs *c; - long written; - long error; - loff_t offset; + struct closure cl; + struct kiocb *req; + struct bch_fs *c; + long written; + long error; + loff_t offset; - struct disk_reservation res; + struct disk_reservation res; - struct iovec *iovec; - struct iovec inline_vecs[UIO_FASTIOV]; - struct iov_iter iter; + struct iovec *iovec; + struct iovec inline_vecs[UIO_FASTIOV]; + struct iov_iter 
iter; - struct mm_struct *mm; + struct mm_struct *mm; /* must be last: */ - struct bchfs_write_op iop; + struct bchfs_write_op iop; }; extern struct bio_set *bch2_dio_write_bioset; struct dio_read { - struct closure cl; - struct kiocb *req; - long ret; - struct bch_read_bio rbio; + struct closure cl; + struct kiocb *req; + long ret; + struct bch_read_bio rbio; }; extern struct bio_set *bch2_dio_read_bioset; diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c new file mode 100644 index 00000000..bd915fec --- /dev/null +++ b/libbcachefs/fs-ioctl.c @@ -0,0 +1,254 @@ +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "chardev.h" +#include "fs.h" +#include "fs-ioctl.h" + +#include +#include + +#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) + +/* Inode flags: */ + +/* bcachefs inode flags -> vfs inode flags: */ +static const unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_SYNC] = S_SYNC, + [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, + [__BCH_INODE_APPEND] = S_APPEND, + [__BCH_INODE_NOATIME] = S_NOATIME, +}; + +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_SYNC] = FS_SYNC_FL, + [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, + [__BCH_INODE_APPEND] = FS_APPEND_FL, + [__BCH_INODE_NODUMP] = FS_NODUMP_FL, + [__BCH_INODE_NOATIME] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, + [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, + [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, + [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, + //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; +}; + +#define map_flags(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + (_out); \ +}) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); 
_i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +/* Set VFS inode flags from bcachefs inode: */ +void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) +{ + set_flags(bch_flags_to_vfs, inode->ei_flags, inode->v.i_flags); +} + +static int bch2_inode_flags_set(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + unsigned newflags = *((unsigned *) p); + unsigned oldflags = bi->bi_flags; + + if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + if (!S_ISREG(inode->v.i_mode) && + !S_ISDIR(inode->v.i_mode) && + (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) + return -EINVAL; + + bi->bi_flags = newflags; + inode->v.i_ctime = current_fs_time(inode->v.i_sb); + return 0; +} + +static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) +{ + unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_flags); + + return put_user(flags, arg); +} + +static int bch2_ioc_setflags(struct bch_fs *c, + struct file *file, + struct bch_inode_info *inode, + void __user *arg) +{ + unsigned flags, uflags; + int ret; + + if (get_user(uflags, (int __user *) arg)) + return -EFAULT; + + flags = map_flags_rev(bch_flags_to_uflags, uflags); + if (uflags) + return -EOPNOTSUPP; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + inode_lock(&inode->v); + if (!inode_owner_or_capable(&inode->v)) { + ret = -EACCES; + goto setflags_out; + } + + mutex_lock(&inode->ei_update_lock); + ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags); + 
+ if (!ret) + bch2_inode_flags_to_vfs(inode); + mutex_unlock(&inode->ei_update_lock); + +setflags_out: + inode_unlock(&inode->v); + mnt_drop_write_file(file); + return ret; +} + +static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, + struct fsxattr __user *arg) +{ + struct fsxattr fa = { 0 }; + + fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_flags); + + return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0; +} + +static int bch2_ioc_fssetxattr(struct bch_fs *c, + struct file *file, + struct bch_inode_info *inode, + struct fsxattr __user *arg) +{ + struct fsxattr fa; + unsigned flags; + int ret; + + if (copy_from_user(&fa, arg, sizeof(fa))) + return -EFAULT; + + flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); + if (fa.fsx_xflags) + return -EOPNOTSUPP; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + inode_lock(&inode->v); + if (!inode_owner_or_capable(&inode->v)) { + ret = -EACCES; + goto err; + } + + mutex_lock(&inode->ei_update_lock); + ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags); + if (!ret) + bch2_inode_flags_to_vfs(inode); + mutex_unlock(&inode->ei_update_lock); +err: + inode_unlock(&inode->v); + mnt_drop_write_file(file); + return ret; +} + +long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + struct bch_inode_info *inode = file_bch_inode(file); + struct super_block *sb = inode->v.i_sb; + struct bch_fs *c = sb->s_fs_info; + + switch (cmd) { + case FS_IOC_GETFLAGS: + return bch2_ioc_getflags(inode, (int __user *) arg); + + case FS_IOC_SETFLAGS: + return bch2_ioc_setflags(c, file, inode, (int __user *) arg); + + case FS_IOC_FSGETXATTR: + return bch2_ioc_fsgetxattr(inode, (void __user *) arg); + case FS_IOC_FSSETXATTR: + return bch2_ioc_fssetxattr(c, file, inode, (void __user *) arg); + + case FS_IOC_GETVERSION: + return -ENOTTY; + case FS_IOC_SETVERSION: + return -ENOTTY; + + case FS_IOC_GOINGDOWN: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + down_write(&sb->s_umount); + 
sb->s_flags |= MS_RDONLY; + bch2_fs_emergency_read_only(c); + up_write(&sb->s_umount); + return 0; + + default: + return bch2_fs_ioctl(c, cmd, (void __user *) arg); + } +} + +#ifdef CONFIG_COMPAT +long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + +#endif /* NO_BCACHEFS_FS */ diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/fs-ioctl.h new file mode 100644 index 00000000..c14e583d --- /dev/null +++ b/libbcachefs/fs-ioctl.h @@ -0,0 +1,9 @@ +#ifndef _BCACHEFS_FS_IOCTL_H +#define _BCACHEFS_FS_IOCTL_H + +void bch2_inode_flags_to_vfs(struct bch_inode_info *); + +long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); +long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); + +#endif /* _BCACHEFS_FS_IOCTL_H */ diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 76829f49..081ae140 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -9,6 +9,7 @@ #include "extents.h" #include "fs.h" #include "fs-io.h" +#include "fs-ioctl.h" #include "fsck.h" #include "inode.h" #include "journal.h" @@ -18,10 +19,8 @@ #include #include -#include #include #include -#include #include #include #include @@ -62,16 +61,15 @@ static void bch2_vfs_inode_init(struct bch_fs *, */ int __must_check __bch2_write_inode(struct bch_fs *c, - struct bch_inode_info *ei, + struct bch_inode_info *inode, inode_set_fn set, void *p) { struct btree_iter iter; - struct inode *inode = &ei->vfs_inode; struct bch_inode_unpacked inode_u; struct bkey_inode_buf inode_p; - u64 inum = inode->i_ino; - unsigned i_nlink = READ_ONCE(inode->i_nlink); + u64 inum = inode->v.i_ino; + unsigned i_nlink = READ_ONCE(inode->v.i_nlink); int ret; /* @@ 
-82,7 +80,7 @@ int __must_check __bch2_write_inode(struct bch_fs *c, if (!i_nlink) return 0; - lockdep_assert_held(&ei->update_lock); + lockdep_assert_held(&inode->ei_update_lock); bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0), BTREE_ITER_INTENT); @@ -107,33 +105,34 @@ int __must_check __bch2_write_inode(struct bch_fs *c, } if (set) { - ret = set(ei, &inode_u, p); + ret = set(inode, &inode_u, p); if (ret) goto out; } - BUG_ON(i_nlink < nlink_bias(inode->i_mode)); + BUG_ON(i_nlink < nlink_bias(inode->v.i_mode)); - inode_u.i_mode = inode->i_mode; - inode_u.i_uid = i_uid_read(inode); - inode_u.i_gid = i_gid_read(inode); - inode_u.i_nlink = i_nlink - nlink_bias(inode->i_mode); - inode_u.i_dev = inode->i_rdev; - inode_u.i_atime = timespec_to_bch2_time(c, inode->i_atime); - inode_u.i_mtime = timespec_to_bch2_time(c, inode->i_mtime); - inode_u.i_ctime = timespec_to_bch2_time(c, inode->i_ctime); + inode_u.bi_mode = inode->v.i_mode; + inode_u.bi_uid = i_uid_read(&inode->v); + inode_u.bi_gid = i_gid_read(&inode->v); + inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode); + inode_u.bi_dev = inode->v.i_rdev; + inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime); + inode_u.bi_mtime= timespec_to_bch2_time(c, inode->v.i_mtime); + inode_u.bi_ctime= timespec_to_bch2_time(c, inode->v.i_ctime); bch2_inode_pack(&inode_p, &inode_u); - ret = bch2_btree_insert_at(c, NULL, NULL, &ei->journal_seq, + ret = bch2_btree_insert_at(c, NULL, NULL, + &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i)); } while (ret == -EINTR); if (!ret) { - ei->i_size = inode_u.i_size; - ei->i_flags = inode_u.i_flags; + inode->ei_size = inode_u.bi_size; + inode->ei_flags = inode_u.bi_flags; } out: bch2_btree_iter_unlock(&iter); @@ -142,484 +141,475 @@ out: } int __must_check bch2_write_inode(struct bch_fs *c, - struct bch_inode_info *ei) + struct bch_inode_info *inode) { - return __bch2_write_inode(c, ei, NULL, NULL); + return 
__bch2_write_inode(c, inode, NULL, NULL); } -int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *ei) +int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode) { int ret; - mutex_lock(&ei->update_lock); - inc_nlink(&ei->vfs_inode); - ret = bch2_write_inode(c, ei); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + inc_nlink(&inode->v); + ret = bch2_write_inode(c, inode); + mutex_unlock(&inode->ei_update_lock); return ret; } -int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *ei) +int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode) { int ret = 0; - mutex_lock(&ei->update_lock); - drop_nlink(&ei->vfs_inode); - ret = bch2_write_inode(c, ei); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + drop_nlink(&inode->v); + ret = bch2_write_inode(c, inode); + mutex_unlock(&inode->ei_update_lock); return ret; } -static struct inode *bch2_vfs_inode_get(struct super_block *sb, u64 inum) +static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { - struct bch_fs *c = sb->s_fs_info; - struct inode *inode; struct bch_inode_unpacked inode_u; - struct bch_inode_info *ei; + struct bch_inode_info *inode; int ret; - pr_debug("inum %llu", inum); - - inode = iget_locked(sb, inum); + inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->v.i_state & I_NEW)) + return &inode->v; ret = bch2_inode_find_by_inum(c, inum, &inode_u); if (ret) { - iget_failed(inode); + iget_failed(&inode->v); return ERR_PTR(ret); } - ei = to_bch_ei(inode); - bch2_vfs_inode_init(c, ei, &inode_u); + bch2_vfs_inode_init(c, inode, &inode_u); - ei->journal_seq = bch2_inode_journal_seq(&c->journal, inum); + inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); - unlock_new_inode(inode); + unlock_new_inode(&inode->v); - return inode; + return &inode->v; } -static struct inode *bch2_vfs_inode_create(struct 
bch_fs *c, - struct inode *parent, - umode_t mode, dev_t rdev) +static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c, + struct bch_inode_info *dir, + umode_t mode, dev_t rdev) { - struct inode *inode; struct posix_acl *default_acl = NULL, *acl = NULL; - struct bch_inode_info *ei; + struct bch_inode_info *inode; struct bch_inode_unpacked inode_u; int ret; - inode = new_inode(parent->i_sb); + inode = to_bch_ei(new_inode(c->vfs_sb)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - inode_init_owner(inode, parent, mode); + inode_init_owner(&inode->v, &dir->v, mode); - ret = posix_acl_create(parent, &inode->i_mode, &default_acl, &acl); +#ifdef CONFIG_BCACHEFS_POSIX_ACL + ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl); if (ret) { - make_bad_inode(inode); + make_bad_inode(&inode->v); goto err; } +#endif - ei = to_bch_ei(inode); - - bch2_inode_init(c, &inode_u, i_uid_read(inode), - i_gid_read(inode), inode->i_mode, rdev); + bch2_inode_init(c, &inode_u, + i_uid_read(&inode->v), + i_gid_read(&inode->v), + inode->v.i_mode, rdev); ret = bch2_inode_create(c, &inode_u, - BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint); + BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint); if (unlikely(ret)) { /* * indicate to bch_evict_inode that the inode was never actually * created: */ - make_bad_inode(inode); + make_bad_inode(&inode->v); goto err; } - bch2_vfs_inode_init(c, ei, &inode_u); + bch2_vfs_inode_init(c, inode, &inode_u); if (default_acl) { - ret = bch2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT); if (unlikely(ret)) goto err; } if (acl) { - ret = bch2_set_acl(inode, acl, ACL_TYPE_ACCESS); + ret = bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS); if (unlikely(ret)) goto err; } - insert_inode_hash(inode); + insert_inode_hash(&inode->v); atomic_long_inc(&c->nr_inodes); out: posix_acl_release(default_acl); posix_acl_release(acl); return inode; err: - clear_nlink(inode); - iput(inode); + 
clear_nlink(&inode->v); + iput(&inode->v); inode = ERR_PTR(ret); goto out; } -static int bch2_vfs_dirent_create(struct bch_fs *c, struct inode *dir, +static int bch2_vfs_dirent_create(struct bch_fs *c, + struct bch_inode_info *dir, u8 type, const struct qstr *name, - struct inode *dst) + u64 dst) { - struct bch_inode_info *dir_ei = to_bch_ei(dir); int ret; - ret = bch2_dirent_create(c, dir->i_ino, &dir_ei->str_hash, - type, name, dst->i_ino, - &dir_ei->journal_seq, + ret = bch2_dirent_create(c, dir->v.i_ino, &dir->ei_str_hash, + type, name, dst, + &dir->ei_journal_seq, BCH_HASH_SET_MUST_CREATE); if (unlikely(ret)) return ret; - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); - mark_inode_dirty_sync(dir); + dir->v.i_mtime = dir->v.i_ctime = current_fs_time(c->vfs_sb); + mark_inode_dirty_sync(&dir->v); return 0; } -static int __bch2_create(struct inode *dir, struct dentry *dentry, +static int __bch2_create(struct bch_inode_info *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - struct bch_inode_info *dir_ei = to_bch_ei(dir); - struct bch_fs *c = dir->i_sb->s_fs_info; - struct inode *inode; - struct bch_inode_info *ei; + struct bch_fs *c = dir->v.i_sb->s_fs_info; + struct bch_inode_info *inode; int ret; inode = bch2_vfs_inode_create(c, dir, mode, rdev); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); - ei = to_bch_ei(inode); - ret = bch2_vfs_dirent_create(c, dir, mode_to_type(mode), - &dentry->d_name, inode); + &dentry->d_name, inode->v.i_ino); if (unlikely(ret)) { - clear_nlink(inode); - iput(inode); + clear_nlink(&inode->v); + iput(&inode->v); return ret; } - if (dir_ei->journal_seq > ei->journal_seq) - ei->journal_seq = dir_ei->journal_seq; + if (dir->ei_journal_seq > inode->ei_journal_seq) + inode->ei_journal_seq = dir->ei_journal_seq; - d_instantiate(dentry, inode); + d_instantiate(dentry, &inode->v); return 0; } /* methods */ -static struct dentry *bch2_lookup(struct inode *dir, struct dentry *dentry, +static struct dentry 
*bch2_lookup(struct inode *vdir, struct dentry *dentry, unsigned int flags) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct bch_inode_info *dir_ei = to_bch_ei(dir); - struct inode *inode = NULL; + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct inode *vinode = NULL; u64 inum; - inum = bch2_dirent_lookup(c, dir->i_ino, - &dir_ei->str_hash, - &dentry->d_name); + inum = bch2_dirent_lookup(c, dir->v.i_ino, + &dir->ei_str_hash, + &dentry->d_name); if (inum) - inode = bch2_vfs_inode_get(dir->i_sb, inum); + vinode = bch2_vfs_inode_get(c, inum); - return d_splice_alias(inode, dentry); + return d_splice_alias(vinode, dentry); } -static int bch2_create(struct inode *dir, struct dentry *dentry, +static int bch2_create(struct inode *vdir, struct dentry *dentry, umode_t mode, bool excl) { - return __bch2_create(dir, dentry, mode|S_IFREG, 0); + return __bch2_create(to_bch_ei(vdir), dentry, mode|S_IFREG, 0); } -static int bch2_link(struct dentry *old_dentry, struct inode *dir, +static int bch2_link(struct dentry *old_dentry, struct inode *vdir, struct dentry *dentry) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct inode *inode = old_dentry->d_inode; - struct bch_inode_info *ei = to_bch_ei(inode); + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); int ret; - lockdep_assert_held(&inode->i_rwsem); + lockdep_assert_held(&inode->v.i_rwsem); - inode->i_ctime = current_fs_time(dir->i_sb); + inode->v.i_ctime = current_fs_time(dir->v.i_sb); - ret = bch2_inc_nlink(c, ei); + ret = bch2_inc_nlink(c, inode); if (ret) return ret; - ihold(inode); + ihold(&inode->v); - ret = bch2_vfs_dirent_create(c, dir, mode_to_type(inode->i_mode), - &dentry->d_name, inode); + ret = bch2_vfs_dirent_create(c, dir, mode_to_type(inode->v.i_mode), + &dentry->d_name, inode->v.i_ino); if (unlikely(ret)) { - bch2_dec_nlink(c, ei); - iput(inode); + 
bch2_dec_nlink(c, inode); + iput(&inode->v); return ret; } - d_instantiate(dentry, inode); + d_instantiate(dentry, &inode->v); return 0; } -static int bch2_unlink(struct inode *dir, struct dentry *dentry) +static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct bch_inode_info *dir_ei = to_bch_ei(dir); - struct inode *inode = dentry->d_inode; - struct bch_inode_info *ei = to_bch_ei(inode); + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); int ret; - lockdep_assert_held(&inode->i_rwsem); + lockdep_assert_held(&inode->v.i_rwsem); - ret = bch2_dirent_delete(c, dir->i_ino, &dir_ei->str_hash, - &dentry->d_name, &dir_ei->journal_seq); + ret = bch2_dirent_delete(c, dir->v.i_ino, &dir->ei_str_hash, + &dentry->d_name, &dir->ei_journal_seq); if (ret) return ret; - if (dir_ei->journal_seq > ei->journal_seq) - ei->journal_seq = dir_ei->journal_seq; + if (dir->ei_journal_seq > inode->ei_journal_seq) + inode->ei_journal_seq = dir->ei_journal_seq; - inode->i_ctime = dir->i_ctime; + inode->v.i_ctime = dir->v.i_ctime; - if (S_ISDIR(inode->i_mode)) { - bch2_dec_nlink(c, dir_ei); - drop_nlink(inode); + if (S_ISDIR(inode->v.i_mode)) { + bch2_dec_nlink(c, dir); + drop_nlink(&inode->v); } - bch2_dec_nlink(c, ei); + bch2_dec_nlink(c, inode); return 0; } -static int bch2_symlink(struct inode *dir, struct dentry *dentry, +static int bch2_symlink(struct inode *vdir, struct dentry *dentry, const char *symname) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct inode *inode; - struct bch_inode_info *ei, *dir_ei = to_bch_ei(dir); + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir), *inode; int ret; inode = bch2_vfs_inode_create(c, dir, S_IFLNK|S_IRWXUGO, 0); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); - ei = to_bch_ei(inode); - - inode_lock(inode); - ret = page_symlink(inode, 
symname, strlen(symname) + 1); - inode_unlock(inode); + inode_lock(&inode->v); + ret = page_symlink(&inode->v, symname, strlen(symname) + 1); + inode_unlock(&inode->v); if (unlikely(ret)) goto err; - ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); if (unlikely(ret)) goto err; /* XXX: racy */ - if (dir_ei->journal_seq < ei->journal_seq) - dir_ei->journal_seq = ei->journal_seq; + if (dir->ei_journal_seq < inode->ei_journal_seq) + dir->ei_journal_seq = inode->ei_journal_seq; - ret = bch2_vfs_dirent_create(c, dir, DT_LNK, &dentry->d_name, inode); + ret = bch2_vfs_dirent_create(c, dir, DT_LNK, &dentry->d_name, + inode->v.i_ino); if (unlikely(ret)) goto err; - d_instantiate(dentry, inode); + d_instantiate(dentry, &inode->v); return 0; err: - clear_nlink(inode); - iput(inode); + clear_nlink(&inode->v); + iput(&inode->v); return ret; } -static int bch2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode) { - struct bch_fs *c = dir->i_sb->s_fs_info; + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); int ret; - lockdep_assert_held(&dir->i_rwsem); + lockdep_assert_held(&dir->v.i_rwsem); ret = __bch2_create(dir, dentry, mode|S_IFDIR, 0); if (unlikely(ret)) return ret; - bch2_inc_nlink(c, to_bch_ei(dir)); + bch2_inc_nlink(c, dir); return 0; } -static int bch2_rmdir(struct inode *dir, struct dentry *dentry) +static int bch2_rmdir(struct inode *vdir, struct dentry *dentry) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct inode *inode = dentry->d_inode; + struct bch_fs *c = vdir->i_sb->s_fs_info; - if (bch2_empty_dir(c, inode->i_ino)) + if (bch2_empty_dir(c, dentry->d_inode->i_ino)) return -ENOTEMPTY; - return bch2_unlink(dir, dentry); + return bch2_unlink(vdir, dentry); } -static int bch2_mknod(struct inode *dir, struct dentry *dentry, +static int 
bch2_mknod(struct inode *vdir, struct dentry *dentry, umode_t mode, dev_t rdev) { - return __bch2_create(dir, dentry, mode, rdev); + return __bch2_create(to_bch_ei(vdir), dentry, mode, rdev); } -static int bch2_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int bch2_rename(struct bch_fs *c, + struct bch_inode_info *old_dir, + struct dentry *old_dentry, + struct bch_inode_info *new_dir, + struct dentry *new_dentry) { - struct bch_fs *c = old_dir->i_sb->s_fs_info; - struct inode *old_inode = old_dentry->d_inode; - struct bch_inode_info *ei = to_bch_ei(old_inode); - struct inode *new_inode = new_dentry->d_inode; - struct timespec now = current_fs_time(old_dir->i_sb); + struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode); + struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode); + struct timespec now = current_fs_time(old_dir->v.i_sb); int ret; - lockdep_assert_held(&old_dir->i_rwsem); - lockdep_assert_held(&new_dir->i_rwsem); + lockdep_assert_held(&old_dir->v.i_rwsem); + lockdep_assert_held(&new_dir->v.i_rwsem); if (new_inode) - filemap_write_and_wait_range(old_inode->i_mapping, + filemap_write_and_wait_range(old_inode->v.i_mapping, 0, LLONG_MAX); - if (new_inode && S_ISDIR(old_inode->i_mode)) { - lockdep_assert_held(&new_inode->i_rwsem); + if (new_inode && S_ISDIR(old_inode->v.i_mode)) { + lockdep_assert_held(&new_inode->v.i_rwsem); - if (!S_ISDIR(new_inode->i_mode)) + if (!S_ISDIR(new_inode->v.i_mode)) return -ENOTDIR; - if (bch2_empty_dir(c, new_inode->i_ino)) + if (bch2_empty_dir(c, new_inode->v.i_ino)) return -ENOTEMPTY; ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &ei->journal_seq, BCH_RENAME_OVERWRITE); + old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name, + &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE); if (unlikely(ret)) return ret; - clear_nlink(new_inode); - bch2_dec_nlink(c, to_bch_ei(old_dir)); + 
clear_nlink(&new_inode->v); + bch2_dec_nlink(c, old_dir); } else if (new_inode) { - lockdep_assert_held(&new_inode->i_rwsem); + lockdep_assert_held(&new_inode->v.i_rwsem); ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &ei->journal_seq, BCH_RENAME_OVERWRITE); + old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name, + &old_inode->ei_journal_seq, BCH_RENAME_OVERWRITE); if (unlikely(ret)) return ret; - new_inode->i_ctime = now; - bch2_dec_nlink(c, to_bch_ei(new_inode)); - } else if (S_ISDIR(old_inode->i_mode)) { + new_inode->v.i_ctime = now; + bch2_dec_nlink(c, new_inode); + } else if (S_ISDIR(old_inode->v.i_mode)) { ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &ei->journal_seq, BCH_RENAME); + old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name, + &old_inode->ei_journal_seq, BCH_RENAME); if (unlikely(ret)) return ret; - bch2_inc_nlink(c, to_bch_ei(new_dir)); - bch2_dec_nlink(c, to_bch_ei(old_dir)); + bch2_inc_nlink(c, new_dir); + bch2_dec_nlink(c, old_dir); } else { ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &ei->journal_seq, BCH_RENAME); + old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name, + &old_inode->ei_journal_seq, BCH_RENAME); if (unlikely(ret)) return ret; } - old_dir->i_ctime = old_dir->i_mtime = now; - new_dir->i_ctime = new_dir->i_mtime = now; - mark_inode_dirty_sync(old_dir); - mark_inode_dirty_sync(new_dir); + old_dir->v.i_ctime = old_dir->v.i_mtime = now; + new_dir->v.i_ctime = new_dir->v.i_mtime = now; + mark_inode_dirty_sync(&old_dir->v); + mark_inode_dirty_sync(&new_dir->v); - old_inode->i_ctime = now; - mark_inode_dirty_sync(old_inode); + old_inode->v.i_ctime = now; + mark_inode_dirty_sync(&old_inode->v); return 0; } -static int bch2_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int 
bch2_rename_exchange(struct bch_fs *c, + struct bch_inode_info *old_dir, + struct dentry *old_dentry, + struct bch_inode_info *new_dir, + struct dentry *new_dentry) { - struct bch_fs *c = old_dir->i_sb->s_fs_info; - struct inode *old_inode = old_dentry->d_inode; - struct inode *new_inode = new_dentry->d_inode; - struct bch_inode_info *ei = to_bch_ei(old_inode); - struct timespec now = current_fs_time(old_dir->i_sb); + struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode); + struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode); + struct timespec now = current_fs_time(old_dir->v.i_sb); int ret; ret = bch2_dirent_rename(c, - old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name, - &ei->journal_seq, BCH_RENAME_EXCHANGE); + old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name, + &old_inode->ei_journal_seq, BCH_RENAME_EXCHANGE); if (unlikely(ret)) return ret; - if (S_ISDIR(old_inode->i_mode) != - S_ISDIR(new_inode->i_mode)) { - if (S_ISDIR(old_inode->i_mode)) { - bch2_inc_nlink(c, to_bch_ei(new_dir)); - bch2_dec_nlink(c, to_bch_ei(old_dir)); + if (S_ISDIR(old_inode->v.i_mode) != + S_ISDIR(new_inode->v.i_mode)) { + if (S_ISDIR(old_inode->v.i_mode)) { + bch2_inc_nlink(c, new_dir); + bch2_dec_nlink(c, old_dir); } else { - bch2_dec_nlink(c, to_bch_ei(new_dir)); - bch2_inc_nlink(c, to_bch_ei(old_dir)); + bch2_dec_nlink(c, new_dir); + bch2_inc_nlink(c, old_dir); } } - old_dir->i_ctime = old_dir->i_mtime = now; - new_dir->i_ctime = new_dir->i_mtime = now; - mark_inode_dirty_sync(old_dir); - mark_inode_dirty_sync(new_dir); + old_dir->v.i_ctime = old_dir->v.i_mtime = now; + new_dir->v.i_ctime = new_dir->v.i_mtime = now; + mark_inode_dirty_sync(&old_dir->v); + mark_inode_dirty_sync(&new_dir->v); - old_inode->i_ctime = now; - new_inode->i_ctime = now; - mark_inode_dirty_sync(old_inode); - mark_inode_dirty_sync(new_inode); + old_inode->v.i_ctime = now; + new_inode->v.i_ctime = now; + mark_inode_dirty_sync(&old_inode->v); + 
mark_inode_dirty_sync(&new_inode->v); return 0; } -static int bch2_rename2(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, +static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry, + struct inode *new_vdir, struct dentry *new_dentry, unsigned flags) { + struct bch_fs *c = old_vdir->i_sb->s_fs_info; + struct bch_inode_info *old_dir = to_bch_ei(old_vdir); + struct bch_inode_info *new_dir = to_bch_ei(new_vdir); + if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) return -EINVAL; if (flags & RENAME_EXCHANGE) - return bch2_rename_exchange(old_dir, old_dentry, - new_dir, new_dentry); + return bch2_rename_exchange(c, old_dir, old_dentry, + new_dir, new_dentry); - return bch2_rename(old_dir, old_dentry, new_dir, new_dentry); + return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry); } static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) { - struct inode *inode = dentry->d_inode; - struct bch_inode_info *ei = to_bch_ei(inode); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret = 0; - lockdep_assert_held(&inode->i_rwsem); - - pr_debug("i_size was %llu update has %llu", - inode->i_size, iattr->ia_size); + lockdep_assert_held(&inode->v.i_rwsem); ret = setattr_prepare(dentry, iattr); if (ret) @@ -628,32 +618,33 @@ static int bch2_setattr(struct dentry *dentry, struct iattr *iattr) if (iattr->ia_valid & ATTR_SIZE) { ret = bch2_truncate(inode, iattr); } else { - mutex_lock(&ei->update_lock); - setattr_copy(inode, iattr); - ret = bch2_write_inode(c, ei); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + setattr_copy(&inode->v, iattr); + ret = bch2_write_inode(c, inode); + mutex_unlock(&inode->ei_update_lock); } if (unlikely(ret)) return ret; if (iattr->ia_valid & ATTR_MODE) - ret = posix_acl_chmod(inode, inode->i_mode); + ret = posix_acl_chmod(&inode->v, 
inode->v.i_mode); return ret; } -static int bch2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode) { - struct bch_fs *c = dir->i_sb->s_fs_info; - struct inode *inode; + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_inode_info *inode; /* XXX: i_nlink should be 0? */ inode = bch2_vfs_inode_create(c, dir, mode, 0); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); - d_tmpfile(dentry, inode); + d_tmpfile(dentry, &inode->v); return 0; } @@ -700,10 +691,11 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, } } -static int bch2_fiemap(struct inode *inode, struct fiemap_extent_info *info, +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, u64 start, u64 len) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); struct btree_iter iter; struct bkey_s_c k; BKEY_PADDED(k) tmp; @@ -714,11 +706,11 @@ static int bch2_fiemap(struct inode *inode, struct fiemap_extent_info *info, return -EINVAL; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(inode->i_ino, start >> 9), 0, k) + POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || k.k->type == BCH_RESERVATION) { if (bkey_cmp(bkey_start_pos(k.k), - POS(inode->i_ino, (start + len) >> 9)) >= 0) + POS(ei->v.i_ino, (start + len) >> 9)) >= 0) break; if (have_extent) { @@ -752,175 +744,6 @@ static int bch2_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -/* Inode flags: */ - -static const unsigned bch_inode_flags_to_vfs_flags_map[] = { - [__BCH_INODE_SYNC] = S_SYNC, - [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, - [__BCH_INODE_APPEND] = S_APPEND, - [__BCH_INODE_NOATIME] = S_NOATIME, -}; - -static const unsigned bch_inode_flags_to_user_flags_map[] = { - [__BCH_INODE_SYNC] = FS_SYNC_FL, - [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, - 
[__BCH_INODE_APPEND] = FS_APPEND_FL, - [__BCH_INODE_NODUMP] = FS_NODUMP_FL, - [__BCH_INODE_NOATIME] = FS_NOATIME_FL, -}; - -/* Set VFS inode flags from bcachefs inode: */ -static void bch2_inode_flags_to_vfs(struct inode *inode) -{ - unsigned i, flags = to_bch_ei(inode)->i_flags; - - for (i = 0; i < ARRAY_SIZE(bch_inode_flags_to_vfs_flags_map); i++) - if (flags & (1 << i)) - inode->i_flags |= bch_inode_flags_to_vfs_flags_map[i]; - else - inode->i_flags &= ~bch_inode_flags_to_vfs_flags_map[i]; -} - -/* Get FS_IOC_GETFLAGS flags from bcachefs inode: */ -static unsigned bch2_inode_flags_to_user_flags(unsigned flags) -{ - unsigned i, ret = 0; - - for (i = 0; i < ARRAY_SIZE(bch_inode_flags_to_user_flags_map); i++) - if (flags & (1 << i)) - ret |= bch_inode_flags_to_user_flags_map[i]; - - return ret; -} - -static int bch2_inode_user_flags_set(struct bch_inode_info *ei, - struct bch_inode_unpacked *bi, - void *p) -{ - /* - * We're relying on btree locking here for exclusion with other ioctl - * calls - use the flags in the btree (@bi), not ei->i_flags: - */ - unsigned bch_flags = bi->i_flags; - unsigned oldflags = bch2_inode_flags_to_user_flags(bch_flags); - unsigned newflags = *((unsigned *) p); - unsigned i; - - if (((newflags ^ oldflags) & (FS_APPEND_FL|FS_IMMUTABLE_FL)) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - for (i = 0; i < ARRAY_SIZE(bch_inode_flags_to_user_flags_map); i++) { - if (newflags & bch_inode_flags_to_user_flags_map[i]) - bch_flags |= (1 << i); - else - bch_flags &= ~(1 << i); - - newflags &= ~bch_inode_flags_to_user_flags_map[i]; - oldflags &= ~bch_inode_flags_to_user_flags_map[i]; - } - - if (oldflags != newflags) - return -EOPNOTSUPP; - - bi->i_flags = bch_flags; - ei->vfs_inode.i_ctime = current_fs_time(ei->vfs_inode.i_sb); - - return 0; -} - -#define FS_IOC_GOINGDOWN _IOR ('X', 125, __u32) - -static long bch2_fs_file_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct 
super_block *sb = inode->i_sb; - struct bch_fs *c = sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(inode); - unsigned flags; - int ret; - - switch (cmd) { - case FS_IOC_GETFLAGS: - return put_user(bch2_inode_flags_to_user_flags(ei->i_flags), - (int __user *) arg); - - case FS_IOC_SETFLAGS: { - ret = mnt_want_write_file(filp); - if (ret) - return ret; - - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto setflags_out; - } - - if (get_user(flags, (int __user *) arg)) { - ret = -EFAULT; - goto setflags_out; - } - - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - (flags & (FS_NODUMP_FL|FS_NOATIME_FL)) != flags) { - ret = -EINVAL; - goto setflags_out; - } - - inode_lock(inode); - - mutex_lock(&ei->update_lock); - ret = __bch2_write_inode(c, ei, bch2_inode_user_flags_set, &flags); - mutex_unlock(&ei->update_lock); - - if (!ret) - bch2_inode_flags_to_vfs(inode); - - inode_unlock(inode); -setflags_out: - mnt_drop_write_file(filp); - return ret; - } - - case FS_IOC_GETVERSION: - return -ENOTTY; - case FS_IOC_SETVERSION: - return -ENOTTY; - - case FS_IOC_GOINGDOWN: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - down_write(&sb->s_umount); - sb->s_flags |= MS_RDONLY; - bch2_fs_emergency_read_only(c); - up_write(&sb->s_umount); - return 0; - - default: - return bch2_fs_ioctl(c, cmd, (void __user *) arg); - } -} - -#ifdef CONFIG_COMPAT -static long bch2_compat_fs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case FS_IOC_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; - default: - return -ENOIOCTLCMD; - } - return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -} -#endif - /* Directories: */ static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) @@ -931,8 +754,7 @@ static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int 
whence) static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { - struct inode *inode = file_inode(file); - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_fs *c = file_inode(file)->i_sb->s_fs_info; return bch2_readdir(c, file, ctx); } @@ -957,8 +779,10 @@ static const struct inode_operations bch_file_inode_operations = { .setattr = bch2_setattr, .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL .get_acl = bch2_get_acl, .set_acl = bch2_set_acl, +#endif }; static const struct inode_operations bch_dir_inode_operations = { @@ -974,8 +798,10 @@ static const struct inode_operations bch_dir_inode_operations = { .setattr = bch2_setattr, .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL .get_acl = bch2_get_acl, .set_acl = bch2_set_acl, +#endif }; static const struct file_operations bch_dir_file_operations = { @@ -993,15 +819,19 @@ static const struct inode_operations bch_symlink_inode_operations = { .get_link = page_get_link, .setattr = bch2_setattr, .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL .get_acl = bch2_get_acl, .set_acl = bch2_set_acl, +#endif }; static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL .get_acl = bch2_get_acl, .set_acl = bch2_set_acl, +#endif }; static const struct address_space_operations bch_address_space_operations = { @@ -1024,20 +854,21 @@ static const struct address_space_operations bch_address_space_operations = { static struct inode *bch2_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { - struct inode *inode; + struct bch_fs *c = sb->s_fs_info; + struct inode *vinode; if (ino < BCACHEFS_ROOT_INO) return ERR_PTR(-ESTALE); - inode = bch2_vfs_inode_get(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { + vinode = bch2_vfs_inode_get(c, ino); + if 
(IS_ERR(vinode)) + return ERR_CAST(vinode); + if (generation && vinode->i_generation != generation) { /* we didn't find the right inode.. */ - iput(inode); + iput(vinode); return ERR_PTR(-ESTALE); } - return inode; + return vinode; } static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, @@ -1061,127 +892,119 @@ static const struct export_operations bch_export_ops = { }; static void bch2_vfs_inode_init(struct bch_fs *c, - struct bch_inode_info *ei, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi) { - struct inode *inode = &ei->vfs_inode; + inode->v.i_mode = bi->bi_mode; + i_uid_write(&inode->v, bi->bi_uid); + i_gid_write(&inode->v, bi->bi_gid); + inode->v.i_blocks = bi->bi_sectors; + inode->v.i_ino = bi->bi_inum; + set_nlink(&inode->v, bi->bi_nlink + nlink_bias(inode->v.i_mode)); + inode->v.i_rdev = bi->bi_dev; + inode->v.i_generation = bi->bi_generation; + inode->v.i_size = bi->bi_size; + inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); + inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); + inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime); - pr_debug("init inode %llu with mode %o", - bi->inum, bi->i_mode); + inode->ei_journal_seq = 0; + inode->ei_size = bi->bi_size; + inode->ei_flags = bi->bi_flags; + atomic64_set(&inode->ei_sectors, bi->bi_sectors); + inode->ei_str_hash = bch2_hash_info_init(c, bi); - ei->i_flags = bi->i_flags; - ei->i_size = bi->i_size; - - inode->i_mode = bi->i_mode; - i_uid_write(inode, bi->i_uid); - i_gid_write(inode, bi->i_gid); - - atomic64_set(&ei->i_sectors, bi->i_sectors); - inode->i_blocks = bi->i_sectors; - - inode->i_ino = bi->inum; - set_nlink(inode, bi->i_nlink + nlink_bias(inode->i_mode)); - inode->i_rdev = bi->i_dev; - inode->i_generation = bi->i_generation; - inode->i_size = bi->i_size; - inode->i_atime = bch2_time_to_timespec(c, bi->i_atime); - inode->i_mtime = bch2_time_to_timespec(c, bi->i_mtime); - inode->i_ctime = bch2_time_to_timespec(c, bi->i_ctime); 
bch2_inode_flags_to_vfs(inode); - ei->str_hash = bch2_hash_info_init(c, bi); + inode->v.i_mapping->a_ops = &bch_address_space_operations; - inode->i_mapping->a_ops = &bch_address_space_operations; - - switch (inode->i_mode & S_IFMT) { + switch (inode->v.i_mode & S_IFMT) { case S_IFREG: - inode->i_op = &bch_file_inode_operations; - inode->i_fop = &bch_file_operations; + inode->v.i_op = &bch_file_inode_operations; + inode->v.i_fop = &bch_file_operations; break; case S_IFDIR: - inode->i_op = &bch_dir_inode_operations; - inode->i_fop = &bch_dir_file_operations; + inode->v.i_op = &bch_dir_inode_operations; + inode->v.i_fop = &bch_dir_file_operations; break; case S_IFLNK: - inode_nohighmem(inode); - inode->i_op = &bch_symlink_inode_operations; + inode_nohighmem(&inode->v); + inode->v.i_op = &bch_symlink_inode_operations; break; default: - init_special_inode(inode, inode->i_mode, inode->i_rdev); - inode->i_op = &bch_special_inode_operations; + init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); + inode->v.i_op = &bch_special_inode_operations; break; } } static struct inode *bch2_alloc_inode(struct super_block *sb) { - struct bch_inode_info *ei; + struct bch_inode_info *inode; - ei = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); - if (!ei) + inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); + if (!inode) return NULL; - pr_debug("allocated %p", &ei->vfs_inode); + inode_init_once(&inode->v); + mutex_init(&inode->ei_update_lock); + inode->ei_journal_seq = 0; + atomic_long_set(&inode->ei_size_dirty_count, 0); + atomic_long_set(&inode->ei_sectors_dirty_count, 0); - inode_init_once(&ei->vfs_inode); - mutex_init(&ei->update_lock); - ei->journal_seq = 0; - atomic_long_set(&ei->i_size_dirty_count, 0); - atomic_long_set(&ei->i_sectors_dirty_count, 0); - - return &ei->vfs_inode; + return &inode->v; } static void bch2_i_callback(struct rcu_head *head) { - struct inode *inode = container_of(head, struct inode, i_rcu); + struct inode *vinode = container_of(head, struct 
inode, i_rcu); + struct bch_inode_info *inode = to_bch_ei(vinode); - kmem_cache_free(bch2_inode_cache, to_bch_ei(inode)); + kmem_cache_free(bch2_inode_cache, inode); } -static void bch2_destroy_inode(struct inode *inode) +static void bch2_destroy_inode(struct inode *vinode) { - call_rcu(&inode->i_rcu, bch2_i_callback); + call_rcu(&vinode->i_rcu, bch2_i_callback); } -static int bch2_vfs_write_inode(struct inode *inode, +static int bch2_vfs_write_inode(struct inode *vinode, struct writeback_control *wbc) { - struct bch_fs *c = inode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(inode); + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); int ret; - mutex_lock(&ei->update_lock); - ret = bch2_write_inode(c, ei); - mutex_unlock(&ei->update_lock); + mutex_lock(&inode->ei_update_lock); + ret = bch2_write_inode(c, inode); + mutex_unlock(&inode->ei_update_lock); if (c->opts.journal_flush_disabled) return ret; if (!ret && wbc->sync_mode == WB_SYNC_ALL) - ret = bch2_journal_flush_seq(&c->journal, ei->journal_seq); + ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); return ret; } -static void bch2_evict_inode(struct inode *inode) +static void bch2_evict_inode(struct inode *vinode) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); - truncate_inode_pages_final(&inode->i_data); - - if (!bch2_journal_error(&c->journal) && !is_bad_inode(inode)) { - struct bch_inode_info *ei = to_bch_ei(inode); + truncate_inode_pages_final(&inode->v.i_data); + if (!bch2_journal_error(&c->journal) && !is_bad_inode(&inode->v)) { /* XXX - we want to check this stuff iff there weren't IO errors: */ - BUG_ON(atomic_long_read(&ei->i_sectors_dirty_count)); - BUG_ON(atomic64_read(&ei->i_sectors) != inode->i_blocks); + BUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count)); + BUG_ON(atomic64_read(&inode->ei_sectors) != inode->v.i_blocks); } 
- clear_inode(inode); + clear_inode(&inode->v); - if (!inode->i_nlink && !is_bad_inode(inode)) { - bch2_inode_rm(c, inode->i_ino); + if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { + bch2_inode_rm(c, inode->v.i_ino); atomic_long_dec(&c->nr_inodes); } } @@ -1305,16 +1128,17 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) struct bch_opts opts = bch2_opts_empty(); int ret; - opts.read_only = (*flags & MS_RDONLY) != 0; + opt_set(opts, read_only, (*flags & MS_RDONLY) != 0); ret = bch2_parse_mount_opts(&opts, data); if (ret) return ret; - if (opts.read_only >= 0 && - opts.read_only != c->opts.read_only) { + if (opts.read_only != c->opts.read_only) { const char *err = NULL; + mutex_lock(&c->state_lock); + if (opts.read_only) { bch2_fs_read_only(c); @@ -1330,6 +1154,8 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) } c->opts.read_only = opts.read_only; + + mutex_unlock(&c->state_lock); } if (opts.errors >= 0) @@ -1338,6 +1164,38 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; } +static int bch2_show_options(struct seq_file *seq, struct dentry *root) +{ + struct bch_fs *c = root->d_sb->s_fs_info; + enum bch_opt_id i; + + for (i = 0; i < bch2_opts_nr; i++) { + const struct bch_option *opt = &bch2_opt_table[i]; + u64 v = bch2_opt_get_by_id(&c->opts, i); + + if (opt->mode < OPT_MOUNT) + continue; + + if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) + continue; + + switch (opt->type) { + case BCH_OPT_BOOL: + seq_printf(seq, ",%s%s", v ? 
"" : "no", opt->attr.name); + break; + case BCH_OPT_UINT: + seq_printf(seq, ",%s=%llu", opt->attr.name, v); + break; + case BCH_OPT_STR: + seq_printf(seq, ",%s=%s", opt->attr.name, opt->choices[v]); + break; + } + } + + return 0; + +} + static const struct super_operations bch_super_operations = { .alloc_inode = bch2_alloc_inode, .destroy_inode = bch2_destroy_inode, @@ -1345,7 +1203,7 @@ static const struct super_operations bch_super_operations = { .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, - .show_options = generic_show_options, + .show_options = bch2_show_options, .remount_fs = bch2_remount, #if 0 .put_super = bch2_put_super, @@ -1371,12 +1229,12 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, struct bch_fs *c; struct bch_dev *ca; struct super_block *sb; - struct inode *inode; + struct inode *vinode; struct bch_opts opts = bch2_opts_empty(); unsigned i; int ret; - opts.read_only = (flags & MS_RDONLY) != 0; + opt_set(opts, read_only, (flags & MS_RDONLY) != 0); ret = bch2_parse_mount_opts(&opts, data); if (ret) @@ -1427,18 +1285,18 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, break; } - if (opts.posix_acl < 0) +#ifdef CONFIG_BCACHEFS_POSIX_ACL + if (c->opts.acl) sb->s_flags |= MS_POSIXACL; - else - sb->s_flags |= opts.posix_acl ? 
MS_POSIXACL : 0; +#endif - inode = bch2_vfs_inode_get(sb, BCACHEFS_ROOT_INO); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); + if (IS_ERR(vinode)) { + ret = PTR_ERR(vinode); goto err_put_super; } - sb->s_root = d_make_root(inode); + sb->s_root = d_make_root(vinode); if (!sb->s_root) { ret = -ENOMEM; goto err_put_super; diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index 5f2c39f0..d255ca7c 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/fs.h @@ -7,30 +7,35 @@ #include struct bch_inode_info { - struct inode vfs_inode; + struct inode v; - struct mutex update_lock; - u64 journal_seq; + struct mutex ei_update_lock; + u64 ei_journal_seq; - atomic_long_t i_size_dirty_count; + atomic_long_t ei_size_dirty_count; /* * these are updated whenever we update the inode in the btree - for * e.g. fsync */ - u64 i_size; - u32 i_flags; + u64 ei_size; + u32 ei_flags; - atomic_long_t i_sectors_dirty_count; - atomic64_t i_sectors; + atomic_long_t ei_sectors_dirty_count; + atomic64_t ei_sectors; - struct bch_hash_info str_hash; + struct bch_hash_info ei_str_hash; - unsigned long last_dirtied; + unsigned long ei_last_dirtied; }; #define to_bch_ei(_inode) \ - container_of(_inode, struct bch_inode_info, vfs_inode) + container_of_or_null(_inode, struct bch_inode_info, v) + +static inline struct bch_inode_info *file_bch_inode(struct file *file) +{ + return to_bch_ei(file_inode(file)); +} static inline u8 mode_to_type(umode_t mode) { diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index f137b730..4760b16e 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -67,7 +67,7 @@ static int reattach_inode(struct bch_fs *c, snprintf(name_buf, sizeof(name_buf), "%llu", inum); name = (struct qstr) QSTR(name_buf); - lostfound_inode->i_nlink++; + lostfound_inode->bi_nlink++; bch2_inode_pack(&packed, lostfound_inode); @@ -80,7 +80,7 @@ static int reattach_inode(struct bch_fs *c, return ret; } - ret = bch2_dirent_create(c, 
lostfound_inode->inum, + ret = bch2_dirent_create(c, lostfound_inode->bi_inum, &lostfound_hash_info, DT_DIR, &name, inum, NULL, BTREE_INSERT_NOFAIL); @@ -263,9 +263,9 @@ static int check_extents(struct bch_fs *c) "extent type %u for missing inode %llu", k.k->type, k.k->p.inode) || fsck_err_on(w.have_inode && - !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c, + !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, w.inode.i_mode)) { + k.k->type, k.k->p.inode, w.inode.bi_mode)) { ret = bch2_btree_delete_at(&iter, 0); if (ret) goto err; @@ -274,18 +274,18 @@ static int check_extents(struct bch_fs *c) unfixable_fsck_err_on(w.first_this_inode && w.have_inode && - !(w.inode.i_flags & BCH_INODE_I_SECTORS_DIRTY) && - w.inode.i_sectors != + !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && + w.inode.bi_sectors != (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)), c, "i_sectors wrong: got %llu, should be %llu", - w.inode.i_sectors, i_sectors); + w.inode.bi_sectors, i_sectors); unfixable_fsck_err_on(w.have_inode && - !(w.inode.i_flags & BCH_INODE_I_SIZE_DIRTY) && + !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && k.k->type != BCH_RESERVATION && - k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c, + k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size); + k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size); } err: fsck_err: @@ -324,9 +324,9 @@ static int check_dirents(struct bch_fs *c) "dirent in nonexisting directory:\n%s", bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, buf, sizeof(buf), k)) || - fsck_err_on(!S_ISDIR(w.inode.i_mode), c, + fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", - mode_to_type(w.inode.i_mode), + mode_to_type(w.inode.bi_mode), bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, 
buf, sizeof(buf), k))) { ret = bch2_btree_delete_at(&iter, 0); @@ -397,9 +397,9 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(have_target && d.v->d_type != - mode_to_type(le16_to_cpu(target.i_mode)), c, + mode_to_type(le16_to_cpu(target.bi_mode)), c, "incorrect d_type: should be %u:\n%s", - mode_to_type(le16_to_cpu(target.i_mode)), + mode_to_type(le16_to_cpu(target.bi_mode)), bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, buf, sizeof(buf), k))) { struct bkey_i_dirent *n; @@ -411,7 +411,7 @@ static int check_dirents(struct bch_fs *c) } bkey_reassemble(&n->k_i, d.s_c); - n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode)); + n->v.d_type = mode_to_type(le16_to_cpu(target.bi_mode)); ret = bch2_btree_insert_at(c, NULL, NULL, NULL, BTREE_INSERT_NOFAIL, @@ -485,7 +485,7 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) if (fsck_err_on(ret, c, "root directory missing")) goto create_root; - if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c, + if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, "root inode not a directory")) goto create_root; @@ -494,7 +494,7 @@ fsck_err: return ret; create_root: bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); - root_inode->inum = BCACHEFS_ROOT_INO; + root_inode->bi_inum = BCACHEFS_ROOT_INO; bch2_inode_pack(&packed, root_inode); @@ -528,7 +528,7 @@ static int check_lostfound(struct bch_fs *c, if (fsck_err_on(ret, c, "lost+found missing")) goto create_lostfound; - if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c, + if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, "lost+found inode not a directory")) goto create_lostfound; @@ -536,7 +536,7 @@ static int check_lostfound(struct bch_fs *c, fsck_err: return ret; create_lostfound: - root_inode->i_nlink++; + root_inode->bi_nlink++; bch2_inode_pack(&packed, root_inode); @@ -553,7 +553,7 @@ create_lostfound: return ret; ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR, - &lostfound, 
lostfound_inode->inum, NULL, + &lostfound, lostfound_inode->bi_inum, NULL, BTREE_INSERT_NOFAIL); if (ret) return ret; @@ -711,7 +711,7 @@ up: for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { if (k.k->type != BCH_INODE_FS || - !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode))) + !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) continue; if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, @@ -849,15 +849,15 @@ static int bch2_gc_do_inode(struct bch_fs *c, inode.k->p.inode)) return ret; - i_nlink = u.i_nlink + nlink_bias(u.i_mode); + i_nlink = u.bi_nlink + nlink_bias(u.bi_mode); fsck_err_on(i_nlink < link.count, c, "inode %llu i_link too small (%u < %u, type %i)", inode.k->p.inode, i_nlink, - link.count, mode_to_type(u.i_mode)); + link.count, mode_to_type(u.bi_mode)); /* These should have been caught/fixed by earlier passes: */ - if (S_ISDIR(u.i_mode)) { + if (S_ISDIR(u.bi_mode)) { need_fsck_err_on(link.count > 1, c, "directory %llu with multiple hardlinks: %u", inode.k->p.inode, link.count); @@ -877,7 +877,7 @@ static int bch2_gc_do_inode(struct bch_fs *c, "but found orphaned inode %llu", inode.k->p.inode); - if (fsck_err_on(S_ISDIR(u.i_mode) && + if (fsck_err_on(S_ISDIR(u.bi_mode) && bch2_empty_dir(c, inode.k->p.inode), c, "non empty directory with link count 0, " "inode nlink %u, dir links found %u", @@ -897,7 +897,7 @@ static int bch2_gc_do_inode(struct bch_fs *c, return ret; } - if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) { + if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) { fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has i_size dirty", @@ -911,7 +911,7 @@ static int bch2_gc_do_inode(struct bch_fs *c, */ ret = bch2_inode_truncate(c, inode.k->p.inode, - round_up(u.i_size, PAGE_SIZE) >> 9, + round_up(u.bi_size, PAGE_SIZE) >> 9, NULL, NULL); if (ret) { bch_err(c, "error in fs gc: error %i " @@ -923,13 +923,13 @@ static int bch2_gc_do_inode(struct bch_fs *c, * We truncated without our normal sector accounting 
hook, just * make sure we recalculate it: */ - u.i_flags |= BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; - u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY; + u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; do_update = true; } - if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) { + if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) { s64 sectors; fsck_err_on(c->sb.clean, c, @@ -948,8 +948,8 @@ static int bch2_gc_do_inode(struct bch_fs *c, return sectors; } - u.i_sectors = sectors; - u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + u.bi_sectors = sectors; + u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; do_update = true; } @@ -958,12 +958,12 @@ static int bch2_gc_do_inode(struct bch_fs *c, "filesystem marked clean, " "but inode %llu has wrong i_nlink " "(type %u i_nlink %u, should be %u)", - inode.k->p.inode, mode_to_type(u.i_mode), + inode.k->p.inode, mode_to_type(u.bi_mode), i_nlink, real_i_nlink); bch_verbose(c, "setting inode %llu nlinks from %u to %u", inode.k->p.inode, i_nlink, real_i_nlink); - u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);; + u.bi_nlink = real_i_nlink - nlink_bias(u.bi_mode); do_update = true; } diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 1422cc24..05f617ae 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_methods.h" #include "btree_update.h" +#include "error.h" #include "extents.h" #include "inode.h" #include "io.h" @@ -91,10 +92,10 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, unsigned nr_fields = 0, last_nonzero_fieldnr = 0; bkey_inode_init(&packed->inode.k_i); - packed->inode.k.p.inode = inode->inum; - packed->inode.v.i_hash_seed = inode->i_hash_seed; - packed->inode.v.i_flags = cpu_to_le32(inode->i_flags); - packed->inode.v.i_mode = cpu_to_le16(inode->i_mode); + packed->inode.k.p.inode = inode->bi_inum; + packed->inode.v.bi_hash_seed = inode->bi_hash_seed; + packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); + packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); 
#define BCH_INODE_FIELD(_name, _bits) \ out += inode_encode_field(out, end, 0, inode->_name); \ @@ -124,9 +125,9 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode), &unpacked); BUG_ON(ret); - BUG_ON(unpacked.inum != inode->inum); - BUG_ON(unpacked.i_hash_seed != inode->i_hash_seed); - BUG_ON(unpacked.i_mode != inode->i_mode); + BUG_ON(unpacked.bi_inum != inode->bi_inum); + BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); + BUG_ON(unpacked.bi_mode != inode->bi_mode); #define BCH_INODE_FIELD(_name, _bits) BUG_ON(unpacked._name != inode->_name); BCH_INODE_FIELDS() @@ -143,10 +144,10 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, unsigned fieldnr = 0, field_bits; int ret; - unpacked->inum = inode.k->p.inode; - unpacked->i_hash_seed = inode.v->i_hash_seed; - unpacked->i_flags = le32_to_cpu(inode.v->i_flags); - unpacked->i_mode = le16_to_cpu(inode.v->i_mode); + unpacked->bi_inum = inode.k->p.inode; + unpacked->bi_hash_seed = inode.v->bi_hash_seed; + unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); + unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); #define BCH_INODE_FIELD(_name, _bits) \ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ @@ -231,7 +232,7 @@ static void bch2_inode_to_text(struct bch_fs *c, char *buf, break; } - scnprintf(buf, size, "i_size %llu", unpacked.i_size); + scnprintf(buf, size, "i_size %llu", unpacked.bi_size); break; } } @@ -249,17 +250,17 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, memset(inode_u, 0, sizeof(*inode_u)); /* ick */ - inode_u->i_flags |= c->sb.str_hash_type << INODE_STR_HASH_OFFSET; - get_random_bytes(&inode_u->i_hash_seed, sizeof(inode_u->i_hash_seed)); + inode_u->bi_flags |= c->opts.str_hash << INODE_STR_HASH_OFFSET; + get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); - inode_u->i_mode = mode; - inode_u->i_uid = uid; - inode_u->i_gid = gid; - inode_u->i_dev = rdev; - inode_u->i_atime = now; - 
inode_u->i_mtime = now; - inode_u->i_ctime = now; - inode_u->i_otime = now; + inode_u->bi_mode = mode; + inode_u->bi_uid = uid; + inode_u->bi_gid = gid; + inode_u->bi_dev = rdev; + inode_u->bi_atime = now; + inode_u->bi_mtime = now; + inode_u->bi_ctime = now; + inode_u->bi_otime = now; } int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u, @@ -287,7 +288,7 @@ again: while (1) { struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter); - u32 i_generation = 0; + u32 bi_generation = 0; ret = btree_iter_err(k); if (ret) { @@ -308,11 +309,11 @@ again: case BCH_INODE_GENERATION: { struct bkey_s_c_inode_generation g = bkey_s_c_to_inode_generation(k); - i_generation = le32_to_cpu(g.v->i_generation); + bi_generation = le32_to_cpu(g.v->bi_generation); /* fallthrough: */ } default: - inode_u->i_generation = i_generation; + inode_u->bi_generation = bi_generation; bch2_inode_pack(&inode_p, inode_u); inode_p.inode.k.p = k.k->p; @@ -326,7 +327,7 @@ again: bch2_btree_iter_unlock(&iter); if (!ret) { - inode_u->inum = + inode_u->bi_inum = inode_p.inode.k.p.inode; *hint = inode_p.inode.k.p.inode + 1; } @@ -384,7 +385,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) * but there could be whiteouts (from hash collisions) that we should * delete: * - * XXX: the dirent could ideally would delete whitouts when they're no + * XXX: the dirent could ideally would delete whiteouts when they're no * longer needed */ ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS, @@ -398,7 +399,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) BTREE_ITER_INTENT); do { struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter); - u32 i_generation = 0; + u32 bi_generation = 0; ret = btree_iter_err(k); if (ret) { @@ -406,29 +407,33 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) return ret; } + bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c, + "inode %llu not found when deleting", + inode_nr); + switch (k.k->type) { case BCH_INODE_FS: { struct bch_inode_unpacked 
inode_u; if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) - i_generation = cpu_to_le32(inode_u.i_generation) + 1; + bi_generation = cpu_to_le32(inode_u.bi_generation) + 1; break; } case BCH_INODE_GENERATION: { struct bkey_s_c_inode_generation g = bkey_s_c_to_inode_generation(k); - i_generation = le32_to_cpu(g.v->i_generation); + bi_generation = le32_to_cpu(g.v->bi_generation); break; } } - if (!i_generation) { + if (!bi_generation) { bkey_init(&delete.k); delete.k.p.inode = inode_nr; } else { bkey_inode_generation_init(&delete.k_i); delete.k.p.inode = inode_nr; - delete.v.i_generation = cpu_to_le32(i_generation); + delete.v.bi_generation = cpu_to_le32(bi_generation); } ret = bch2_btree_insert_at(c, NULL, NULL, NULL, @@ -504,17 +509,17 @@ void bch2_inode_pack_test(void) { struct bch_inode_unpacked *u, test_inodes[] = { { - .i_atime = U64_MAX, - .i_ctime = U64_MAX, - .i_mtime = U64_MAX, - .i_otime = U64_MAX, - .i_size = U64_MAX, - .i_sectors = U64_MAX, - .i_uid = U32_MAX, - .i_gid = U32_MAX, - .i_nlink = U32_MAX, - .i_generation = U32_MAX, - .i_dev = U32_MAX, + .bi_atime = U64_MAX, + .bi_ctime = U64_MAX, + .bi_mtime = U64_MAX, + .bi_otime = U64_MAX, + .bi_size = U64_MAX, + .bi_sectors = U64_MAX, + .bi_uid = U32_MAX, + .bi_gid = U32_MAX, + .bi_nlink = U32_MAX, + .bi_generation = U32_MAX, + .bi_dev = U32_MAX, }, }; diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 22aac3e6..53c70617 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -6,10 +6,10 @@ extern const struct bkey_ops bch2_bkey_inode_ops; struct bch_inode_unpacked { - u64 inum; - __le64 i_hash_seed; - u32 i_flags; - u16 i_mode; + u64 bi_inum; + __le64 bi_hash_seed; + u32 bi_flags; + u16 bi_mode; #define BCH_INODE_FIELD(_name, _bits) u##_bits _name; BCH_INODE_FIELDS() @@ -22,7 +22,7 @@ struct bkey_inode_buf { #define BCH_INODE_FIELD(_name, _bits) + 8 + _bits / 8 u8 _pad[0 + BCH_INODE_FIELDS()]; #undef BCH_INODE_FIELD -}; +} __attribute__((packed, aligned(8))); void 
bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index b22fc8d9..3ee9d39e 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -700,11 +700,11 @@ reread: sectors_read = min_t(unsigned, case JOURNAL_ENTRY_NONE: if (!saw_bad) return 0; - sectors = c->sb.block_size; + sectors = c->opts.block_size; goto next_block; case JOURNAL_ENTRY_BAD: saw_bad = true; - sectors = c->sb.block_size; + sectors = c->opts.block_size; goto next_block; default: return ret; @@ -1192,7 +1192,7 @@ static enum { j->prev_buf_sectors = vstruct_blocks_plus(buf->data, c->block_bits, journal_entry_u64s_reserve(buf)) * - c->sb.block_size; + c->opts.block_size; BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors); diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index b5ae5aeb..c9482151 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -66,42 +66,24 @@ const char * const bch2_dev_state[] = { NULL }; -const struct bch_option bch2_opt_table[] = { -#define OPT_BOOL() .type = BCH_OPT_BOOL -#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max -#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices - -#define BCH_OPT(_name, _mode, _sb_opt, _bits, _type) \ - [Opt_##_name] = { \ - .name = #_name, \ - .set_sb = SET_##_sb_opt, \ - _type \ - }, - BCH_VISIBLE_OPTS() -#undef BCH_OPT -}; - -static int bch2_opt_lookup(const char *name) +void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) { - const struct bch_option *i; +#define BCH_OPT(_name, ...) 
\ + if (opt_defined(src, _name)) \ + opt_set(*dst, _name, src._name); - for (i = bch2_opt_table; - i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); - i++) - if (!strcmp(name, i->name)) - return i - bch2_opt_table; - - return -1; + BCH_OPTS() +#undef BCH_OPT } -static u64 bch2_opt_get(struct bch_opts *opts, enum bch_opt_id id) +u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) { switch (id) { #define BCH_OPT(_name, ...) \ case Opt_##_name: \ return opts->_name; \ - BCH_VISIBLE_OPTS() + BCH_OPTS() #undef BCH_OPT default: @@ -109,15 +91,15 @@ static u64 bch2_opt_get(struct bch_opts *opts, enum bch_opt_id id) } } -void bch2_opt_set(struct bch_opts *opts, enum bch_opt_id id, u64 v) +void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) { switch (id) { #define BCH_OPT(_name, ...) \ case Opt_##_name: \ - opts->_name = v; \ + opt_set(*opts, _name, v); \ break; - BCH_VISIBLE_OPTS() + BCH_OPTS() #undef BCH_OPT default: @@ -129,13 +111,13 @@ void bch2_opt_set(struct bch_opts *opts, enum bch_opt_id id, u64 v) * Initial options from superblock - here we don't want any options undefined, * any options the superblock doesn't specify are set to 0: */ -struct bch_opts bch2_sb_opts(struct bch_sb *sb) +struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) { struct bch_opts opts = bch2_opts_empty(); -#define BCH_OPT(_name, _mode, _sb_opt, ...) 
\ +#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ if (_sb_opt != NO_SB_OPT) \ - opts._name = _sb_opt(sb); + opt_set(opts, _name, _sb_opt(sb)); BCH_OPTS() #undef BCH_OPT @@ -143,9 +125,41 @@ struct bch_opts bch2_sb_opts(struct bch_sb *sb) return opts; } -static int parse_one_opt(enum bch_opt_id id, const char *val, u64 *res) +const struct bch_option bch2_opt_table[] = { +#define OPT_BOOL() .type = BCH_OPT_BOOL +#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max +#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices + +#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ + [Opt_##_name] = { \ + .attr = { \ + .name = #_name, \ + .mode = _mode == OPT_RUNTIME ? 0644 : 0444, \ + }, \ + .mode = _mode, \ + .set_sb = SET_##_sb_opt, \ + _type \ + }, + + BCH_OPTS() +#undef BCH_OPT +}; + +static int bch2_opt_lookup(const char *name) +{ + const struct bch_option *i; + + for (i = bch2_opt_table; + i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); + i++) + if (!strcmp(name, i->attr.name)) + return i - bch2_opt_table; + + return -1; +} + +int bch2_opt_parse(const struct bch_option *opt, const char *val, u64 *res) { - const struct bch_option *opt = &bch2_opt_table[id]; ssize_t ret; switch (opt->type) { @@ -190,11 +204,11 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options) if (val) { id = bch2_opt_lookup(name); if (id < 0) - continue; + goto bad_opt; - ret = parse_one_opt(id, val, &v); + ret = bch2_opt_parse(&bch2_opt_table[id], val, &v); if (ret < 0) - return ret; + goto bad_val; } else { id = bch2_opt_lookup(name); v = 1; @@ -205,47 +219,31 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options) v = 0; } - if (id < 0 || - bch2_opt_table[id].type != BCH_OPT_BOOL) - continue; + if (id < 0) + goto bad_opt; + + if (bch2_opt_table[id].type != BCH_OPT_BOOL) + goto no_val; } - bch2_opt_set(opts, id, v); + if (bch2_opt_table[id].mode < OPT_MOUNT) + goto bad_opt; + + if (id == Opt_acl && + 
!IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) + goto bad_opt; + + bch2_opt_set_by_id(opts, id, v); } return 0; -} - -enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val, - u64 *res) -{ - int id = bch2_opt_lookup(name); - int ret; - - if (id < 0) - return -EINVAL; - - ret = parse_one_opt(id, val, res); - if (ret < 0) - return ret; - - return id; -} - -ssize_t bch2_opt_show(struct bch_opts *opts, const char *name, - char *buf, size_t size) -{ - int id = bch2_opt_lookup(name); - const struct bch_option *opt; - u64 v; - - if (id < 0) - return -EINVAL; - - v = bch2_opt_get(opts, id); - opt = &bch2_opt_table[id]; - - return opt->type == BCH_OPT_STR - ? bch2_scnprint_string_list(buf, size, opt->choices, v) - : scnprintf(buf, size, "%lli", v); +bad_opt: + pr_err("Bad mount option %s", name); + return -1; +bad_val: + pr_err("Invalid value %s for mount option %s", val, name); + return -1; +no_val: + pr_err("Mount option %s requires a value", name); + return -1; } diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 667f629e..33e3a2c8 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "bcachefs_format.h" extern const char * const bch2_error_actions[]; @@ -30,18 +31,12 @@ extern const char * const bch2_dev_state[]; /* dummy option, for options that aren't stored in the superblock */ LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); -/** - * BCH_OPT(name, mode, sb_opt, type, ...) 
- * - * @name - name of mount option, sysfs attribute, and struct bch_opts - * member - * - * @mode - sysfs attr permissions - * - * @sb_option - name of corresponding superblock option - * - * @type - one of OPT_BOOL, OPT_UINT, OPT_STR - */ +enum opt_mode { + OPT_INTERNAL, + OPT_FORMAT, + OPT_MOUNT, + OPT_RUNTIME, +}; enum opt_type { BCH_OPT_BOOL, @@ -49,82 +44,162 @@ enum opt_type { BCH_OPT_STR, }; -#define BCH_VISIBLE_OPTS() \ - BCH_OPT(errors, 0644, BCH_SB_ERROR_ACTION, \ - s8, OPT_STR(bch2_error_actions)) \ - BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\ - s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ - BCH_OPT(data_replicas, 0444, BCH_SB_DATA_REPLICAS_WANT,\ - s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ - BCH_OPT(metadata_replicas_required, 0444, BCH_SB_META_REPLICAS_REQ,\ - s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ - BCH_OPT(data_replicas_required, 0444, BCH_SB_DATA_REPLICAS_REQ,\ - s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \ - BCH_OPT(degraded, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE, \ - s8, OPT_STR(bch2_csum_types)) \ - BCH_OPT(data_checksum, 0644, BCH_SB_DATA_CSUM_TYPE, \ - s8, OPT_STR(bch2_csum_types)) \ - BCH_OPT(compression, 0644, BCH_SB_COMPRESSION_TYPE,\ - s8, OPT_STR(bch2_compression_types)) \ - BCH_OPT(str_hash, 0644, BCH_SB_STR_HASH_TYPE, \ - s8, OPT_STR(bch2_str_hash_types)) \ - BCH_OPT(inodes_32bit, 0644, BCH_SB_INODE_32BIT, \ - s8, OPT_BOOL()) \ - BCH_OPT(gc_reserve_percent, 0444, BCH_SB_GC_RESERVE, \ - s8, OPT_UINT(5, 21)) \ - BCH_OPT(root_reserve_percent, 0444, BCH_SB_ROOT_RESERVE, \ - s8, OPT_UINT(0, 100)) \ - BCH_OPT(wide_macs, 0644, BCH_SB_128_BIT_MACS, \ - s8, OPT_BOOL()) \ - BCH_OPT(verbose_recovery, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(posix_acl, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(journal_flush_disabled, 0644, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(nofsck, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(fix_errors, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - 
BCH_OPT(nochanges, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(noreplay, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(norecovery, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(noexcl, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(sb, 0444, NO_SB_OPT, \ - s64, OPT_UINT(0, S64_MAX)) \ +/** + * BCH_OPT(name, type, in mem type, mode, sb_opt) + * + * @name - name of mount option, sysfs attribute, and struct bch_opts + * member + * + * @mode - when opt may be set + * + * @sb_option - name of corresponding superblock option + * + * @type - one of OPT_BOOL, OPT_UINT, OPT_STR + */ + +/* + * XXX: add fields for + * - default value + * - helptext + */ #define BCH_OPTS() \ - BCH_OPT(read_only, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_OPT(nostart, 0444, NO_SB_OPT, \ - s8, OPT_BOOL()) \ - BCH_VISIBLE_OPTS() + BCH_OPT(block_size, u16, OPT_FORMAT, \ + OPT_UINT(1, 128), \ + BCH_SB_BLOCK_SIZE, 8) \ + BCH_OPT(btree_node_size, u16, OPT_FORMAT, \ + OPT_UINT(1, 128), \ + BCH_SB_BTREE_NODE_SIZE, 512) \ + BCH_OPT(errors, u8, OPT_RUNTIME, \ + OPT_STR(bch2_error_actions), \ + BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO) \ + BCH_OPT(metadata_replicas, u8, OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_META_REPLICAS_WANT, 1) \ + BCH_OPT(data_replicas, u8, OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_DATA_REPLICAS_WANT, 1) \ + BCH_OPT(metadata_replicas_required, u8, OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_META_REPLICAS_REQ, 1) \ + BCH_OPT(data_replicas_required, u8, OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_DATA_REPLICAS_REQ, 1) \ + BCH_OPT(metadata_checksum, u8, OPT_RUNTIME, \ + OPT_STR(bch2_csum_types), \ + BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C) \ + BCH_OPT(data_checksum, u8, OPT_RUNTIME, \ + OPT_STR(bch2_csum_types), \ + BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C) \ + BCH_OPT(compression, u8, OPT_RUNTIME, \ + OPT_STR(bch2_compression_types), \ + BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_NONE)\ + BCH_OPT(str_hash, u8, 
OPT_RUNTIME, \ + OPT_STR(bch2_str_hash_types), \ + BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_SIPHASH) \ + BCH_OPT(inodes_32bit, u8, OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_INODE_32BIT, false) \ + BCH_OPT(gc_reserve_percent, u8, OPT_MOUNT, \ + OPT_UINT(5, 21), \ + BCH_SB_GC_RESERVE, 8) \ + BCH_OPT(root_reserve_percent, u8, OPT_MOUNT, \ + OPT_UINT(0, 100), \ + BCH_SB_ROOT_RESERVE, 0) \ + BCH_OPT(wide_macs, u8, OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_128_BIT_MACS, false) \ + BCH_OPT(acl, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + BCH_SB_POSIX_ACL, true) \ + BCH_OPT(degraded, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(verbose_recovery, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(journal_flush_disabled, u8, OPT_RUNTIME, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(nofsck, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(fix_errors, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(nochanges, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(noreplay, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(norecovery, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(noexcl, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(sb, u64, OPT_MOUNT, \ + OPT_UINT(0, S64_MAX), \ + NO_SB_OPT, BCH_SB_SECTOR) \ + BCH_OPT(read_only, u8, OPT_INTERNAL, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(nostart, u8, OPT_INTERNAL, \ + OPT_BOOL(), \ + NO_SB_OPT, false) struct bch_opts { -#define BCH_OPT(_name, _mode, _sb_opt, _bits, ...) \ - _bits _name; +#define BCH_OPT(_name, _bits, ...) unsigned _name##_defined:1; + BCH_OPTS() +#undef BCH_OPT + +#define BCH_OPT(_name, _bits, ...) _bits _name; + BCH_OPTS() +#undef BCH_OPT +}; + +static const struct bch_opts bch2_opts_default = { +#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ + ._name##_defined = true, \ + ._name = _default, \ BCH_OPTS() #undef BCH_OPT }; -enum bch_opt_id { -#define BCH_OPT(_name, ...) 
\ - Opt_##_name, +#define opt_defined(_opts, _name) ((_opts)._name##_defined) - BCH_VISIBLE_OPTS() +#define opt_get(_opts, _name) \ + (opt_defined(_opts, _name) ? _opts._name : bch2_opts_default._name) + +#define opt_set(_opts, _name, _v) \ +do { \ + (_opts)._name##_defined = true; \ + (_opts)._name = _v; \ +} while (0) + +static inline struct bch_opts bch2_opts_empty(void) +{ + struct bch_opts opts; + + memset(&opts, 0, sizeof(opts)); + return opts; +} + +void bch2_opts_apply(struct bch_opts *, struct bch_opts); + +enum bch_opt_id { +#define BCH_OPT(_name, ...) Opt_##_name, + BCH_OPTS() #undef BCH_OPT + bch2_opts_nr }; struct bch_option { - const char *name; + struct attribute attr; void (*set_sb)(struct bch_sb *, u64); + enum opt_mode mode; enum opt_type type; union { @@ -140,32 +215,12 @@ struct bch_option { extern const struct bch_option bch2_opt_table[]; -static inline struct bch_opts bch2_opts_empty(void) -{ - struct bch_opts ret; +u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); +void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); - memset(&ret, 255, sizeof(ret)); - return ret; -} - -static inline void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) -{ -#define BCH_OPT(_name, ...) 
\ - if (src._name >= 0) \ - dst->_name = src._name; - - BCH_OPTS() -#undef BCH_OPT -} - -#define opt_defined(_opt) ((_opt) >= 0) - -void bch2_opt_set(struct bch_opts *, enum bch_opt_id, u64); -struct bch_opts bch2_sb_opts(struct bch_sb *); +struct bch_opts bch2_opts_from_sb(struct bch_sb *); +int bch2_opt_parse(const struct bch_option *, const char *, u64 *); int bch2_parse_mount_opts(struct bch_opts *, char *); -enum bch_opt_id bch2_parse_sysfs_opt(const char *, const char *, u64 *); - -ssize_t bch2_opt_show(struct bch_opts *, const char *, char *, size_t); #endif /* _BCACHEFS_OPTS_H */ diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index d91fbdf1..530cf0a4 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -26,14 +26,14 @@ bch2_hash_info_init(struct bch_fs *c, { /* XXX ick */ struct bch_hash_info info = { - .type = (bi->i_flags >> INODE_STR_HASH_OFFSET) & + .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & ~(~0U << INODE_STR_HASH_BITS) }; switch (info.type) { case BCH_STR_HASH_CRC32C: case BCH_STR_HASH_CRC64: - info.crc_key = bi->i_hash_seed; + info.crc_key = bi->bi_hash_seed; break; case BCH_STR_HASH_SIPHASH: { SHASH_DESC_ON_STACK(desc, c->sha256); @@ -42,8 +42,8 @@ bch2_hash_info_init(struct bch_fs *c, desc->tfm = c->sha256; desc->flags = 0; - crypto_shash_digest(desc, (void *) &bi->i_hash_seed, - sizeof(bi->i_hash_seed), digest); + crypto_shash_digest(desc, (void *) &bi->bi_hash_seed, + sizeof(bi->bi_hash_seed), digest); memcpy(&info.siphash_key, digest, sizeof(info.siphash_key)); break; } diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 482ab572..1e4eafb2 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -32,7 +32,7 @@ struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, return NULL; } -void bch2_free_super(struct bcache_superblock *sb) +void bch2_free_super(struct bch_sb_handle *sb) { if (sb->bio) bio_put(sb->bio); @@ -43,7 +43,7 @@ void bch2_free_super(struct bcache_superblock *sb) 
memset(sb, 0, sizeof(*sb)); } -static int __bch2_super_realloc(struct bcache_superblock *sb, unsigned order) +static int __bch2_super_realloc(struct bch_sb_handle *sb, unsigned order) { struct bch_sb *new_sb; struct bio *bio; @@ -77,7 +77,7 @@ static int __bch2_super_realloc(struct bcache_superblock *sb, unsigned order) return 0; } -static int bch2_sb_realloc(struct bcache_superblock *sb, unsigned u64s) +static int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) { u64 new_bytes = __vstruct_bytes(struct bch_sb, u64s); u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; @@ -145,9 +145,9 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb *sb, return f; } -struct bch_sb_field *bch2_sb_field_resize(struct bcache_superblock *sb, - enum bch_sb_field_type type, - unsigned u64s) +struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, + enum bch_sb_field_type type, + unsigned u64s) { struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type); ssize_t old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; @@ -179,7 +179,7 @@ struct bch_sb_field *bch2_fs_sb_field_resize(struct bch_fs *c, /* XXX: we're not checking that offline device have enough space */ for_each_online_member(ca, c, i) { - struct bcache_superblock *sb = &ca->disk_sb; + struct bch_sb_handle *sb = &ca->disk_sb; if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { percpu_ref_put(&ca->ref); @@ -305,7 +305,7 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb) return NULL; } -const char *bch2_sb_validate(struct bcache_superblock *disk_sb) +const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) { struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; @@ -318,8 +318,10 @@ const char *bch2_sb_validate(struct bcache_superblock *disk_sb) le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX) return"Unsupported superblock version"; - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) + if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) { SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7); + SET_BCH_SB_POSIX_ACL(sb, 1); + } block_size = le16_to_cpu(sb->block_size); @@ -462,11 +464,8 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; - c->sb.block_size = le16_to_cpu(src->block_size); - c->sb.btree_node_size = BCH_SB_BTREE_NODE_SIZE(src); c->sb.nr_devices = src->nr_devices; c->sb.clean = BCH_SB_CLEAN(src); - c->sb.str_hash_type = BCH_SB_STR_HASH_TYPE(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src); c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); @@ -557,7 +556,7 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) /* read superblock: */ -static const char *read_one_super(struct bcache_superblock *sb, u64 offset) +static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) { struct bch_csum csum; size_t bytes; @@ -605,37 +604,37 @@ reread: return NULL; } -const char *bch2_read_super(struct bcache_superblock *sb, 
- struct bch_opts opts, - const char *path) +const char *bch2_read_super(const char *path, + struct bch_opts opts, + struct bch_sb_handle *ret) { - u64 offset = opt_defined(opts.sb) ? opts.sb : BCH_SB_SECTOR; + u64 offset = opt_get(opts, sb); struct bch_sb_layout layout; const char *err; unsigned i; - memset(sb, 0, sizeof(*sb)); - sb->mode = FMODE_READ; + memset(ret, 0, sizeof(*ret)); + ret->mode = FMODE_READ; - if (!(opt_defined(opts.noexcl) && opts.noexcl)) - sb->mode |= FMODE_EXCL; + if (!opt_get(opts, noexcl)) + ret->mode |= FMODE_EXCL; - if (!(opt_defined(opts.nochanges) && opts.nochanges)) - sb->mode |= FMODE_WRITE; + if (!opt_get(opts, nochanges)) + ret->mode |= FMODE_WRITE; - err = bch2_blkdev_open(path, sb->mode, sb, &sb->bdev); + err = bch2_blkdev_open(path, ret->mode, ret, &ret->bdev); if (err) return err; err = "cannot allocate memory"; - if (__bch2_super_realloc(sb, 0)) + if (__bch2_super_realloc(ret, 0)) goto err; err = "dynamic fault"; if (bch2_fs_init_fault("read_super")) goto err; - err = read_one_super(sb, offset); + err = read_one_super(ret, offset); if (!err) goto got_super; @@ -650,22 +649,22 @@ const char *bch2_read_super(struct bcache_superblock *sb, * Error reading primary superblock - read location of backup * superblocks: */ - bio_reset(sb->bio); - sb->bio->bi_bdev = sb->bdev; - sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; - sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout); - bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); + bio_reset(ret->bio); + ret->bio->bi_bdev = ret->bdev; + ret->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; + ret->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout); + bio_set_op_attrs(ret->bio, REQ_OP_READ, REQ_SYNC|REQ_META); /* * use sb buffer to read layout, since sb buffer is page aligned but * layout won't be: */ - bch2_bio_map(sb->bio, sb->sb); + bch2_bio_map(ret->bio, ret->sb); err = "IO error"; - if (submit_bio_wait(sb->bio)) + if (submit_bio_wait(ret->bio)) goto err; - 
memcpy(&layout, sb->sb, sizeof(layout)); + memcpy(&layout, ret->sb, sizeof(layout)); err = validate_sb_layout(&layout); if (err) goto err; @@ -676,26 +675,26 @@ const char *bch2_read_super(struct bcache_superblock *sb, if (offset == BCH_SB_SECTOR) continue; - err = read_one_super(sb, offset); + err = read_one_super(ret, offset); if (!err) goto got_super; } goto err; got_super: pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", - le64_to_cpu(sb->sb->version), - le64_to_cpu(sb->sb->flags), - le64_to_cpu(sb->sb->seq), - le16_to_cpu(sb->sb->u64s)); + le64_to_cpu(ret->sb->version), + le64_to_cpu(ret->sb->flags), + le64_to_cpu(ret->sb->seq), + le16_to_cpu(ret->sb->u64s)); err = "Superblock block size smaller than device block size"; - if (le16_to_cpu(sb->sb->block_size) << 9 < - bdev_logical_block_size(sb->bdev)) + if (le16_to_cpu(ret->sb->block_size) << 9 < + bdev_logical_block_size(ret->bdev)) goto err; return NULL; err: - bch2_free_super(sb); + bch2_free_super(ret); return err; } diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index ed27dd0f..8cafb301 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -9,8 +9,8 @@ #include struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type); -struct bch_sb_field *bch2_sb_field_resize(struct bcache_superblock *, - enum bch_sb_field_type, unsigned); +struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *, + enum bch_sb_field_type, unsigned); struct bch_sb_field *bch2_fs_sb_field_resize(struct bch_fs *, enum bch_sb_field_type, unsigned); @@ -26,7 +26,7 @@ bch2_sb_get_##_name(struct bch_sb *sb) \ } \ \ static inline struct bch_sb_field_##_name * \ -bch2_sb_resize_##_name(struct bcache_superblock *sb, unsigned u64s) \ +bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s) \ { \ return field_to_type(bch2_sb_field_resize(sb, \ BCH_SB_FIELD_##_name, u64s), _name); \ @@ -112,15 +112,15 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct 
bch_member *mi) int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *); int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); -void bch2_free_super(struct bcache_superblock *); -int bch2_super_realloc(struct bcache_superblock *, unsigned); +void bch2_free_super(struct bch_sb_handle *); +int bch2_super_realloc(struct bch_sb_handle *, unsigned); const char *bch2_sb_validate_journal(struct bch_sb *, struct bch_member_cpu); -const char *bch2_sb_validate(struct bcache_superblock *); +const char *bch2_sb_validate(struct bch_sb_handle *); -const char *bch2_read_super(struct bcache_superblock *, - struct bch_opts, const char *); +const char *bch2_read_super(const char *, struct bch_opts, + struct bch_sb_handle *); void bch2_write_super(struct bch_fs *); /* replicas: */ diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 951053f7..4af9075c 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -241,13 +241,12 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { - mutex_lock(&c->state_lock); if (c->state != BCH_FS_STARTING && c->state != BCH_FS_RW) - goto out; + return; if (test_bit(BCH_FS_ERROR, &c->flags)) - goto out; + return; /* * Block new foreground-end write operations from starting - any new @@ -296,8 +295,6 @@ void bch2_fs_read_only(struct bch_fs *c) } c->state = BCH_FS_RO; -out: - mutex_unlock(&c->state_lock); } static void bch2_fs_read_only_work(struct work_struct *work) @@ -305,7 +302,9 @@ static void bch2_fs_read_only_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, read_only_work); + mutex_lock(&c->state_lock); bch2_fs_read_only(c); + mutex_unlock(&c->state_lock); } static void bch2_fs_read_only_async(struct bch_fs *c) @@ -330,10 +329,9 @@ const char *bch2_fs_read_write(struct bch_fs *c) const char *err = NULL; unsigned i; - mutex_lock(&c->state_lock); if (c->state != BCH_FS_STARTING && c->state != BCH_FS_RO) - goto out; + return NULL; for_each_rw_member(ca, 
c, i) bch2_dev_allocator_add(c, ca); @@ -367,13 +365,10 @@ const char *bch2_fs_read_write(struct bch_fs *c) percpu_ref_reinit(&c->writes); c->state = BCH_FS_RW; - err = NULL; -out: - mutex_unlock(&c->state_lock); - return err; + return NULL; err: __bch2_fs_read_only(c); - goto out; + return err; } /* Filesystem startup/shutdown: */ @@ -452,7 +447,9 @@ static void bch2_fs_offline(struct bch_fs *c) kobject_put(&c->opts_dir); kobject_put(&c->internal); + mutex_lock(&c->state_lock); __bch2_fs_read_only(c); + mutex_unlock(&c->state_lock); } static void bch2_fs_release(struct kobject *kobj) @@ -555,15 +552,16 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto err; } - c->block_bits = ilog2(c->sb.block_size); - mutex_unlock(&c->sb_lock); scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); - bch2_opts_apply(&c->opts, bch2_sb_opts(sb)); + c->opts = bch2_opts_default; + bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); bch2_opts_apply(&c->opts, opts); + c->block_bits = ilog2(c->opts.block_size); + c->opts.nochanges |= c->opts.noreplay; c->opts.read_only |= c->opts.nochanges; @@ -590,7 +588,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio)) || mempool_init_page_pool(&c->bio_bounce_pages, max_t(unsigned, - c->sb.btree_node_size, + c->opts.btree_node_size, c->sb.encoded_extent_max) / PAGE_SECTORS, 0) || !(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) || @@ -657,7 +655,8 @@ static const char *__bch2_fs_online(struct bch_fs *c) if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || kobject_add(&c->internal, &c->kobj, "internal") || kobject_add(&c->opts_dir, &c->kobj, "options") || - kobject_add(&c->time_stats, &c->kobj, "time_stats")) + kobject_add(&c->time_stats, &c->kobj, "time_stats") || + bch2_opts_create_sysfs_files(&c->opts_dir)) return "error creating sysfs objects"; mutex_lock(&c->state_lock); @@ -699,6 +698,8 @@ static 
const char *__bch2_fs_start(struct bch_fs *c) closure_init_stack(&cl); + mutex_lock(&c->state_lock); + BUG_ON(c->state != BCH_FS_STARTING); mutex_lock(&c->sb_lock); @@ -742,6 +743,8 @@ static const char *__bch2_fs_start(struct bch_fs *c) if (ret) goto err; + set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); + bch_verbose(c, "starting mark and sweep:"); err = "error in recovery"; ret = bch2_initial_gc(c, &journal); @@ -796,6 +799,8 @@ static const char *__bch2_fs_start(struct bch_fs *c) bch_notice(c, "initializing new filesystem"); + set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); + ret = bch2_initial_gc(c, &journal); if (ret) goto err; @@ -831,7 +836,7 @@ static const char *__bch2_fs_start(struct bch_fs *c) bch2_inode_init(c, &inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); - inode.inum = BCACHEFS_ROOT_INO; + inode.bi_inum = BCACHEFS_ROOT_INO; bch2_inode_pack(&packed_inode, &inode); @@ -873,6 +878,7 @@ recovery_done: err = NULL; out: + mutex_unlock(&c->state_lock); bch2_journal_entries_free(&journal); return err; err: @@ -922,7 +928,7 @@ static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) if (!sb_mi) return "Invalid superblock: member info area missing"; - if (le16_to_cpu(sb->block_size) != c->sb.block_size) + if (le16_to_cpu(sb->block_size) != c->opts.block_size) return "mismatched block size"; if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < @@ -1129,7 +1135,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) btree_node_reserve_buckets = DIV_ROUND_UP(BTREE_NODE_RESERVE, - ca->mi.bucket_size / c->sb.btree_node_size); + ca->mi.bucket_size / c->opts.btree_node_size); if (percpu_ref_init(&ca->ref, bch2_dev_ref_release, 0, GFP_KERNEL) || @@ -1176,7 +1182,7 @@ err: return -ENOMEM; } -static int __bch2_dev_online(struct bch_fs *c, struct bcache_superblock *sb) +static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb) { struct bch_dev *ca; int ret; @@ -1462,7 +1468,7 @@ err: /* Add new device to running filesystem: */ 
int bch2_dev_add(struct bch_fs *c, const char *path) { - struct bcache_superblock sb; + struct bch_sb_handle sb; const char *err; struct bch_dev *ca = NULL; struct bch_sb_field_members *mi, *dev_mi; @@ -1470,7 +1476,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) unsigned dev_idx, nr_devices, u64s; int ret = -EINVAL; - err = bch2_read_super(&sb, bch2_opts_empty(), path); + err = bch2_read_super(path, bch2_opts_empty(), &sb); if (err) return -EINVAL; @@ -1572,14 +1578,14 @@ err: /* Hot add existing device to running filesystem: */ int bch2_dev_online(struct bch_fs *c, const char *path) { - struct bcache_superblock sb = { 0 }; + struct bch_sb_handle sb = { 0 }; struct bch_dev *ca; unsigned dev_idx; const char *err; mutex_lock(&c->state_lock); - err = bch2_read_super(&sb, bch2_opts_empty(), path); + err = bch2_read_super(path, bch2_opts_empty(), &sb); if (err) goto err; @@ -1673,7 +1679,7 @@ const char *bch2_fs_open(char * const *devices, unsigned nr_devices, { const char *err; struct bch_fs *c = NULL; - struct bcache_superblock *sb; + struct bch_sb_handle *sb; unsigned i, best_sb = 0; if (!nr_devices) @@ -1688,7 +1694,7 @@ const char *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; for (i = 0; i < nr_devices; i++) { - err = bch2_read_super(&sb[i], opts, devices[i]); + err = bch2_read_super(devices[i], opts, &sb[i]); if (err) goto err; @@ -1757,7 +1763,7 @@ err: goto out; } -static const char *__bch2_fs_open_incremental(struct bcache_superblock *sb, +static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, struct bch_opts opts) { const char *err; @@ -1821,11 +1827,11 @@ err: const char *bch2_fs_open_incremental(const char *path) { - struct bcache_superblock sb; + struct bch_sb_handle sb; struct bch_opts opts = bch2_opts_empty(); const char *err; - err = bch2_read_super(&sb, opts, path); + err = bch2_read_super(path, opts, &sb); if (err) return err; diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h index 
579929ac..756dfeba 100644 --- a/libbcachefs/super_types.h +++ b/libbcachefs/super_types.h @@ -1,7 +1,7 @@ #ifndef _BCACHEFS_SUPER_TYPES_H #define _BCACHEFS_SUPER_TYPES_H -struct bcache_superblock { +struct bch_sb_handle { struct bch_sb *sb; struct block_device *bdev; struct bio *bio; diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index ff3deba8..07d9be75 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -194,14 +194,6 @@ read_attribute(data_replicas_have); BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM -#define BCH_OPT(_name, _mode, ...) \ - static struct attribute sysfs_opt_##_name = { \ - .name = #_name, .mode = _mode, \ - }; - - BCH_VISIBLE_OPTS() -#undef BCH_OPT - #define BCH_TIME_STAT(name, frequency_units, duration_units) \ sysfs_time_stats_attribute(name, frequency_units, duration_units); BCH_TIME_STATS() @@ -528,8 +520,13 @@ SHOW(bch2_fs_opts_dir) { char *out = buf, *end = buf + PAGE_SIZE; struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); + const struct bch_option *opt = container_of(attr, struct bch_option, attr); + int id = opt - bch2_opt_table; + u64 v = bch2_opt_get_by_id(&c->opts, id); - out += bch2_opt_show(&c->opts, attr->name, out, end - out); + out += opt->type == BCH_OPT_STR + ? 
bch2_scnprint_string_list(out, end - out, opt->choices, v) + : scnprintf(out, end - out, "%lli", v); out += scnprintf(out, end - out, "\n"); return out - buf; @@ -538,15 +535,13 @@ SHOW(bch2_fs_opts_dir) STORE(bch2_fs_opts_dir) { struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); - const struct bch_option *opt; - int id; + const struct bch_option *opt = container_of(attr, struct bch_option, attr); + int ret, id = opt - bch2_opt_table; u64 v; - id = bch2_parse_sysfs_opt(attr->name, buf, &v); - if (id < 0) - return id; - - opt = &bch2_opt_table[id]; + ret = bch2_opt_parse(opt, buf, &v); + if (ret < 0) + return ret; mutex_lock(&c->sb_lock); @@ -563,7 +558,7 @@ STORE(bch2_fs_opts_dir) bch2_write_super(c); } - bch2_opt_set(&c->opts, id, v); + bch2_opt_set_by_id(&c->opts, id, v); mutex_unlock(&c->sb_lock); @@ -571,15 +566,26 @@ STORE(bch2_fs_opts_dir) } SYSFS_OPS(bch2_fs_opts_dir); -struct attribute *bch2_fs_opts_dir_files[] = { -#define BCH_OPT(_name, ...) \ - &sysfs_opt_##_name, +struct attribute *bch2_fs_opts_dir_files[] = { NULL }; - BCH_VISIBLE_OPTS() -#undef BCH_OPT +int bch2_opts_create_sysfs_files(struct kobject *kobj) +{ + const struct bch_option *i; + int ret; - NULL -}; + for (i = bch2_opt_table; + i < bch2_opt_table + bch2_opts_nr; + i++) { + if (i->mode == OPT_INTERNAL) + continue; + + ret = sysfs_create_file(kobj, &i->attr); + if (ret) + return ret; + } + + return 0; +} /* time stats */ diff --git a/libbcachefs/sysfs.h b/libbcachefs/sysfs.h index a4825056..1ba759fd 100644 --- a/libbcachefs/sysfs.h +++ b/libbcachefs/sysfs.h @@ -20,6 +20,8 @@ extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops; extern struct sysfs_ops bch2_dev_sysfs_ops; +int bch2_opts_create_sysfs_files(struct kobject *); + #else static struct attribute *bch2_fs_files[] = {}; @@ -34,6 +36,8 @@ static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops; static const 
struct sysfs_ops bch2_dev_sysfs_ops; +static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; } + #endif /* NO_BCACHEFS_SYSFS */ #endif /* _BCACHEFS_SYSFS_H_ */ diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index b2075c2e..3a49d728 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -162,18 +162,17 @@ const struct bkey_ops bch2_bkey_xattr_ops = { .val_to_text = bch2_xattr_to_text, }; -int bch2_xattr_get(struct bch_fs *c, struct inode *inode, +int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, const char *name, void *buffer, size_t size, int type) { - struct bch_inode_info *ei = to_bch_ei(inode); struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_xattr xattr; int ret; - k = bch2_hash_lookup(bch2_xattr_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &iter, - &X_SEARCH(type, name, strlen(name))); + k = bch2_hash_lookup(bch2_xattr_hash_desc, &inode->ei_str_hash, c, + inode->v.i_ino, &iter, + &X_SEARCH(type, name, strlen(name))); if (IS_ERR(k.k)) return bch2_btree_iter_unlock(&iter) ?: -ENODATA; @@ -236,15 +235,13 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum, return ret; } -int bch2_xattr_set(struct bch_fs *c, struct inode *inode, - const char *name, const void *value, size_t size, - int flags, int type) +int bch2_xattr_set(struct bch_fs *c, struct bch_inode_info *inode, + const char *name, const void *value, size_t size, + int flags, int type) { - struct bch_inode_info *ei = to_bch_ei(inode); - - return __bch2_xattr_set(c, inode->i_ino, &ei->str_hash, - name, value, size, flags, type, - &ei->journal_seq); + return __bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash, + name, value, size, flags, type, + &inode->ei_journal_seq); } static size_t bch2_xattr_emit(struct dentry *dentry, @@ -313,23 +310,25 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) } static int bch2_xattr_get_handler(const struct xattr_handler *handler, - struct dentry *dentry, struct 
inode *inode, - const char *name, void *buffer, size_t size) + struct dentry *dentry, struct inode *vinode, + const char *name, void *buffer, size_t size) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; return bch2_xattr_get(c, inode, name, buffer, size, handler->flags); } static int bch2_xattr_set_handler(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) + struct dentry *dentry, struct inode *vinode, + const char *name, const void *value, + size_t size, int flags) { - struct bch_fs *c = inode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; return bch2_xattr_set(c, inode, name, value, size, flags, - handler->flags); + handler->flags); } static const struct xattr_handler bch_xattr_user_handler = { diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h index 16310d89..9c815a2d 100644 --- a/libbcachefs/xattr.h +++ b/libbcachefs/xattr.h @@ -9,12 +9,13 @@ extern const struct bkey_ops bch2_bkey_xattr_ops; struct dentry; struct xattr_handler; struct bch_hash_info; +struct bch_inode_info; -int bch2_xattr_get(struct bch_fs *, struct inode *, +int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, const char *, void *, size_t, int); int __bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *, const char *, const void *, size_t, int, int, u64 *); -int bch2_xattr_set(struct bch_fs *, struct inode *, +int bch2_xattr_set(struct bch_fs *, struct bch_inode_info *, const char *, const void *, size_t, int, int); ssize_t bch2_xattr_list(struct dentry *, char *, size_t);