Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-23 00:00:02 +03:00)

Update bcachefs sources to 386f00b639 bcachefs: Snapshot creation, deletion

parent 9942fc82d4
commit e61b61c03b
@@ -1 +1 @@
-bd6ed9fb42c0aa36d1f4a21eeab45fe12e1fb792
+386f00b6399a1eb38053c236aae87678f3535df7
@@ -191,6 +191,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
 	bch2_trans_init(&trans, c, 0, 0);

 	for_each_btree_key(&trans, iter, btree_id, start,
+			   BTREE_ITER_ALL_SNAPSHOTS|
 			   BTREE_ITER_PREFETCH, k, ret) {
 		if (bkey_cmp(k.k->p, end) > 0)
 			break;
@@ -138,8 +138,9 @@ static void create_link(struct bch_fs *c,
 	struct bch_inode_unpacked inode;

 	int ret = bch2_trans_do(c, NULL, NULL, 0,
-		bch2_link_trans(&trans, parent->bi_inum, inum,
-				&parent_u, &inode, &qstr));
+		bch2_link_trans(&trans,
+				(subvol_inum) { 1, parent->bi_inum }, &parent_u,
+				(subvol_inum) { 1, inum }, &inode, &qstr));
 	if (ret)
 		die("error creating hardlink: %s", strerror(-ret));
 }
@@ -155,9 +156,10 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c,

 	int ret = bch2_trans_do(c, NULL, NULL, 0,
 		bch2_create_trans(&trans,
-				  parent->bi_inum, parent,
+				  (subvol_inum) { 1, parent->bi_inum }, parent,
 				  &new_inode, &qstr,
-				  uid, gid, mode, rdev, NULL, NULL));
+				  uid, gid, mode, rdev, NULL, NULL,
+				  (subvol_inum) {}, 0));
 	if (ret)
 		die("error creating file: %s", strerror(-ret));

@@ -225,7 +227,9 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		const struct xattr_handler *h = xattr_resolve_name(&attr);

 		int ret = bch2_trans_do(c, NULL, NULL, 0,
-				bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
+				bch2_xattr_set(&trans,
+					       (subvol_inum) { 1, dst->bi_inum },
+					       &hash_info, attr,
 					       val, val_size, h->flags, 0));
 		if (ret < 0)
 			die("error creating xattr: %s", strerror(-ret));
@@ -569,7 +573,8 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
 	syncfs(src_fd);

 	struct bch_inode_unpacked root_inode;
-	int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
+	int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
+					  &root_inode);
 	if (ret)
 		die("error looking up root directory: %s", strerror(-ret));

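The common thread in the tool changes above is that every helper which used to take a bare inode number now takes a (subvolume, inode number) pair, with subvolume 1 standing in for the root subvolume. A minimal sketch of that convention, reusing the subvol_inum layout and root-subvolume value introduced later in this commit; the wrapper function itself is hypothetical and only spells out what the `{ 1, inum }` literals mean:

```c
/* Sketch only: layout mirrors the subvol_inum added to bcachefs.h below. */
typedef struct {
	u32	subvol;
	u64	inum;
} subvol_inum;

#define BCACHEFS_ROOT_SUBVOL	1

/* Hypothetical helper: wrap a bare inode number living in the root subvolume. */
static inline subvol_inum root_subvol_inum(u64 inum)
{
	return (subvol_inum) { .subvol = BCACHEFS_ROOT_SUBVOL, .inum = inum };
}
```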
@ -229,7 +229,7 @@ retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc,
|
||||
&hash, inode->v.i_ino,
|
||||
&hash, inode_inum(inode),
|
||||
&X_SEARCH(acl_to_xattr_type(type), "", 0),
|
||||
0);
|
||||
if (ret) {
|
||||
@ -259,11 +259,11 @@ out:
|
||||
return acl;
|
||||
}
|
||||
|
||||
int bch2_set_acl_trans(struct btree_trans *trans,
|
||||
int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
const struct bch_hash_info *hash_info,
|
||||
struct posix_acl *acl, int type)
|
||||
{
|
||||
struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode_u);
|
||||
int ret;
|
||||
|
||||
if (type == ACL_TYPE_DEFAULT &&
|
||||
@ -276,14 +276,14 @@ int bch2_set_acl_trans(struct btree_trans *trans,
|
||||
if (IS_ERR(xattr))
|
||||
return PTR_ERR(xattr);
|
||||
|
||||
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
|
||||
inode_u->bi_inum, &xattr->k_i, 0);
|
||||
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, &hash_info,
|
||||
inum, &xattr->k_i, 0);
|
||||
} else {
|
||||
struct xattr_search_key search =
|
||||
X_SEARCH(acl_to_xattr_type(type), "", 0);
|
||||
|
||||
ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info,
|
||||
inode_u->bi_inum, &search);
|
||||
ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, &hash_info,
|
||||
inum, &search);
|
||||
}
|
||||
|
||||
return ret == -ENOENT ? 0 : ret;
|
||||
@ -297,7 +297,6 @@ int bch2_set_acl(struct user_namespace *mnt_userns,
|
||||
struct btree_trans trans;
|
||||
struct btree_iter inode_iter = { NULL };
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct bch_hash_info hash_info;
|
||||
struct posix_acl *acl;
|
||||
umode_t mode;
|
||||
int ret;
|
||||
@ -308,7 +307,7 @@ retry:
|
||||
bch2_trans_begin(&trans);
|
||||
acl = _acl;
|
||||
|
||||
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino,
|
||||
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
@ -321,9 +320,7 @@ retry:
|
||||
goto btree_err;
|
||||
}
|
||||
|
||||
hash_info = bch2_hash_info_init(c, &inode_u);
|
||||
|
||||
ret = bch2_set_acl_trans(&trans, &inode_u, &hash_info, acl, type);
|
||||
ret = bch2_set_acl_trans(&trans, inode_inum(inode), &inode_u, acl, type);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
|
||||
@ -352,7 +349,7 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_acl_chmod(struct btree_trans *trans,
|
||||
int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
|
||||
struct bch_inode_unpacked *inode,
|
||||
umode_t mode,
|
||||
struct posix_acl **new_acl)
|
||||
@ -366,7 +363,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
|
||||
int ret;
|
||||
|
||||
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
|
||||
&hash_info, inode->bi_inum,
|
||||
&hash_info, inum,
|
||||
&X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
|
@@ -28,25 +28,24 @@ typedef struct {

 struct posix_acl *bch2_get_acl(struct inode *, int);

-int bch2_set_acl_trans(struct btree_trans *,
+int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
 		       struct bch_inode_unpacked *,
-		       const struct bch_hash_info *,
 		       struct posix_acl *, int);
 int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int);
-int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *,
+int bch2_acl_chmod(struct btree_trans *, subvol_inum,
+		   struct bch_inode_unpacked *,
 		   umode_t, struct posix_acl **);

 #else

-static inline int bch2_set_acl_trans(struct btree_trans *trans,
+static inline int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
 				     struct bch_inode_unpacked *inode_u,
-				     const struct bch_hash_info *hash_info,
 				     struct posix_acl *acl, int type)
 {
 	return 0;
 }

-static inline int bch2_acl_chmod(struct btree_trans *trans,
+static inline int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
 				 struct bch_inode_unpacked *inode,
 				 umode_t mode,
 				 struct posix_acl **new_acl)
@@ -380,6 +380,8 @@ enum gc_phase {
 	GC_PHASE_BTREE_alloc,
 	GC_PHASE_BTREE_quotas,
 	GC_PHASE_BTREE_reflink,
+	GC_PHASE_BTREE_subvolumes,
+	GC_PHASE_BTREE_snapshots,

 	GC_PHASE_PENDING_DELETE,
 };
@@ -563,6 +565,21 @@ struct btree_path_buf {

 #define REPLICAS_DELTA_LIST_MAX	(1U << 16)

+struct snapshot_t {
+	u32			parent;
+	u32			children[2];
+	u32			subvol; /* Nonzero only if a subvolume points to this node: */
+	u32			equiv;
+};
+
+typedef struct {
+	u32		subvol;
+	u64		inum;
+} subvol_inum;
+
+#define BCACHEFS_ROOT_SUBVOL_INUM					\
+	((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
+
 struct bch_fs {
 	struct closure		cl;

@@ -634,6 +651,12 @@ struct bch_fs {
 	struct closure		sb_write;
 	struct mutex		sb_lock;

+	/* snapshot.c: */
+	GENRADIX(struct snapshot_t)	snapshots;
+	struct bch_snapshot_table __rcu *snapshot_table;
+	struct mutex		snapshot_table_lock;
+	struct work_struct	snapshot_delete_work;
+
 	/* BTREE CACHE */
 	struct bio_set		btree_bio;
 	struct workqueue_struct	*io_complete_wq;
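The new snapshot_t entries in struct bch_fs form an in-memory tree of snapshot IDs, with each node pointing at its parent, and most of the snapshot filtering added later in this commit reduces to asking whether one snapshot ID is an ancestor of another. A hedged sketch of such a check follows; the snapshot_t(c, id) lookup into the genradix and the assumption that ancestors carry numerically larger IDs than their descendants are not spelled out in this diff, so treat both as illustrative rather than the actual bch2_snapshot_is_ancestor() implementation:

```c
/* Sketch only: assumed lookup into the c->snapshots genradix added above. */
static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
{
	return genradix_ptr(&c->snapshots, id);
}

/* Walk parent pointers until we pass (or hit) the candidate ancestor. */
static inline bool snapshot_is_ancestor_sketch(struct bch_fs *c, u32 id, u32 ancestor)
{
	while (id && id < ancestor)
		id = snapshot_t(c, id)->parent;

	return id == ancestor;
}
```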
@@ -323,7 +323,7 @@ static inline void bkey_init(struct bkey *k)
  */
 #define BCH_BKEY_TYPES()			\
 	x(deleted,		0)		\
-	x(discard,		1)		\
+	x(whiteout,		1)		\
 	x(error,		2)		\
 	x(cookie,		3)		\
 	x(hash_whiteout,	4)		\
@@ -342,7 +342,9 @@ static inline void bkey_init(struct bkey *k)
 	x(inline_data,		17)		\
 	x(btree_ptr_v2,		18)		\
 	x(indirect_inline_data, 19)		\
-	x(alloc_v2,		20)
+	x(alloc_v2,		20)		\
+	x(subvolume,		21)		\
+	x(snapshot,		22)

 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name	= nr,
@@ -355,7 +357,7 @@ struct bch_deleted {
 	struct bch_val		v;
 };

-struct bch_discard {
+struct bch_whiteout {
 	struct bch_val		v;
 };

@@ -686,6 +688,10 @@ struct bch_inode_generation {
 	__le32			pad;
 } __attribute__((packed, aligned(8)));

+/*
+ * bi_subvol and bi_parent_subvol are only set for subvolume roots:
+ */
+
 #define BCH_INODE_FIELDS()			\
 	x(bi_atime,			96)	\
 	x(bi_ctime,			96)	\
@@ -709,7 +715,9 @@ struct bch_inode_generation {
 	x(bi_erasure_code,		16)	\
 	x(bi_fields_set,		16)	\
 	x(bi_dir,			64)	\
-	x(bi_dir_offset,		64)
+	x(bi_dir_offset,		64)	\
+	x(bi_subvol,			32)	\
+	x(bi_parent_subvol,		32)

 /* subset of BCH_INODE_FIELDS */
 #define BCH_INODE_OPTS()			\
@@ -792,6 +800,9 @@ struct bch_dirent {
 	__u8			d_name[];
 } __attribute__((packed, aligned(8)));

+#define DT_SUBVOL	16
+#define BCH_DT_MAX	17
+
 #define BCH_NAME_MAX	(U8_MAX * sizeof(u64) -				\
 			 sizeof(struct bkey) -				\
 			 offsetof(struct bch_dirent, d_name))
@@ -928,6 +939,42 @@ struct bch_inline_data {
 	u8			data[0];
 };

+/* Subvolumes: */
+
+#define SUBVOL_POS_MIN		POS(0, 1)
+#define SUBVOL_POS_MAX		POS(0, S32_MAX)
+#define BCACHEFS_ROOT_SUBVOL	1
+
+struct bch_subvolume {
+	struct bch_val		v;
+	__le32			flags;
+	__le32			snapshot;
+	__le64			inode;
+};
+
+LE32_BITMASK(BCH_SUBVOLUME_RO,		struct bch_subvolume, flags,  0,  1)
+/*
+ * We need to know whether a subvolume is a snapshot so we can know whether we
+ * can delete it (or whether it should just be rm -rf'd)
+ */
+LE32_BITMASK(BCH_SUBVOLUME_SNAP,	struct bch_subvolume, flags,  1,  2)
+
+/* Snapshots */
+
+struct bch_snapshot {
+	struct bch_val		v;
+	__le32			flags;
+	__le32			parent;
+	__le32			children[2];
+	__le32			subvol;
+	__le32			pad;
+};
+
+LE32_BITMASK(BCH_SNAPSHOT_DELETED,	struct bch_snapshot, flags,  0,  1)
+
+/* True if a subvolume points to this snapshot node: */
+LE32_BITMASK(BCH_SNAPSHOT_SUBVOL,	struct bch_snapshot, flags,  1,  2)
+
 /* Optional/variable size superblock sections: */

 struct bch_sb_field {
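The on-disk bch_snapshot value carries the same parent/children/subvol topology that the in-memory snapshot_t caches. The sketch below shows how the two could line up when a snapshot key is seen; the real conversion is done by bch2_mark_snapshot(), which this commit wires into the key-marking path further down, and the helper name here is made up for illustration:

```c
/*
 * Sketch only: mapping the on-disk bch_snapshot fields onto the in-memory
 * snapshot_t from bcachefs.h. Not the real bch2_mark_snapshot().
 */
static void snapshot_t_init_from_key_sketch(struct snapshot_t *t,
					    const struct bch_snapshot *s)
{
	t->parent	= le32_to_cpu(s->parent);
	t->children[0]	= le32_to_cpu(s->children[0]);
	t->children[1]	= le32_to_cpu(s->children[1]);
	/* subvol is only meaningful when a subvolume points at this node: */
	t->subvol	= BCH_SNAPSHOT_SUBVOL(s) ? le32_to_cpu(s->subvol) : 0;
}
```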
@@ -1695,7 +1742,9 @@ LE32_BITMASK(JSET_NO_FLUSH,	struct jset, flags, 5, 6);
 	x(alloc,	4)			\
 	x(quotas,	5)			\
 	x(stripes,	6)			\
-	x(reflink,	7)
+	x(reflink,	7)			\
+	x(subvolumes,	8)			\
+	x(snapshots,	9)

 enum btree_id {
 #define x(kwd, val) BTREE_ID_##kwd = val,
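Registering subvolumes and snapshots as ordinary btrees means the existing iteration machinery works on them unchanged, exactly as the list_keys() hunk at the top of this commit shows. A rough sketch of walking the new subvolumes btree with the same helpers (error handling trimmed, function name hypothetical):

```c
/* Sketch only: iterate every subvolume key, mirroring the list_keys() pattern. */
static void list_subvolumes_sketch(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		if (k.k->type != KEY_TYPE_subvolume)
			continue;

		/* bkey_s_c_to_subvolume(k).v gives ->snapshot and ->inode */
	}
	bch2_trans_iter_exit(&trans, &iter);

	bch2_trans_exit(&trans);
}
```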
@@ -78,6 +78,9 @@ struct bch_ioctl_incremental {
 #define BCH_IOCTL_DISK_RESIZE	_IOW(0xbc, 14,  struct bch_ioctl_disk_resize)
 #define BCH_IOCTL_DISK_RESIZE_JOURNAL _IOW(0xbc,15,  struct bch_ioctl_disk_resize_journal)

+#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16,  struct bch_ioctl_subvolume)
+#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17,  struct bch_ioctl_subvolume)
+
 /* ioctl below act on a particular file, not the filesystem as a whole: */

 #define BCHFS_IOC_REINHERIT_ATTRS	_IOR(0xbc, 64, const char __user *)
@@ -349,4 +352,16 @@ struct bch_ioctl_disk_resize_journal {
 	__u64			nbuckets;
 };

+struct bch_ioctl_subvolume {
+	__u32			flags;
+	__u32			dirfd;
+	__u16			mode;
+	__u16			pad[3];
+	__u64			dst_ptr;
+	__u64			src_ptr;
+};
+
+#define BCH_SUBVOL_SNAPSHOT_CREATE	(1U << 0)
+#define BCH_SUBVOL_SNAPSHOT_RO		(1U << 1)
+
 #endif /* _BCACHEFS_IOCTL_H */
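The two new ioctls take the bch_ioctl_subvolume argument defined above. This diff does not spell out how dst_ptr and src_ptr are interpreted; the sketch below assumes dst_ptr carries a pointer to the destination path string and that src_ptr is only needed when creating a snapshot, so treat it as illustrative userspace glue rather than the tool's actual code:

```c
/*
 * Sketch only: driving BCH_IOCTL_SUBVOLUME_CREATE from userspace.
 * Assumes <linux/... bcachefs_ioctl.h> has been included for the ioctl
 * number and struct bch_ioctl_subvolume; dst_ptr/src_ptr semantics are
 * assumptions, not taken from this diff.
 */
#include <fcntl.h>
#include <sys/ioctl.h>

static int subvolume_create_sketch(int fs_fd, const char *dst_path)
{
	struct bch_ioctl_subvolume i = {
		.flags	= 0,			/* or BCH_SUBVOL_SNAPSHOT_CREATE */
		.dirfd	= (__u32) AT_FDCWD,	/* resolve dst_path relative to cwd */
		.mode	= 0777,
		.dst_ptr = (unsigned long) dst_path,
		.src_ptr = 0,			/* assumed: source path for snapshots only */
	};

	return ioctl(fs_fd, BCH_IOCTL_SUBVOLUME_CREATE, &i);
}
```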
@@ -55,7 +55,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
 #define bkey_deleted(_k)	((_k)->type == KEY_TYPE_deleted)

 #define bkey_whiteout(_k)	\
-	((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
+	((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)

 enum bkey_lr_packed {
 	BKEY_PACKED_BOTH,
@ -11,6 +11,7 @@
|
||||
#include "inode.h"
|
||||
#include "quota.h"
|
||||
#include "reflink.h"
|
||||
#include "subvolume.h"
|
||||
#include "xattr.h"
|
||||
|
||||
const char * const bch2_bkey_types[] = {
|
||||
@ -30,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c,
|
||||
.key_invalid = deleted_key_invalid, \
|
||||
}
|
||||
|
||||
#define bch2_bkey_ops_discard (struct bkey_ops) { \
|
||||
#define bch2_bkey_ops_whiteout (struct bkey_ops) { \
|
||||
.key_invalid = deleted_key_invalid, \
|
||||
}
|
||||
|
||||
@ -100,6 +101,8 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
|
||||
|
||||
static unsigned bch2_key_types_allowed[] = {
|
||||
[BKEY_TYPE_extents] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_whiteout)|
|
||||
(1U << KEY_TYPE_error)|
|
||||
(1U << KEY_TYPE_cookie)|
|
||||
(1U << KEY_TYPE_extent)|
|
||||
@ -107,26 +110,43 @@ static unsigned bch2_key_types_allowed[] = {
|
||||
(1U << KEY_TYPE_reflink_p)|
|
||||
(1U << KEY_TYPE_inline_data),
|
||||
[BKEY_TYPE_inodes] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_whiteout)|
|
||||
(1U << KEY_TYPE_inode)|
|
||||
(1U << KEY_TYPE_inode_generation),
|
||||
[BKEY_TYPE_dirents] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_whiteout)|
|
||||
(1U << KEY_TYPE_hash_whiteout)|
|
||||
(1U << KEY_TYPE_dirent),
|
||||
[BKEY_TYPE_xattrs] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_whiteout)|
|
||||
(1U << KEY_TYPE_cookie)|
|
||||
(1U << KEY_TYPE_hash_whiteout)|
|
||||
(1U << KEY_TYPE_xattr),
|
||||
[BKEY_TYPE_alloc] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_alloc)|
|
||||
(1U << KEY_TYPE_alloc_v2),
|
||||
[BKEY_TYPE_quotas] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_quota),
|
||||
[BKEY_TYPE_stripes] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_stripe),
|
||||
[BKEY_TYPE_reflink] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_reflink_v)|
|
||||
(1U << KEY_TYPE_indirect_inline_data),
|
||||
[BKEY_TYPE_subvolumes] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_subvolume),
|
||||
[BKEY_TYPE_snapshots] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_snapshot),
|
||||
[BKEY_TYPE_btree] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_btree_ptr)|
|
||||
(1U << KEY_TYPE_btree_ptr_v2),
|
||||
};
|
||||
@ -134,21 +154,18 @@ static unsigned bch2_key_types_allowed[] = {
|
||||
const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum btree_node_type type)
|
||||
{
|
||||
unsigned key_types_allowed = (1U << KEY_TYPE_deleted)|
|
||||
bch2_key_types_allowed[type] ;
|
||||
|
||||
if (k.k->u64s < BKEY_U64s)
|
||||
return "u64s too small";
|
||||
|
||||
if (!(key_types_allowed & (1U << k.k->type)))
|
||||
if (!(bch2_key_types_allowed[type] & (1U << k.k->type)))
|
||||
return "invalid key type for this btree";
|
||||
|
||||
if (type == BKEY_TYPE_btree &&
|
||||
bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
|
||||
return "value too big";
|
||||
|
||||
if (btree_node_type_is_extents(type)) {
|
||||
if ((k.k->size == 0) != bkey_deleted(k.k))
|
||||
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
|
||||
if (k.k->size == 0)
|
||||
return "bad size field";
|
||||
|
||||
if (k.k->size > k.k->p.offset)
|
||||
@ -165,7 +182,7 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
if (type != BKEY_TYPE_btree &&
|
||||
btree_type_has_snapshots(type) &&
|
||||
k.k->p.snapshot != U32_MAX)
|
||||
!k.k->p.snapshot)
|
||||
return "invalid snapshot field";
|
||||
|
||||
if (type != BKEY_TYPE_btree &&
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
@ -152,7 +153,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
|
||||
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
|
||||
(btree_node_lock_seq_matches(path, b, level) &&
|
||||
btree_node_lock_increment(trans, b, level, want))) {
|
||||
mark_btree_node_locked(trans, path, level, want);
|
||||
mark_btree_node_locked(path, level, want);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -188,7 +189,7 @@ static bool bch2_btree_node_upgrade(struct btree_trans *trans,
|
||||
|
||||
return false;
|
||||
success:
|
||||
mark_btree_node_intent_locked(trans, path, level);
|
||||
mark_btree_node_intent_locked(path, level);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -674,6 +675,9 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
|
||||
|
||||
static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
|
||||
{
|
||||
BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
|
||||
!iter->pos.snapshot);
|
||||
|
||||
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
iter->pos.snapshot != iter->snapshot);
|
||||
|
||||
@ -681,6 +685,55 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
|
||||
bkey_cmp(iter->pos, iter->k.p) > 0);
|
||||
}
|
||||
|
||||
static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct btree_iter copy;
|
||||
struct bkey_s_c prev;
|
||||
int ret = 0;
|
||||
|
||||
if (!bch2_debug_check_iterators)
|
||||
return 0;
|
||||
|
||||
if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
|
||||
return 0;
|
||||
|
||||
if (bkey_err(k) || !k.k)
|
||||
return 0;
|
||||
|
||||
BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
|
||||
iter->snapshot,
|
||||
k.k->p.snapshot));
|
||||
|
||||
bch2_trans_iter_init(trans, ©, iter->btree_id, iter->pos,
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
prev = bch2_btree_iter_prev(©);
|
||||
if (!prev.k)
|
||||
goto out;
|
||||
|
||||
ret = bkey_err(prev);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!bkey_cmp(prev.k->p, k.k->p) &&
|
||||
bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
|
||||
prev.k->p.snapshot) > 0) {
|
||||
char buf1[100], buf2[200];
|
||||
|
||||
bch2_bkey_to_text(&PBUF(buf1), k.k);
|
||||
bch2_bkey_to_text(&PBUF(buf2), prev.k);
|
||||
|
||||
panic("iter snap %u\n"
|
||||
"k %s\n"
|
||||
"prev %s\n",
|
||||
iter->snapshot,
|
||||
buf1, buf2);
|
||||
}
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, ©);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
|
||||
@ -689,6 +742,7 @@ static inline void bch2_btree_path_verify(struct btree_trans *trans,
|
||||
struct btree_path *path) {}
|
||||
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
|
||||
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
|
||||
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
@ -896,12 +950,12 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
|
||||
bch2_btree_node_iter_peek_all(&l->iter, l->b));
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
|
||||
static inline struct bkey_s_c btree_path_level_peek(struct bch_fs *c,
|
||||
struct btree_path *path,
|
||||
struct btree_path_level *l,
|
||||
struct bkey *u)
|
||||
{
|
||||
struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
|
||||
struct bkey_s_c k = __btree_iter_unpack(c, l, u,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||
|
||||
path->pos = k.k ? k.k->p : l->b->key.k.p;
|
||||
@ -1041,7 +1095,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
|
||||
t != BTREE_NODE_UNLOCKED) {
|
||||
btree_node_unlock(path, b->c.level);
|
||||
six_lock_increment(&b->c.lock, t);
|
||||
mark_btree_node_locked(trans, path, b->c.level, t);
|
||||
mark_btree_node_locked(path, b->c.level, t);
|
||||
}
|
||||
|
||||
btree_path_level_init(trans, path, b);
|
||||
@ -1118,7 +1172,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
|
||||
path->l[i].b = NULL;
|
||||
|
||||
mark_btree_node_locked(trans, path, path->level, lock_type);
|
||||
mark_btree_node_locked(path, path->level, lock_type);
|
||||
btree_path_level_init(trans, path, b);
|
||||
return 0;
|
||||
}
|
||||
@ -1210,7 +1264,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
mark_btree_node_locked(trans, path, level, lock_type);
|
||||
mark_btree_node_locked(path, level, lock_type);
|
||||
btree_path_level_init(trans, path, b);
|
||||
|
||||
if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
@ -1252,10 +1306,6 @@ retry_all:
|
||||
|
||||
btree_trans_verify_sorted(trans);
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->traverse_all_idx = U8_MAX;
|
||||
#endif
|
||||
|
||||
for (i = trans->nr_sorted - 2; i >= 0; --i) {
|
||||
struct btree_path *path1 = trans->paths + trans->sorted[i];
|
||||
struct btree_path *path2 = trans->paths + trans->sorted[i + 1];
|
||||
@ -1294,9 +1344,6 @@ retry_all:
|
||||
path = trans->paths + trans->sorted[i];
|
||||
|
||||
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans->traverse_all_idx = path->idx;
|
||||
#endif
|
||||
|
||||
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
|
||||
if (ret)
|
||||
@ -1985,11 +2032,25 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
}
|
||||
|
||||
if (likely(k.k)) {
|
||||
if (likely(!bkey_deleted(k.k)))
|
||||
break;
|
||||
/*
|
||||
* We can never have a key in a leaf node at POS_MAX, so
|
||||
* we don't have to check these successor() calls:
|
||||
*/
|
||||
if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
|
||||
!bch2_snapshot_is_ancestor(trans->c,
|
||||
iter->snapshot,
|
||||
k.k->p.snapshot)) {
|
||||
search_key = bpos_successor(k.k->p);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Advance to next key: */
|
||||
search_key = bkey_successor(iter, k.k->p);
|
||||
if (bkey_whiteout(k.k) &&
|
||||
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
|
||||
search_key = bkey_successor(iter, k.k->p);
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
} else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
|
||||
/* Advance to next leaf node: */
|
||||
search_key = bpos_successor(iter->path->l[0].b->key.k.p);
|
||||
@ -2010,6 +2071,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
|
||||
iter->pos = bkey_start_pos(k.k);
|
||||
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
|
||||
iter->pos.snapshot = iter->snapshot;
|
||||
|
||||
cmp = bpos_cmp(k.k->p, iter->path->pos);
|
||||
if (cmp) {
|
||||
iter->path = bch2_btree_path_make_mut(trans, iter->path,
|
||||
@ -2022,6 +2086,10 @@ out:
|
||||
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
bch2_btree_iter_verify(iter);
|
||||
ret = bch2_btree_iter_verify_ret(iter, k);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -2045,7 +2113,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_trans *trans = iter->trans;
|
||||
struct bpos search_key = iter->pos;
|
||||
struct btree_path *saved_path = NULL;
|
||||
struct bkey_s_c k;
|
||||
struct bkey saved_k;
|
||||
const struct bch_val *saved_v;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(iter->path->cached || iter->path->level);
|
||||
@ -2053,6 +2124,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
bch2_btree_iter_verify(iter);
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
|
||||
search_key.snapshot = U32_MAX;
|
||||
|
||||
while (1) {
|
||||
iter->path = btree_path_set_pos(trans, iter->path, search_key,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
@ -2065,18 +2139,61 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
goto out;
|
||||
}
|
||||
|
||||
k = btree_path_level_peek(trans, iter->path,
|
||||
k = btree_path_level_peek(trans->c, iter->path,
|
||||
&iter->path->l[0], &iter->k);
|
||||
if (!k.k ||
|
||||
((iter->flags & BTREE_ITER_IS_EXTENTS)
|
||||
? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
|
||||
: bkey_cmp(k.k->p, iter->pos) > 0))
|
||||
? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
|
||||
: bpos_cmp(k.k->p, search_key) > 0))
|
||||
k = btree_path_level_prev(trans->c, iter->path,
|
||||
&iter->path->l[0], &iter->k);
|
||||
|
||||
btree_path_check_sort(trans, iter->path, 0);
|
||||
|
||||
if (likely(k.k)) {
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
|
||||
if (k.k->p.snapshot == iter->snapshot)
|
||||
goto got_key;
|
||||
|
||||
/*
|
||||
* If we have a saved candidate, and we're no
|
||||
* longer at the same _key_ (not pos), return
|
||||
* that candidate
|
||||
*/
|
||||
if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
|
||||
bch2_path_put(trans, iter->path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
iter->path = saved_path;
|
||||
saved_path = NULL;
|
||||
iter->k = saved_k;
|
||||
k.v = saved_v;
|
||||
goto got_key;
|
||||
}
|
||||
|
||||
if (bch2_snapshot_is_ancestor(iter->trans->c,
|
||||
iter->snapshot,
|
||||
k.k->p.snapshot)) {
|
||||
if (saved_path)
|
||||
bch2_path_put(trans, saved_path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
saved_path = btree_path_clone(trans, iter->path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
saved_k = *k.k;
|
||||
saved_v = k.v;
|
||||
}
|
||||
|
||||
search_key = bpos_predecessor(k.k->p);
|
||||
continue;
|
||||
}
|
||||
got_key:
|
||||
if (bkey_whiteout(k.k) &&
|
||||
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
|
||||
search_key = bkey_predecessor(iter, k.k->p);
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
|
||||
search_key.snapshot = U32_MAX;
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
} else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
|
||||
/* Advance to previous leaf node: */
|
||||
@ -2094,7 +2211,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
/* Extents can straddle iter->pos: */
|
||||
if (bkey_cmp(k.k->p, iter->pos) < 0)
|
||||
iter->pos = k.k->p;
|
||||
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
|
||||
iter->pos.snapshot = iter->snapshot;
|
||||
out:
|
||||
if (saved_path)
|
||||
bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
|
||||
iter->path->should_be_locked = true;
|
||||
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
@ -2143,7 +2265,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
if ((iter->flags & BTREE_ITER_CACHED) ||
|
||||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
||||
struct bkey_i *next_update;
|
||||
|
||||
next_update = iter->flags & BTREE_ITER_WITH_UPDATES
|
||||
@ -2202,6 +2325,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
bch2_btree_iter_verify(iter);
|
||||
ret = bch2_btree_iter_verify_ret(iter, k);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
return k;
|
||||
}
|
||||
@ -2352,13 +2478,13 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||
btree_node_type_is_extents(btree_id))
|
||||
flags |= BTREE_ITER_IS_EXTENTS;
|
||||
|
||||
if (!btree_type_has_snapshots(btree_id) &&
|
||||
!(flags & __BTREE_ITER_ALL_SNAPSHOTS))
|
||||
if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
!btree_type_has_snapshots(btree_id))
|
||||
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
|
||||
|
||||
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
|
||||
pos.snapshot = btree_type_has_snapshots(btree_id)
|
||||
? U32_MAX : 0;
|
||||
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
btree_type_has_snapshots(btree_id))
|
||||
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
|
||||
|
||||
iter->trans = trans;
|
||||
iter->path = NULL;
|
||||
|
@ -234,6 +234,15 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
|
||||
iter->pos = bkey_start_pos(&iter->k);
|
||||
}
|
||||
|
||||
static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
|
||||
{
|
||||
struct bpos pos = iter->pos;
|
||||
|
||||
iter->snapshot = snapshot;
|
||||
pos.snapshot = snapshot;
|
||||
bch2_btree_iter_set_pos(iter, pos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlocks before scheduling
|
||||
* Note: does not revalidate iterator
|
||||
|
@ -163,6 +163,11 @@ btree_key_cache_create(struct btree_key_cache *c,
|
||||
was_new = false;
|
||||
}
|
||||
|
||||
if (btree_id == BTREE_ID_subvolumes)
|
||||
six_lock_pcpu_alloc(&ck->c.lock);
|
||||
else
|
||||
six_lock_pcpu_free(&ck->c.lock);
|
||||
|
||||
ck->c.level = 0;
|
||||
ck->c.btree_id = btree_id;
|
||||
ck->key.btree_id = btree_id;
|
||||
@ -296,7 +301,7 @@ retry:
|
||||
if (!ck)
|
||||
goto retry;
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(path, 0, SIX_LOCK_intent);
|
||||
path->locks_want = 1;
|
||||
} else {
|
||||
enum six_lock_type lock_want = __btree_lock_want(path, 0);
|
||||
@ -318,7 +323,7 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, lock_want);
|
||||
mark_btree_node_locked(path, 0, lock_want);
|
||||
}
|
||||
|
||||
path->l[0].lock_seq = ck->c.lock.state.seq;
|
||||
@ -366,7 +371,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
|
||||
bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_INTENT);
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
|
||||
BTREE_ITER_CACHED|
|
||||
BTREE_ITER_CACHED_NOFILL|
|
||||
|
@ -58,8 +58,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
|
||||
path->nodes_intent_locked &= ~(1 << level);
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_locked(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
static inline void mark_btree_node_locked(struct btree_path *path,
|
||||
unsigned level,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
@ -69,19 +68,12 @@ static inline void mark_btree_node_locked(struct btree_trans *trans,
|
||||
|
||||
path->nodes_locked |= 1 << level;
|
||||
path->nodes_intent_locked |= type << level;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
path->ip_locked = _RET_IP_;
|
||||
BUG_ON(trans->in_traverse_all &&
|
||||
trans->traverse_all_idx != U8_MAX &&
|
||||
path->sorted_idx > trans->paths[trans->traverse_all_idx].sorted_idx);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
static inline void mark_btree_node_intent_locked(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(path, level, SIX_LOCK_intent);
|
||||
}
|
||||
|
||||
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
|
||||
@ -120,9 +112,6 @@ static inline void __bch2_btree_path_unlock(struct btree_path *path)
|
||||
|
||||
while (path->nodes_locked)
|
||||
btree_node_unlock(path, __ffs(path->nodes_locked));
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
path->ip_locked = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
|
||||
|
@ -209,6 +209,7 @@ struct btree_node_iter {
|
||||
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
|
||||
|
||||
enum btree_path_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
@ -255,7 +256,6 @@ struct btree_path {
|
||||
} l[BTREE_MAX_DEPTH];
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
unsigned long ip_allocated;
|
||||
unsigned long ip_locked;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -369,7 +369,6 @@ struct btree_trans {
|
||||
struct bpos locking_pos;
|
||||
u8 locking_btree_id;
|
||||
u8 locking_level;
|
||||
u8 traverse_all_idx;
|
||||
pid_t pid;
|
||||
#endif
|
||||
unsigned long ip;
|
||||
@ -607,7 +606,8 @@ static inline bool btree_node_is_extents(struct btree *b)
|
||||
|
||||
#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
|
||||
((1U << BKEY_TYPE_alloc)| \
|
||||
(1U << BKEY_TYPE_stripes))
|
||||
(1U << BKEY_TYPE_stripes)| \
|
||||
(1U << BKEY_TYPE_snapshots))
|
||||
|
||||
#define BTREE_NODE_TYPE_HAS_TRIGGERS \
|
||||
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
|
||||
@ -654,7 +654,8 @@ enum btree_update_flags {
|
||||
|
||||
#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
|
||||
((1U << KEY_TYPE_stripe)| \
|
||||
(1U << KEY_TYPE_inode))
|
||||
(1U << KEY_TYPE_inode)| \
|
||||
(1U << KEY_TYPE_snapshot))
|
||||
|
||||
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
|
||||
{
|
||||
@ -671,11 +672,6 @@ struct btree_root {
|
||||
s8 error;
|
||||
};
|
||||
|
||||
/*
|
||||
* Optional hook that will be called just prior to a btree node update, when
|
||||
* we're holding the write lock and we know what key is about to be overwritten:
|
||||
*/
|
||||
|
||||
enum btree_insert_ret {
|
||||
BTREE_INSERT_OK,
|
||||
/* leaf node needs to be split */
|
||||
@ -696,8 +692,4 @@ enum btree_node_sibling {
|
||||
btree_next_sib,
|
||||
};
|
||||
|
||||
typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *,
|
||||
struct btree *,
|
||||
struct btree_node_iter *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_TYPES_H */
|
||||
|
@ -61,7 +61,7 @@ int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
|
||||
struct disk_reservation *, u64 *, int flags);
|
||||
|
||||
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
|
||||
struct bpos, struct bpos, u64 *);
|
||||
struct bpos, struct bpos, unsigned, u64 *);
|
||||
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
|
||||
struct bpos, struct bpos, u64 *);
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "journal.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "keylist.h"
|
||||
#include "subvolume.h"
|
||||
#include "replicas.h"
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
@ -245,6 +246,11 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
|
||||
BUG_ON(i->cached != i->path->cached);
|
||||
BUG_ON(i->level != i->path->level);
|
||||
BUG_ON(i->btree_id != i->path->btree_id);
|
||||
EBUG_ON(!i->level &&
|
||||
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
|
||||
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
|
||||
i->k->k.p.snapshot &&
|
||||
bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot));
|
||||
}
|
||||
|
||||
static noinline int
|
||||
@ -934,6 +940,43 @@ err:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
static int check_pos_snapshot_overwritten(struct btree_trans *trans,
|
||||
enum btree_id id,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if (!snapshot_t(c, pos.snapshot)->children[0])
|
||||
return 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, id, pos,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
while (1) {
|
||||
k = bch2_btree_iter_prev(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!k.k)
|
||||
break;
|
||||
|
||||
if (bkey_cmp(pos, k.k->p))
|
||||
break;
|
||||
|
||||
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
struct btree_iter *orig_iter,
|
||||
struct bkey_i *insert,
|
||||
@ -958,6 +1001,28 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
goto out;
|
||||
|
||||
if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
|
||||
/*
|
||||
* We can't merge extents if they belong to interior snapshot
|
||||
* tree nodes, and there's a snapshot in which one extent is
|
||||
* visible and the other is not - i.e. if visibility is
|
||||
* different.
|
||||
*
|
||||
* Instead of checking if visibilitiy of the two extents is
|
||||
* different, for now we just check if either has been
|
||||
* overwritten:
|
||||
*/
|
||||
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret)
|
||||
goto nomerge1;
|
||||
|
||||
ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret)
|
||||
goto nomerge1;
|
||||
|
||||
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
@ -973,22 +1038,26 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k)))
|
||||
nomerge1:
|
||||
ret = 0;
|
||||
if (!bkey_cmp(k.k->p, start))
|
||||
goto next;
|
||||
|
||||
while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
|
||||
bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
|
||||
bool back_split = bkey_cmp(k.k->p, insert->k.p) > 0;
|
||||
|
||||
/*
|
||||
* If we're going to be splitting a compressed extent, note it
|
||||
* so that __bch2_trans_commit() can increase our disk
|
||||
* reservation:
|
||||
*/
|
||||
if (bkey_cmp(bkey_start_pos(k.k), start) < 0 &&
|
||||
bkey_cmp(k.k->p, insert->k.p) > 0 &&
|
||||
if (((front_split && back_split) ||
|
||||
((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
|
||||
(compressed_sectors = bch2_bkey_sectors_compressed(k)))
|
||||
trans->extra_journal_res += compressed_sectors;
|
||||
|
||||
if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
|
||||
if (front_split) {
|
||||
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
@ -999,6 +1068,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
|
||||
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(&update_iter) ?:
|
||||
bch2_trans_update(trans, &update_iter, update,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
|
||||
flags);
|
||||
bch2_trans_iter_exit(trans, &update_iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (k.k->p.snapshot != insert->k.p.snapshot &&
|
||||
(front_split || back_split)) {
|
||||
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(update, k);
|
||||
|
||||
bch2_cut_front(start, update);
|
||||
bch2_cut_back(insert->k.p, update);
|
||||
|
||||
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(&update_iter) ?:
|
||||
bch2_trans_update(trans, &update_iter, update,
|
||||
@ -1010,12 +1105,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
|
||||
ret = bch2_btree_delete_at(trans, &iter, flags);
|
||||
update = bch2_trans_kmalloc(trans, sizeof(*update));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
|
||||
bkey_init(&update->k);
|
||||
update->k.p = k.k->p;
|
||||
|
||||
if (insert->k.p.snapshot != k.k->p.snapshot) {
|
||||
update->k.p.snapshot = insert->k.p.snapshot;
|
||||
update->k.type = KEY_TYPE_whiteout;
|
||||
}
|
||||
|
||||
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(&update_iter) ?:
|
||||
bch2_trans_update(trans, &update_iter, update,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
|
||||
flags);
|
||||
bch2_trans_iter_exit(trans, &update_iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bkey_cmp(k.k->p, insert->k.p) > 0) {
|
||||
if (back_split) {
|
||||
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
@ -1023,10 +1138,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
bkey_reassemble(update, k);
|
||||
bch2_cut_front(insert->k.p, update);
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, update, flags);
|
||||
bch2_trans_copy_iter(&update_iter, &iter);
|
||||
update_iter.pos = update->k.p;
|
||||
ret = bch2_trans_update(trans, &update_iter, update,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
|
||||
flags);
|
||||
bch2_trans_iter_exit(trans, &update_iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
goto out;
|
||||
}
|
||||
next:
|
||||
@ -1037,7 +1157,23 @@ next:
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_bkey_merge(c, bkey_i_to_s(insert), k);
|
||||
if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
|
||||
ret = check_pos_snapshot_overwritten(trans, btree_id, insert->k.p);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret)
|
||||
goto nomerge2;
|
||||
|
||||
ret = check_pos_snapshot_overwritten(trans, btree_id, k.k->p);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret)
|
||||
goto nomerge2;
|
||||
|
||||
bch2_bkey_merge(c, bkey_i_to_s(insert), k);
|
||||
}
|
||||
nomerge2:
|
||||
ret = 0;
|
||||
out:
|
||||
if (!bkey_deleted(&insert->k)) {
|
||||
/*
|
||||
@ -1057,6 +1193,39 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* When deleting, check if we need to emit a whiteout (because we're overwriting
|
||||
* something in an ancestor snapshot)
|
||||
*/
|
||||
static int need_whiteout_for_snapshot(struct btree_trans *trans,
|
||||
enum btree_id btree_id, struct bpos pos)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot = pos.snapshot;
|
||||
int ret;
|
||||
|
||||
if (!bch2_snapshot_parent(trans->c, pos.snapshot))
|
||||
return 0;
|
||||
|
||||
pos.snapshot++;
|
||||
|
||||
for_each_btree_key(trans, iter, btree_id, pos,
|
||||
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
|
||||
if (bkey_cmp(k.k->p, pos))
|
||||
break;
|
||||
|
||||
if (bch2_snapshot_is_ancestor(trans->c, snapshot,
|
||||
k.k->p.snapshot)) {
|
||||
ret = !bkey_whiteout(k.k);
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_i *k, enum btree_update_flags flags)
|
||||
{
|
||||
@ -1089,6 +1258,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
btree_insert_entry_cmp(i - 1, i) >= 0);
|
||||
#endif
|
||||
|
||||
if (bkey_deleted(&n.k->k) &&
|
||||
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
|
||||
int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
if (ret)
|
||||
n.k->k.type = KEY_TYPE_whiteout;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pending updates are kept sorted: first, find position of new update,
|
||||
* then delete/trim any updates the new update overwrites:
|
||||
@ -1175,13 +1354,14 @@ int bch2_btree_delete_at(struct btree_trans *trans,
|
||||
|
||||
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
|
||||
struct bpos start, struct bpos end,
|
||||
unsigned iter_flags,
|
||||
u64 *journal_seq)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
|
||||
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags);
|
||||
retry:
|
||||
while ((bch2_trans_begin(trans),
|
||||
(k = bch2_btree_iter_peek(&iter)).k) &&
|
||||
@ -1248,5 +1428,5 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
u64 *journal_seq)
|
||||
{
|
||||
return bch2_trans_do(c, NULL, journal_seq, 0,
|
||||
bch2_btree_delete_range_trans(&trans, id, start, end, journal_seq));
|
||||
bch2_btree_delete_range_trans(&trans, id, start, end, 0, journal_seq));
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "movinggc.h"
|
||||
#include "reflink.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
#include <linux/preempt.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
@ -1200,6 +1201,8 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
return bch2_mark_reservation(c, old, new, journal_seq, flags);
|
||||
case KEY_TYPE_reflink_p:
|
||||
return bch2_mark_reflink_p(c, old, new, journal_seq, flags);
|
||||
case KEY_TYPE_snapshot:
|
||||
return bch2_mark_snapshot(c, old, new, journal_seq, flags);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "fs.h"
|
||||
#include "keylist.h"
|
||||
#include "str_hash.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
#include <linux/dcache.h>
|
||||
|
||||
@ -99,7 +100,8 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
if (memchr(d.v->d_name, '/', len))
|
||||
return "invalid name";
|
||||
|
||||
if (le64_to_cpu(d.v->d_inum) == d.k->p.inode)
|
||||
if (d.v->d_type != DT_SUBVOL &&
|
||||
le64_to_cpu(d.v->d_inum) == d.k->p.inode)
|
||||
return "dirent points to own directory";
|
||||
|
||||
return NULL;
|
||||
@ -113,7 +115,7 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
bch_scnmemcpy(out, d.v->d_name,
|
||||
bch2_dirent_name_bytes(d));
|
||||
pr_buf(out, " -> %llu type %s", d.v->d_inum,
|
||||
d.v->d_type < DT_MAX
|
||||
d.v->d_type < BCH_DT_MAX
|
||||
? bch2_d_types[d.v->d_type]
|
||||
: "(bad d_type)");
|
||||
}
|
||||
@ -149,8 +151,8 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
return dirent;
|
||||
}
|
||||
|
||||
int bch2_dirent_create(struct btree_trans *trans,
|
||||
u64 dir_inum, const struct bch_hash_info *hash_info,
|
||||
int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
|
||||
const struct bch_hash_info *hash_info,
|
||||
u8 type, const struct qstr *name, u64 dst_inum,
|
||||
u64 *dir_offset, int flags)
|
||||
{
|
||||
@ -163,7 +165,7 @@ int bch2_dirent_create(struct btree_trans *trans,
|
||||
return ret;
|
||||
|
||||
ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
|
||||
dir_inum, &dirent->k_i, flags);
|
||||
dir, &dirent->k_i, flags);
|
||||
*dir_offset = dirent->k.p.offset;
|
||||
|
||||
return ret;
|
||||
@ -176,22 +178,86 @@ static void dirent_copy_target(struct bkey_i_dirent *dst,
|
||||
dst->v.d_type = src.v->d_type;
|
||||
}
|
||||
|
||||
int __bch2_dirent_read_target(struct btree_trans *trans,
|
||||
struct bkey_s_c_dirent d,
|
||||
u32 *subvol, u32 *snapshot, u64 *inum,
|
||||
bool is_fsck)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
*subvol = 0;
|
||||
*snapshot = d.k->p.snapshot;
|
||||
|
||||
if (likely(d.v->d_type != DT_SUBVOL)) {
|
||||
*inum = le64_to_cpu(d.v->d_inum);
|
||||
} else {
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_subvolume s;
|
||||
int ret;
|
||||
|
||||
*subvol = le64_to_cpu(d.v->d_inum);
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
|
||||
POS(0, *subvol),
|
||||
BTREE_ITER_CACHED);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_subvolume) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
s = bkey_s_c_to_subvolume(k);
|
||||
*snapshot = le32_to_cpu(s.v->snapshot);
|
||||
*inum = le64_to_cpu(s.v->inode);
|
||||
err:
|
||||
if (ret == -ENOENT && !is_fsck)
|
||||
bch2_fs_inconsistent(trans->c, "pointer to missing subvolume %u",
|
||||
*subvol);
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
|
||||
struct bkey_s_c_dirent d, subvol_inum *target)
|
||||
{
|
||||
u32 snapshot;
|
||||
int ret = 0;
|
||||
|
||||
ret = __bch2_dirent_read_target(trans, d, &target->subvol, &snapshot,
|
||||
&target->inum, false);
|
||||
if (!target->subvol)
|
||||
target->subvol = dir.subvol;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dirent_rename(struct btree_trans *trans,
|
||||
u64 src_dir, struct bch_hash_info *src_hash,
|
||||
u64 dst_dir, struct bch_hash_info *dst_hash,
|
||||
const struct qstr *src_name, u64 *src_inum, u64 *src_offset,
|
||||
const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset,
|
||||
enum bch_rename_mode mode)
|
||||
subvol_inum src_dir, struct bch_hash_info *src_hash,
|
||||
subvol_inum dst_dir, struct bch_hash_info *dst_hash,
|
||||
const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
|
||||
const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
|
||||
enum bch_rename_mode mode)
|
||||
{
|
||||
struct btree_iter src_iter = { NULL };
|
||||
struct btree_iter dst_iter = { NULL };
|
||||
struct bkey_s_c old_src, old_dst;
|
||||
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
|
||||
struct bpos dst_pos =
|
||||
POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name));
|
||||
POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
|
||||
int ret = 0;
|
||||
|
||||
*src_inum = *dst_inum = 0;
|
||||
if (src_dir.subvol != dst_dir.subvol)
|
||||
return -EXDEV;
|
||||
|
||||
memset(src_inum, 0, sizeof(*src_inum));
|
||||
memset(dst_inum, 0, sizeof(*dst_inum));
|
||||
|
||||
/*
|
||||
* Lookup dst:
|
||||
@ -214,8 +280,12 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (mode != BCH_RENAME)
|
||||
*dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum);
|
||||
if (mode != BCH_RENAME) {
|
||||
ret = bch2_dirent_read_target(trans, dst_dir,
|
||||
bkey_s_c_to_dirent(old_dst), dst_inum);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
if (mode != BCH_RENAME_EXCHANGE)
|
||||
*src_offset = dst_iter.pos.offset;
|
||||
|
||||
@ -231,7 +301,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
*src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum);
|
||||
ret = bch2_dirent_read_target(trans, src_dir,
|
||||
bkey_s_c_to_dirent(old_src), src_inum);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Create new dst key: */
|
||||
new_dst = dirent_create_key(trans, 0, dst_name, 0);
|
||||
@ -310,63 +383,79 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dirent_delete_at(struct btree_trans *trans,
|
||||
const struct bch_hash_info *hash_info,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
return bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
|
||||
hash_info, iter);
|
||||
}
|
||||
|
||||
int __bch2_dirent_lookup_trans(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
u64 dir_inum,
|
||||
subvol_inum dir,
|
||||
const struct bch_hash_info *hash_info,
|
||||
const struct qstr *name, unsigned flags)
|
||||
const struct qstr *name, subvol_inum *inum,
|
||||
unsigned flags)
|
||||
{
|
||||
return bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
|
||||
hash_info, dir_inum, name, flags);
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_dirent d;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
|
||||
hash_info, dir, name, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
d = bkey_s_c_to_dirent(k);
|
||||
|
||||
ret = bch2_dirent_read_target(trans, dir, d, inum);
|
||||
if (ret)
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
|
||||
u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
|
||||
const struct bch_hash_info *hash_info,
|
||||
const struct qstr *name)
|
||||
const struct qstr *name, subvol_inum *inum)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 inum = 0;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = __bch2_dirent_lookup_trans(&trans, &iter, dir_inum,
|
||||
hash_info, name, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
|
||||
name, inum, 0);
|
||||
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
out:
|
||||
BUG_ON(ret == -EINTR);
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
bch2_trans_exit(&trans);
|
||||
return inum;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
|
||||
int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key(trans, iter, BTREE_ID_dirents,
|
||||
POS(dir_inum, 0), 0, k, ret) {
|
||||
if (k.k->p.inode > dir_inum)
|
||||
SPOS(dir.inum, 0, snapshot), 0, k, ret) {
|
||||
if (k.k->p.inode > dir.inum)
|
||||
break;
|
||||
|
||||
if (k.k->type == KEY_TYPE_dirent) {
|
||||
@ -379,19 +468,26 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx)
|
||||
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_dirent dirent;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_dirents,
|
||||
POS(inum, ctx->pos), 0, k, ret) {
|
||||
if (k.k->p.inode > inum)
|
||||
SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
|
||||
if (k.k->p.inode > inum.inum)
|
||||
break;
|
||||
|
||||
if (k.k->type != KEY_TYPE_dirent)
|
||||
@ -407,11 +503,14 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx)
|
||||
if (!dir_emit(ctx, dirent.v->d_name,
|
||||
bch2_dirent_name_bytes(dirent),
|
||||
le64_to_cpu(dirent.v->d_inum),
|
||||
dirent.v->d_type))
|
||||
vfs_d_type(dirent.v->d_type)))
|
||||
break;
|
||||
ctx->pos = dirent.k->p.offset + 1;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
|
||||
|
@ -29,13 +29,17 @@ static inline unsigned dirent_val_u64s(unsigned len)
|
||||
sizeof(u64));
|
||||
}
|
||||
|
||||
int bch2_dirent_create(struct btree_trans *, u64,
|
||||
int bch2_dirent_create(struct btree_trans *, subvol_inum,
|
||||
const struct bch_hash_info *, u8,
|
||||
const struct qstr *, u64, u64 *, int);
|
||||
|
||||
int bch2_dirent_delete_at(struct btree_trans *,
|
||||
const struct bch_hash_info *,
|
||||
struct btree_iter *);
|
||||
int __bch2_dirent_read_target(struct btree_trans *, struct bkey_s_c_dirent,
|
||||
u32 *, u32 *, u64 *, bool);
|
||||
|
||||
static inline unsigned vfs_d_type(unsigned type)
|
||||
{
|
||||
return type == DT_SUBVOL ? DT_DIR : type;
|
||||
}
|
||||
|
||||
enum bch_rename_mode {
|
||||
BCH_RENAME,
|
||||
@ -44,19 +48,20 @@ enum bch_rename_mode {
|
||||
};
|
||||
|
||||
int bch2_dirent_rename(struct btree_trans *,
|
||||
u64, struct bch_hash_info *,
|
||||
u64, struct bch_hash_info *,
|
||||
const struct qstr *, u64 *, u64 *,
|
||||
const struct qstr *, u64 *, u64 *,
|
||||
subvol_inum, struct bch_hash_info *,
|
||||
subvol_inum, struct bch_hash_info *,
|
||||
const struct qstr *, subvol_inum *, u64 *,
|
||||
const struct qstr *, subvol_inum *, u64 *,
|
||||
enum bch_rename_mode);
|
||||
|
||||
int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, u64,
|
||||
const struct bch_hash_info *,
|
||||
const struct qstr *, unsigned);
|
||||
u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
|
||||
const struct qstr *);
|
||||
int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
|
||||
subvol_inum, const struct bch_hash_info *,
|
||||
const struct qstr *, subvol_inum *, unsigned);
|
||||
u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
|
||||
const struct bch_hash_info *,
|
||||
const struct qstr *, subvol_inum *);
|
||||
|
||||
int bch2_empty_dir_trans(struct btree_trans *, u64);
|
||||
int bch2_readdir(struct bch_fs *, u64, struct dir_context *);
|
||||
int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
|
||||
int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
|
||||
|
||||
#endif /* _BCACHEFS_DIRENT_H */
|
||||
|
@ -612,38 +612,6 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
|
||||
unsigned nr_replicas, bool compressed)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bpos end = pos;
|
||||
struct bkey_s_c k;
|
||||
bool ret = true;
|
||||
int err;
|
||||
|
||||
end.offset += size;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_extents, pos,
|
||||
BTREE_ITER_SLOTS, k, err) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
|
||||
break;
|
||||
|
||||
if (nr_replicas > bch2_bkey_replicas(c, k) ||
|
||||
(!compressed && bch2_bkey_sectors_compressed(k))) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
|
@ -567,7 +567,6 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
|
||||
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
|
||||
bool bch2_bkey_is_incompressible(struct bkey_s_c);
|
||||
unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
|
||||
bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool);
|
||||
|
||||
unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
|
||||
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
|
||||
|
@ -6,82 +6,186 @@
|
||||
#include "dirent.h"
|
||||
#include "fs-common.h"
|
||||
#include "inode.h"
|
||||
#include "subvolume.h"
|
||||
#include "xattr.h"
|
||||
|
||||
#include <linux/posix_acl.h>
|
||||
|
||||
int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
|
||||
{
|
||||
return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
|
||||
}
|
||||
|
||||
int bch2_create_trans(struct btree_trans *trans,
|
||||
subvol_inum dir,
|
||||
struct bch_inode_unpacked *dir_u,
|
||||
struct bch_inode_unpacked *new_inode,
|
||||
const struct qstr *name,
|
||||
uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
|
||||
struct posix_acl *default_acl,
|
||||
struct posix_acl *acl)
|
||||
struct posix_acl *acl,
|
||||
subvol_inum snapshot_src,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter dir_iter = { NULL };
|
||||
struct btree_iter inode_iter = { NULL };
|
||||
struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
|
||||
subvol_inum new_inum = dir;
|
||||
u64 now = bch2_current_time(c);
|
||||
u64 cpu = raw_smp_processor_id();
|
||||
u64 dir_offset = 0;
|
||||
u64 dir_target;
|
||||
u32 snapshot;
|
||||
unsigned dir_type = mode_to_type(mode);
|
||||
int ret;
|
||||
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT);
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
|
||||
|
||||
if (!name)
|
||||
new_inode->bi_flags |= BCH_INODE_UNLINKED;
|
||||
|
||||
ret = bch2_inode_create(trans, &inode_iter, new_inode, U32_MAX, cpu);
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (default_acl) {
|
||||
ret = bch2_set_acl_trans(trans, new_inode, &hash,
|
||||
default_acl, ACL_TYPE_DEFAULT);
|
||||
if (!(flags & BCH_CREATE_SNAPSHOT)) {
|
||||
/* Normal create path - allocate a new inode: */
|
||||
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
|
||||
|
||||
if (flags & BCH_CREATE_TMPFILE)
|
||||
new_inode->bi_flags |= BCH_INODE_UNLINKED;
|
||||
|
||||
ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
snapshot_src = (subvol_inum) { 0 };
|
||||
} else {
|
||||
/*
|
||||
* Creating a snapshot - we're not allocating a new inode, but
|
||||
* we do have to lookup the root inode of the subvolume we're
|
||||
* snapshotting and update it (in the new snapshot):
|
||||
*/
|
||||
|
||||
if (!snapshot_src.inum) {
|
||||
/* Inode wasn't specified, just snapshot: */
|
||||
struct btree_iter subvol_iter;
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_trans_iter_init(trans, &subvol_iter, BTREE_ID_subvolumes,
|
||||
POS(0, snapshot_src.subvol), 0);
|
||||
k = bch2_btree_iter_peek_slot(&subvol_iter);
|
||||
|
||||
ret = bkey_err(k);
|
||||
if (!ret && k.k->type != KEY_TYPE_subvolume) {
|
||||
bch_err(c, "subvolume %u not found",
|
||||
snapshot_src.subvol);
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
snapshot_src.inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode);
|
||||
bch2_trans_iter_exit(trans, &subvol_iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (new_inode->bi_subvol != snapshot_src.subvol) {
|
||||
/* Not a subvolume root: */
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're not root, we have to own the subvolume being
|
||||
* snapshotted:
|
||||
*/
|
||||
if (uid && new_inode->bi_uid != uid) {
|
||||
ret = -EPERM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
flags |= BCH_CREATE_SUBVOL;
|
||||
}
|
||||
|
||||
new_inum.inum = new_inode->bi_inum;
|
||||
dir_target = new_inode->bi_inum;
|
||||
|
||||
if (flags & BCH_CREATE_SUBVOL) {
|
||||
u32 new_subvol, dir_snapshot;
|
||||
|
||||
ret = bch2_subvolume_create(trans, new_inode->bi_inum,
|
||||
snapshot_src.subvol,
|
||||
&new_subvol, &snapshot,
|
||||
(flags & BCH_CREATE_SNAPSHOT_RO) != 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
new_inode->bi_parent_subvol = dir.subvol;
|
||||
new_inode->bi_subvol = new_subvol;
|
||||
new_inum.subvol = new_subvol;
|
||||
dir_target = new_subvol;
|
||||
dir_type = DT_SUBVOL;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
|
||||
ret = bch2_btree_iter_traverse(&dir_iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (acl) {
|
||||
ret = bch2_set_acl_trans(trans, new_inode, &hash,
|
||||
acl, ACL_TYPE_ACCESS);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (!(flags & BCH_CREATE_SNAPSHOT)) {
|
||||
if (default_acl) {
|
||||
ret = bch2_set_acl_trans(trans, new_inum, new_inode,
|
||||
default_acl, ACL_TYPE_DEFAULT);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (acl) {
|
||||
ret = bch2_set_acl_trans(trans, new_inum, new_inode,
|
||||
acl, ACL_TYPE_ACCESS);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (name) {
|
||||
if (!(flags & BCH_CREATE_TMPFILE)) {
|
||||
struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = now;
|
||||
u64 dir_offset;
|
||||
|
||||
if (S_ISDIR(new_inode->bi_mode))
|
||||
if (is_subdir_for_nlink(new_inode))
|
||||
dir_u->bi_nlink++;
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = now;
|
||||
|
||||
ret = bch2_inode_write(trans, &dir_iter, dir_u);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
|
||||
mode_to_type(new_inode->bi_mode),
|
||||
name, new_inode->bi_inum,
|
||||
ret = bch2_dirent_create(trans, dir, &dir_hash,
|
||||
dir_type,
|
||||
name,
|
||||
dir_target,
|
||||
&dir_offset,
|
||||
BCH_HASH_SET_MUST_CREATE);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
|
||||
new_inode->bi_dir = dir_u->bi_inum;
|
||||
new_inode->bi_dir_offset = dir_offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
|
||||
new_inode->bi_dir = dir_u->bi_inum;
|
||||
new_inode->bi_dir_offset = dir_offset;
|
||||
}
|
||||
|
||||
/* XXX use bch2_btree_iter_set_snapshot() */
|
||||
inode_iter.snapshot = U32_MAX;
|
||||
bch2_btree_iter_set_pos(&inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
|
||||
inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
|
||||
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&inode_iter) ?:
|
||||
bch2_inode_write(trans, &inode_iter, new_inode);
|
||||
@ -91,9 +195,10 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
u64 inum, struct bch_inode_unpacked *dir_u,
|
||||
struct bch_inode_unpacked *inode_u, const struct qstr *name)
|
||||
int bch2_link_trans(struct btree_trans *trans,
|
||||
subvol_inum dir, struct bch_inode_unpacked *dir_u,
|
||||
subvol_inum inum, struct bch_inode_unpacked *inode_u,
|
||||
const struct qstr *name)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter dir_iter = { NULL };
|
||||
@ -103,6 +208,9 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
u64 dir_offset = 0;
|
||||
int ret;
|
||||
|
||||
if (dir.subvol != inum.subvol)
|
||||
return -EXDEV;
|
||||
|
||||
ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -110,7 +218,7 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
inode_u->bi_ctime = now;
|
||||
bch2_inode_nlink_inc(inode_u);
|
||||
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT);
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -118,15 +226,15 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
|
||||
dir_hash = bch2_hash_info_init(c, dir_u);
|
||||
|
||||
ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
|
||||
ret = bch2_dirent_create(trans, dir, &dir_hash,
|
||||
mode_to_type(inode_u->bi_mode),
|
||||
name, inum, &dir_offset,
|
||||
name, inum.inum, &dir_offset,
|
||||
BCH_HASH_SET_MUST_CREATE);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
|
||||
inode_u->bi_dir = dir_inum;
|
||||
inode_u->bi_dir = dir.inum;
|
||||
inode_u->bi_dir_offset = dir_offset;
|
||||
}
|
||||
|
||||
@ -139,55 +247,83 @@ err:
|
||||
}
|
||||
|
||||
int bch2_unlink_trans(struct btree_trans *trans,
|
||||
u64 dir_inum, struct bch_inode_unpacked *dir_u,
|
||||
subvol_inum dir,
|
||||
struct bch_inode_unpacked *dir_u,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
const struct qstr *name)
|
||||
const struct qstr *name,
|
||||
int deleting_snapshot)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter dir_iter = { NULL };
|
||||
struct btree_iter dirent_iter = { NULL };
|
||||
struct btree_iter inode_iter = { NULL };
|
||||
struct bch_hash_info dir_hash;
|
||||
u64 inum, now = bch2_current_time(c);
|
||||
subvol_inum inum;
|
||||
u64 now = bch2_current_time(c);
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT);
|
||||
ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
dir_hash = bch2_hash_info_init(c, dir_u);
|
||||
|
||||
ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir_inum, &dir_hash,
|
||||
name, BTREE_ITER_INTENT);
|
||||
ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
|
||||
name, &inum, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
k = bch2_btree_iter_peek_slot(&dirent_iter);
|
||||
ret = bkey_err(k);
|
||||
ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
|
||||
|
||||
ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
if (deleting_snapshot == 1 && !inode_u->bi_subvol) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (inode_u->bi_dir == k.k->p.inode &&
|
||||
inode_u->bi_dir_offset == k.k->p.offset) {
|
||||
if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) {
|
||||
ret = bch2_empty_dir_trans(trans, inum);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (inode_u->bi_subvol) {
|
||||
ret = bch2_subvolume_delete(trans, inode_u->bi_subvol,
|
||||
deleting_snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
k = bch2_btree_iter_peek_slot(&dirent_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* If we're deleting a subvolume, we need to really delete the
|
||||
* dirent, not just emit a whiteout in the current snapshot:
|
||||
*/
|
||||
bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
|
||||
ret = bch2_btree_iter_traverse(&dirent_iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (inode_u->bi_dir == dirent_iter.pos.inode &&
|
||||
inode_u->bi_dir_offset == dirent_iter.pos.offset) {
|
||||
inode_u->bi_dir = 0;
|
||||
inode_u->bi_dir_offset = 0;
|
||||
}
|
||||
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
|
||||
dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode);
|
||||
dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
|
||||
bch2_inode_nlink_dec(inode_u);
|
||||
|
||||
ret = (S_ISDIR(inode_u->bi_mode)
|
||||
? bch2_empty_dir_trans(trans, inum)
|
||||
: 0) ?:
|
||||
bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?:
|
||||
ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
|
||||
&dir_hash, &dirent_iter,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_inode_write(trans, &dir_iter, dir_u) ?:
|
||||
bch2_inode_write(trans, &inode_iter, inode_u);
|
||||
err:
|
||||
@ -222,8 +358,8 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
|
||||
}
|
||||
|
||||
int bch2_rename_trans(struct btree_trans *trans,
|
||||
u64 src_dir, struct bch_inode_unpacked *src_dir_u,
|
||||
u64 dst_dir, struct bch_inode_unpacked *dst_dir_u,
|
||||
subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
|
||||
subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
|
||||
struct bch_inode_unpacked *src_inode_u,
|
||||
struct bch_inode_unpacked *dst_inode_u,
|
||||
const struct qstr *src_name,
|
||||
@ -236,7 +372,8 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
struct btree_iter src_inode_iter = { NULL };
|
||||
struct btree_iter dst_inode_iter = { NULL };
|
||||
struct bch_hash_info src_hash, dst_hash;
|
||||
u64 src_inode, src_offset, dst_inode, dst_offset;
|
||||
subvol_inum src_inum, dst_inum;
|
||||
u64 src_offset, dst_offset;
|
||||
u64 now = bch2_current_time(c);
|
||||
int ret;
|
||||
|
||||
@ -247,7 +384,8 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
|
||||
src_hash = bch2_hash_info_init(c, src_dir_u);
|
||||
|
||||
if (dst_dir != src_dir) {
|
||||
if (dst_dir.inum != src_dir.inum ||
|
||||
dst_dir.subvol != src_dir.subvol) {
|
||||
ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
@ -262,19 +400,19 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
ret = bch2_dirent_rename(trans,
|
||||
src_dir, &src_hash,
|
||||
dst_dir, &dst_hash,
|
||||
src_name, &src_inode, &src_offset,
|
||||
dst_name, &dst_inode, &dst_offset,
|
||||
src_name, &src_inum, &src_offset,
|
||||
dst_name, &dst_inum, &dst_offset,
|
||||
mode);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inode,
|
||||
ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (dst_inode) {
|
||||
ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inode,
|
||||
if (dst_inum.inum) {
|
||||
ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -305,7 +443,7 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (S_ISDIR(dst_inode_u->bi_mode) &&
|
||||
bch2_empty_dir_trans(trans, dst_inode)) {
|
||||
bch2_empty_dir_trans(trans, dst_inum)) {
|
||||
ret = -ENOTEMPTY;
|
||||
goto err;
|
||||
}
|
||||
@ -324,12 +462,12 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (S_ISDIR(src_inode_u->bi_mode)) {
|
||||
if (is_subdir_for_nlink(src_inode_u)) {
|
||||
src_dir_u->bi_nlink--;
|
||||
dst_dir_u->bi_nlink++;
|
||||
}
|
||||
|
||||
if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) {
|
||||
if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
|
||||
dst_dir_u->bi_nlink--;
|
||||
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
|
||||
}
|
||||
@ -340,22 +478,22 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
src_dir_u->bi_mtime = now;
|
||||
src_dir_u->bi_ctime = now;
|
||||
|
||||
if (src_dir != dst_dir) {
|
||||
if (src_dir.inum != dst_dir.inum) {
|
||||
dst_dir_u->bi_mtime = now;
|
||||
dst_dir_u->bi_ctime = now;
|
||||
}
|
||||
|
||||
src_inode_u->bi_ctime = now;
|
||||
|
||||
if (dst_inode)
|
||||
if (dst_inum.inum)
|
||||
dst_inode_u->bi_ctime = now;
|
||||
|
||||
ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
|
||||
(src_dir != dst_dir
|
||||
(src_dir.inum != dst_dir.inum
|
||||
? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
|
||||
: 0 ) ?:
|
||||
bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
|
||||
(dst_inode
|
||||
(dst_inum.inum
|
||||
? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
|
||||
: 0 );
|
||||
err:
|
||||
|
@ -4,27 +4,33 @@
|
||||
|
||||
struct posix_acl;
|
||||
|
||||
int bch2_create_trans(struct btree_trans *, u64,
|
||||
#define BCH_CREATE_TMPFILE (1U << 0)
|
||||
#define BCH_CREATE_SUBVOL (1U << 1)
|
||||
#define BCH_CREATE_SNAPSHOT (1U << 2)
|
||||
#define BCH_CREATE_SNAPSHOT_RO (1U << 3)
|
||||
|
||||
int bch2_create_trans(struct btree_trans *, subvol_inum,
|
||||
struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *,
|
||||
const struct qstr *,
|
||||
uid_t, gid_t, umode_t, dev_t,
|
||||
struct posix_acl *,
|
||||
struct posix_acl *);
|
||||
struct posix_acl *,
|
||||
subvol_inum, unsigned);
|
||||
|
||||
int bch2_link_trans(struct btree_trans *, u64,
|
||||
u64, struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *,
|
||||
int bch2_link_trans(struct btree_trans *,
|
||||
subvol_inum, struct bch_inode_unpacked *,
|
||||
subvol_inum, struct bch_inode_unpacked *,
|
||||
const struct qstr *);
|
||||
|
||||
int bch2_unlink_trans(struct btree_trans *,
|
||||
u64, struct bch_inode_unpacked *,
|
||||
int bch2_unlink_trans(struct btree_trans *, subvol_inum,
|
||||
struct bch_inode_unpacked *,
|
||||
const struct qstr *);
|
||||
struct bch_inode_unpacked *,
|
||||
const struct qstr *, int);
|
||||
|
||||
int bch2_rename_trans(struct btree_trans *,
|
||||
u64, struct bch_inode_unpacked *,
|
||||
u64, struct bch_inode_unpacked *,
|
||||
subvol_inum, struct bch_inode_unpacked *,
|
||||
subvol_inum, struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *,
|
||||
const struct qstr *,
|
||||
|
@ -786,23 +786,35 @@ static void readpage_bio_extend(struct readpages_iter *iter,
|
||||
}
|
||||
}
|
||||
|
||||
static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bch_read_bio *rbio, u64 inum,
|
||||
static void bchfs_read(struct btree_trans *trans,
|
||||
struct bch_read_bio *rbio,
|
||||
subvol_inum inum,
|
||||
struct readpages_iter *readpages_iter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_buf sk;
|
||||
int flags = BCH_READ_RETRY_IF_STALE|
|
||||
BCH_READ_MAY_PROMOTE;
|
||||
u32 snapshot;
|
||||
int ret = 0;
|
||||
|
||||
rbio->c = c;
|
||||
rbio->start_time = local_clock();
|
||||
rbio->subvol = inum.subvol;
|
||||
|
||||
bch2_bkey_buf_init(&sk);
|
||||
retry:
|
||||
bch2_trans_begin(trans);
|
||||
iter = (struct btree_iter) { NULL };
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
|
||||
SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS);
|
||||
while (1) {
|
||||
struct bkey_s_c k;
|
||||
unsigned bytes, sectors, offset_into_extent;
|
||||
@ -817,15 +829,15 @@ retry:
|
||||
break;
|
||||
}
|
||||
|
||||
bch2_btree_iter_set_pos(iter,
|
||||
POS(inum, rbio->bio.bi_iter.bi_sector));
|
||||
bch2_btree_iter_set_pos(&iter,
|
||||
POS(inum.inum, rbio->bio.bi_iter.bi_sector));
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
offset_into_extent = iter->pos.offset -
|
||||
offset_into_extent = iter.pos.offset -
|
||||
bkey_start_offset(k.k);
|
||||
sectors = k.k->size - offset_into_extent;
|
||||
|
||||
@ -855,7 +867,7 @@ retry:
|
||||
if (bkey_extent_is_allocation(k.k))
|
||||
bch2_add_page_sectors(&rbio->bio, k);
|
||||
|
||||
bch2_read_extent(trans, rbio, iter->pos,
|
||||
bch2_read_extent(trans, rbio, iter.pos,
|
||||
data_btree, k, offset_into_extent, flags);
|
||||
|
||||
if (flags & BCH_READ_LAST_FRAGMENT)
|
||||
@ -864,12 +876,14 @@ retry:
|
||||
swap(rbio->bio.bi_iter.bi_size, bytes);
|
||||
bio_advance(&rbio->bio, bytes);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
if (ret) {
|
||||
bch_err_inum_ratelimited(c, inum,
|
||||
bch_err_inum_ratelimited(c, inum.inum,
|
||||
"read error %i from btree lookup", ret);
|
||||
rbio->bio.bi_status = BLK_STS_IOERR;
|
||||
bio_endio(&rbio->bio);
|
||||
@ -884,7 +898,6 @@ void bch2_readahead(struct readahead_control *ractl)
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct page *page;
|
||||
struct readpages_iter readpages_iter;
|
||||
int ret;
|
||||
@ -893,8 +906,6 @@ void bch2_readahead(struct readahead_control *ractl)
|
||||
BUG_ON(ret);
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
|
||||
BTREE_ITER_SLOTS);
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
|
||||
@ -915,22 +926,20 @@ void bch2_readahead(struct readahead_control *ractl)
|
||||
rbio->bio.bi_end_io = bch2_readpages_end_io;
|
||||
BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
|
||||
|
||||
bchfs_read(&trans, &iter, rbio, inode->v.i_ino,
|
||||
bchfs_read(&trans, rbio, inode_inum(inode),
|
||||
&readpages_iter);
|
||||
}
|
||||
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
kfree(readpages_iter.pages);
|
||||
}
|
||||
|
||||
static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
u64 inum, struct page *page)
|
||||
subvol_inum inum, struct page *page)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
|
||||
bch2_page_state_create(page, __GFP_NOFAIL);
|
||||
|
||||
@ -940,12 +949,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
|
||||
BTREE_ITER_SLOTS);
|
||||
|
||||
bchfs_read(&trans, &iter, rbio, inum, NULL);
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bchfs_read(&trans, rbio, inum, NULL);
|
||||
bch2_trans_exit(&trans);
|
||||
}
|
||||
|
||||
@ -959,7 +963,7 @@ int bch2_readpage(struct file *file, struct page *page)
|
||||
rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), opts);
|
||||
rbio->bio.bi_end_io = bch2_readpages_end_io;
|
||||
|
||||
__bchfs_readpage(c, rbio, inode->v.i_ino, page);
|
||||
__bchfs_readpage(c, rbio, inode_inum(inode), page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -982,7 +986,7 @@ static int bch2_read_single_page(struct page *page,
|
||||
rbio->bio.bi_private = &done;
|
||||
rbio->bio.bi_end_io = bch2_read_single_page_end_io;
|
||||
|
||||
__bchfs_readpage(c, rbio, inode->v.i_ino, page);
|
||||
__bchfs_readpage(c, rbio, inode_inum(inode), page);
|
||||
wait_for_completion(&done);
|
||||
|
||||
ret = blk_status_to_errno(rbio->bio.bi_status);
|
||||
@ -1126,6 +1130,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
|
||||
op->nr_replicas = nr_replicas;
|
||||
op->res.nr_replicas = nr_replicas;
|
||||
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
|
||||
op->subvol = inode->ei_subvol;
|
||||
op->pos = POS(inode->v.i_ino, sector);
|
||||
op->wbio.bio.bi_iter.bi_sector = sector;
|
||||
op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
|
||||
@ -1758,7 +1763,7 @@ start:
|
||||
if (iter->count)
|
||||
closure_get(&dio->cl);
|
||||
|
||||
bch2_read(c, rbio_init(bio, opts), inode->v.i_ino);
|
||||
bch2_read(c, rbio_init(bio, opts), inode_inum(inode));
|
||||
}
|
||||
|
||||
iter->count += shorten;
|
||||
@ -1813,6 +1818,50 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
|
||||
/* O_DIRECT writes */
|
||||
|
||||
static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
|
||||
u64 offset, u64 size,
|
||||
unsigned nr_replicas, bool compressed)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 end = offset + size;
|
||||
u32 snapshot;
|
||||
bool ret = true;
|
||||
int err;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_extents,
|
||||
SPOS(inum.inum, offset, snapshot),
|
||||
BTREE_ITER_SLOTS, k, err) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k), POS(inum.inum, end)) >= 0)
|
||||
break;
|
||||
|
||||
if (k.k->p.snapshot != snapshot ||
|
||||
nr_replicas > bch2_bkey_replicas(c, k) ||
|
||||
(!compressed && bch2_bkey_sectors_compressed(k))) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
offset = iter.pos.offset;
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (err == -EINTR)
|
||||
goto retry;
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
return err ? false : ret;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_loop_async(struct bch_write_op *);
|
||||
|
||||
static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
@ -1891,6 +1940,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
|
||||
dio->op.write_point = writepoint_hashed((unsigned long) current);
|
||||
dio->op.nr_replicas = dio->op.opts.data_replicas;
|
||||
dio->op.subvol = inode->ei_subvol;
|
||||
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
|
||||
|
||||
if ((req->ki_flags & IOCB_DSYNC) &&
|
||||
@ -1901,8 +1951,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
|
||||
dio->op.opts.data_replicas, 0);
|
||||
if (unlikely(ret) &&
|
||||
!bch2_check_range_allocated(c, dio->op.pos,
|
||||
bio_sectors(bio),
|
||||
!bch2_check_range_allocated(c, inode_inum(inode),
|
||||
dio->op.pos.offset, bio_sectors(bio),
|
||||
dio->op.opts.data_replicas,
|
||||
dio->op.opts.compression != 0))
|
||||
goto err;
|
||||
@ -2146,9 +2196,9 @@ out:
|
||||
|
||||
/* truncate: */
|
||||
|
||||
static inline int range_has_data(struct bch_fs *c,
|
||||
struct bpos start,
|
||||
struct bpos end)
|
||||
static inline int range_has_data(struct bch_fs *c, u32 subvol,
|
||||
struct bpos start,
|
||||
struct bpos end)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
@ -2156,6 +2206,12 @@ static inline int range_has_data(struct bch_fs *c,
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_extents, start, 0, k, ret) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
|
||||
@ -2166,7 +2222,11 @@ static inline int range_has_data(struct bch_fs *c,
|
||||
break;
|
||||
}
|
||||
}
|
||||
start = iter.pos;
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
return bch2_trans_exit(&trans) ?: ret;
|
||||
}
|
||||
@ -2198,7 +2258,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
|
||||
* XXX: we're doing two index lookups when we end up reading the
|
||||
* page
|
||||
*/
|
||||
ret = range_has_data(c,
|
||||
ret = range_has_data(c, inode->ei_subvol,
|
||||
POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT),
|
||||
POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT));
|
||||
if (ret <= 0)
|
||||
@ -2332,7 +2392,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
|
||||
inode_dio_wait(&inode->v);
|
||||
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
|
||||
|
||||
ret = bch2_inode_find_by_inum(c, inode->v.i_ino, &inode_u);
|
||||
ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -2390,7 +2450,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
|
||||
|
||||
truncate_setsize(&inode->v, iattr->ia_size);
|
||||
|
||||
ret = bch2_fpunch(c, inode->v.i_ino,
|
||||
ret = bch2_fpunch(c, inode_inum(inode),
|
||||
round_up(iattr->ia_size, block_bytes(c)) >> 9,
|
||||
U64_MAX, &inode->ei_journal_seq, &i_sectors_delta);
|
||||
i_sectors_acct(c, inode, NULL, i_sectors_delta);
|
||||
@ -2450,7 +2510,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
|
||||
if (discard_start < discard_end) {
|
||||
s64 i_sectors_delta = 0;
|
||||
|
||||
ret = bch2_fpunch(c, inode->v.i_ino,
|
||||
ret = bch2_fpunch(c, inode_inum(inode),
|
||||
discard_start, discard_end,
|
||||
&inode->ei_journal_seq,
|
||||
&i_sectors_delta);
|
||||
@ -2529,7 +2589,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
|
||||
} else {
|
||||
s64 i_sectors_delta = 0;
|
||||
|
||||
ret = bch2_fpunch(c, inode->v.i_ino,
|
||||
ret = bch2_fpunch(c, inode_inum(inode),
|
||||
offset >> 9, (offset + len) >> 9,
|
||||
&inode->ei_journal_seq,
|
||||
&i_sectors_delta);
|
||||
@ -2556,6 +2616,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
|
||||
struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
|
||||
struct bpos atomic_end;
|
||||
unsigned trigger_flags = 0;
|
||||
u32 snapshot;
|
||||
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans,
|
||||
inode->ei_subvol, &snapshot);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
bch2_btree_iter_set_snapshot(&src, snapshot);
|
||||
bch2_btree_iter_set_snapshot(&dst, snapshot);
|
||||
bch2_btree_iter_set_snapshot(&del, snapshot);
|
||||
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
@ -2676,9 +2748,17 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
||||
struct bkey_i_reservation reservation;
|
||||
struct bkey_s_c k;
|
||||
unsigned sectors;
|
||||
u32 snapshot;
|
||||
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans,
|
||||
inode->ei_subvol, &snapshot);
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
bch2_btree_iter_set_snapshot(&iter, snapshot);
|
||||
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
if ((ret = bkey_err(k)))
|
||||
goto bkey_err;
|
||||
@ -2725,7 +2805,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
||||
reservation.v.nr_replicas = disk_res.nr_replicas;
|
||||
}
|
||||
|
||||
ret = bch2_extent_update(&trans, &iter, &reservation.k_i,
|
||||
ret = bch2_extent_update(&trans, inode_inum(inode), &iter,
|
||||
&reservation.k_i,
|
||||
&disk_res, &inode->ei_journal_seq,
|
||||
0, &i_sectors_delta, true);
|
||||
i_sectors_acct(c, inode, "a_res, i_sectors_delta);
|
||||
@ -2927,8 +3008,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
|
||||
mark_range_unallocated(src, pos_src, pos_src + aligned_len);
|
||||
|
||||
ret = bch2_remap_range(c,
|
||||
POS(dst->v.i_ino, pos_dst >> 9),
|
||||
POS(src->v.i_ino, pos_src >> 9),
|
||||
inode_inum(dst), pos_dst >> 9,
|
||||
inode_inum(src), pos_src >> 9,
|
||||
aligned_len >> 9,
|
||||
&dst->ei_journal_seq,
|
||||
pos_dst + len, &i_sectors_delta);
|
||||
@ -3019,7 +3100,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
subvol_inum inum = inode_inum(inode);
|
||||
u64 isize, next_data = MAX_LFS_FILESIZE;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
isize = i_size_read(&inode->v);
|
||||
@ -3027,9 +3110,15 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
|
||||
return -ENXIO;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_extents,
|
||||
POS(inode->v.i_ino, offset >> 9), 0, k, ret) {
|
||||
SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) {
|
||||
if (k.k->p.inode != inode->v.i_ino) {
|
||||
break;
|
||||
} else if (bkey_extent_is_data(k.k)) {
|
||||
@ -3039,6 +3128,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
if (ret)
|
||||
@ -3115,7 +3207,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
subvol_inum inum = inode_inum(inode);
|
||||
u64 isize, next_hole = MAX_LFS_FILESIZE;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
isize = i_size_read(&inode->v);
|
||||
@ -3123,9 +3217,15 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
|
||||
return -ENXIO;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_extents,
|
||||
POS(inode->v.i_ino, offset >> 9),
|
||||
SPOS(inode->v.i_ino, offset >> 9, snapshot),
|
||||
BTREE_ITER_SLOTS, k, ret) {
|
||||
if (k.k->p.inode != inode->v.i_ino) {
|
||||
next_hole = bch2_seek_pagecache_hole(&inode->v,
|
||||
@ -3143,6 +3243,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
if (ret)
|
||||
|
@ -10,7 +10,11 @@
|
||||
#include "quota.h"
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/fsnotify.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
|
||||
#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
|
||||
@ -192,7 +196,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
|
||||
char *kname = NULL;
|
||||
struct qstr qstr;
|
||||
int ret = 0;
|
||||
u64 inum;
|
||||
subvol_inum inum;
|
||||
|
||||
kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
|
||||
if (!kname)
|
||||
@ -205,10 +209,8 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
|
||||
qstr.len = ret;
|
||||
qstr.name = kname;
|
||||
|
||||
ret = -ENOENT;
|
||||
inum = bch2_dirent_lookup(c, src->v.i_ino, &hash,
|
||||
&qstr);
|
||||
if (!inum)
|
||||
ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
|
||||
if (ret)
|
||||
goto err1;
|
||||
|
||||
vinode = bch2_vfs_inode_get(c, inum);
|
||||
@ -294,6 +296,154 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
|
||||
struct bch_ioctl_subvolume arg)
|
||||
{
|
||||
struct inode *dir;
|
||||
struct bch_inode_info *inode;
|
||||
struct user_namespace *s_user_ns;
|
||||
struct dentry *dst_dentry;
|
||||
struct path src_path, dst_path;
|
||||
int how = LOOKUP_FOLLOW;
|
||||
int error;
|
||||
subvol_inum snapshot_src = { 0 };
|
||||
unsigned lookup_flags = 0;
|
||||
unsigned create_flags = BCH_CREATE_SUBVOL;
|
||||
|
||||
if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
|
||||
BCH_SUBVOL_SNAPSHOT_RO))
|
||||
return -EINVAL;
|
||||
|
||||
if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
|
||||
(arg.src_ptr ||
|
||||
(arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
|
||||
return -EINVAL;
|
||||
|
||||
if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
|
||||
create_flags |= BCH_CREATE_SNAPSHOT;
|
||||
|
||||
if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
|
||||
create_flags |= BCH_CREATE_SNAPSHOT_RO;
|
||||
|
||||
/* why do we need this lock? */
|
||||
down_read(&c->vfs_sb->s_umount);
|
||||
|
||||
if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
|
||||
sync_inodes_sb(c->vfs_sb);
|
||||
retry:
|
||||
if (arg.src_ptr) {
|
||||
error = user_path_at(arg.dirfd,
|
||||
(const char __user *)(unsigned long)arg.src_ptr,
|
||||
how, &src_path);
|
||||
if (error)
|
||||
goto err1;
|
||||
|
||||
if (src_path.dentry->d_sb->s_fs_info != c) {
|
||||
path_put(&src_path);
|
||||
error = -EXDEV;
|
||||
goto err1;
|
||||
}
|
||||
|
||||
snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
|
||||
}
|
||||
|
||||
dst_dentry = user_path_create(arg.dirfd,
|
||||
(const char __user *)(unsigned long)arg.dst_ptr,
|
||||
&dst_path, lookup_flags);
|
||||
error = PTR_ERR_OR_ZERO(dst_dentry);
|
||||
if (error)
|
||||
goto err2;
|
||||
|
||||
if (dst_dentry->d_sb->s_fs_info != c) {
|
||||
error = -EXDEV;
|
||||
goto err3;
|
||||
}
|
||||
|
||||
if (dst_dentry->d_inode) {
|
||||
error = -EEXIST;
|
||||
goto err3;
|
||||
}
|
||||
|
||||
dir = dst_path.dentry->d_inode;
|
||||
if (IS_DEADDIR(dir)) {
|
||||
error = -ENOENT;
|
||||
goto err3;
|
||||
}
|
||||
|
||||
s_user_ns = dir->i_sb->s_user_ns;
|
||||
if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
|
||||
!kgid_has_mapping(s_user_ns, current_fsgid())) {
|
||||
error = -EOVERFLOW;
|
||||
goto err3;
|
||||
}
|
||||
|
||||
error = inode_permission(file_mnt_user_ns(filp),
|
||||
dir, MAY_WRITE | MAY_EXEC);
|
||||
if (error)
|
||||
goto err3;
|
||||
|
||||
if (!IS_POSIXACL(dir))
|
||||
arg.mode &= ~current_umask();
|
||||
|
||||
error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
|
||||
if (error)
|
||||
goto err3;
|
||||
|
||||
if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
|
||||
!arg.src_ptr)
|
||||
snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol;
|
||||
|
||||
inode = __bch2_create(file_mnt_user_ns(filp), to_bch_ei(dir),
|
||||
dst_dentry, arg.mode|S_IFDIR,
|
||||
0, snapshot_src, create_flags);
|
||||
error = PTR_ERR_OR_ZERO(inode);
|
||||
if (error)
|
||||
goto err3;
|
||||
|
||||
d_instantiate(dst_dentry, &inode->v);
|
||||
fsnotify_mkdir(dir, dst_dentry);
|
||||
err3:
|
||||
done_path_create(&dst_path, dst_dentry);
|
||||
err2:
|
||||
if (arg.src_ptr)
|
||||
path_put(&src_path);
|
||||
|
||||
if (retry_estale(error, lookup_flags)) {
|
||||
lookup_flags |= LOOKUP_REVAL;
|
||||
goto retry;
|
||||
}
|
||||
err1:
|
||||
up_read(&c->vfs_sb->s_umount);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
|
||||
struct bch_ioctl_subvolume arg)
|
||||
{
|
||||
struct path path;
|
||||
int ret = 0;
|
||||
|
||||
if (arg.flags)
|
||||
return -EINVAL;
|
||||
|
||||
ret = user_path_at(arg.dirfd,
|
||||
(const char __user *)(unsigned long)arg.dst_ptr,
|
||||
LOOKUP_FOLLOW, &path);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (path.dentry->d_sb->s_fs_info != c) {
|
||||
path_put(&path);
|
||||
return -EXDEV;
|
||||
}
|
||||
|
||||
ret = __bch2_unlink(path.dentry->d_parent->d_inode, path.dentry, 1);
|
||||
path_put(&path);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
@ -324,6 +474,22 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
case FS_IOC_GOINGDOWN:
|
||||
return bch2_ioc_goingdown(c, (u32 __user *) arg);
|
||||
|
||||
case BCH_IOCTL_SUBVOLUME_CREATE: {
|
||||
struct bch_ioctl_subvolume i;
|
||||
|
||||
if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
|
||||
return -EFAULT;
|
||||
return bch2_ioctl_subvolume_create(c, file, i);
|
||||
}
|
||||
|
||||
case BCH_IOCTL_SUBVOLUME_DESTROY: {
|
||||
struct bch_ioctl_subvolume i;
|
||||
|
||||
if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
|
||||
return -EFAULT;
|
||||
return bch2_ioctl_subvolume_destroy(c, file, i);
|
||||
}
|
||||
|
||||
default:
|
||||
return bch2_fs_ioctl(c, cmd, (void __user *) arg);
|
||||
}
|
||||
|
161
libbcachefs/fs.c
161
libbcachefs/fs.c
@ -36,7 +36,7 @@
|
||||
|
||||
static struct kmem_cache *bch2_inode_cache;
|
||||
|
||||
static void bch2_vfs_inode_init(struct bch_fs *,
|
||||
static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum,
|
||||
struct bch_inode_info *,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
@ -149,7 +149,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_inode_peek(&trans, &iter, &inode_u, inode->v.i_ino,
|
||||
ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
|
||||
BTREE_ITER_INTENT) ?:
|
||||
(set ? set(inode, &inode_u, p) : 0) ?:
|
||||
bch2_inode_write(&trans, &iter, &inode_u) ?:
|
||||
@ -208,13 +208,42 @@ int bch2_fs_quota_transfer(struct bch_fs *c,
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
|
||||
static int bch2_iget5_test(struct inode *vinode, void *p)
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
||||
subvol_inum *inum = p;
|
||||
|
||||
return inode->ei_subvol == inum->subvol &&
|
||||
inode->ei_inode.bi_inum == inum->inum;
|
||||
}
|
||||
|
||||
static int bch2_iget5_set(struct inode *vinode, void *p)
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
||||
subvol_inum *inum = p;
|
||||
|
||||
inode->v.i_ino = inum->inum;
|
||||
inode->ei_subvol = inum->subvol;
|
||||
inode->ei_inode.bi_inum = inum->inum;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned bch2_inode_hash(subvol_inum inum)
|
||||
{
|
||||
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
|
||||
}
|
||||
|
||||
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
|
||||
{
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct bch_inode_info *inode;
|
||||
int ret;
|
||||
|
||||
inode = to_bch_ei(iget_locked(c->vfs_sb, inum));
|
||||
inode = to_bch_ei(iget5_locked(c->vfs_sb,
|
||||
bch2_inode_hash(inum),
|
||||
bch2_iget5_test,
|
||||
bch2_iget5_set,
|
||||
&inum));
|
||||
if (unlikely(!inode))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
if (!(inode->v.i_state & I_NEW))
|
||||
@ -226,26 +255,20 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
bch2_vfs_inode_init(c, inode, &inode_u);
|
||||
bch2_vfs_inode_init(c, inum, inode, &inode_u);
|
||||
|
||||
inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum);
|
||||
inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum.inum);
|
||||
|
||||
unlock_new_inode(&inode->v);
|
||||
|
||||
return &inode->v;
|
||||
}
|
||||
|
||||
static int inum_test(struct inode *inode, void *p)
|
||||
{
|
||||
unsigned long *ino = p;
|
||||
|
||||
return *ino == inode->i_ino;
|
||||
}
|
||||
|
||||
static struct bch_inode_info *
|
||||
struct bch_inode_info *
|
||||
__bch2_create(struct user_namespace *mnt_userns,
|
||||
struct bch_inode_info *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t rdev, bool tmpfile)
|
||||
umode_t mode, dev_t rdev, subvol_inum snapshot_src,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = dir->v.i_sb->s_fs_info;
|
||||
struct btree_trans trans;
|
||||
@ -253,6 +276,7 @@ __bch2_create(struct user_namespace *mnt_userns,
|
||||
struct bch_inode_info *inode, *old;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct posix_acl *default_acl = NULL, *acl = NULL;
|
||||
subvol_inum inum;
|
||||
u64 journal_seq = 0;
|
||||
int ret;
|
||||
|
||||
@ -273,20 +297,23 @@ __bch2_create(struct user_namespace *mnt_userns,
|
||||
|
||||
bch2_inode_init_early(c, &inode_u);
|
||||
|
||||
if (!tmpfile)
|
||||
if (!(flags & BCH_CREATE_TMPFILE))
|
||||
mutex_lock(&dir->ei_update_lock);
|
||||
|
||||
bch2_trans_init(&trans, c, 8,
|
||||
2048 + (!tmpfile ? dentry->d_name.len : 0));
|
||||
2048 + (!(flags & BCH_CREATE_TMPFILE)
|
||||
? dentry->d_name.len : 0));
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u,
|
||||
!tmpfile ? &dentry->d_name : NULL,
|
||||
ret = bch2_create_trans(&trans,
|
||||
inode_inum(dir), &dir_u, &inode_u,
|
||||
!(flags & BCH_CREATE_TMPFILE)
|
||||
? &dentry->d_name : NULL,
|
||||
from_kuid(mnt_userns, current_fsuid()),
|
||||
from_kgid(mnt_userns, current_fsgid()),
|
||||
mode, rdev,
|
||||
default_acl, acl) ?:
|
||||
default_acl, acl, snapshot_src, flags) ?:
|
||||
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
|
||||
KEY_TYPE_QUOTA_PREALLOC);
|
||||
if (unlikely(ret))
|
||||
@ -302,14 +329,17 @@ err_before_quota:
|
||||
goto err_trans;
|
||||
}
|
||||
|
||||
if (!tmpfile) {
|
||||
if (!(flags & BCH_CREATE_TMPFILE)) {
|
||||
bch2_inode_update_after_write(c, dir, &dir_u,
|
||||
ATTR_MTIME|ATTR_CTIME);
|
||||
journal_seq_copy(c, dir, journal_seq);
|
||||
mutex_unlock(&dir->ei_update_lock);
|
||||
}
|
||||
|
||||
bch2_vfs_inode_init(c, inode, &inode_u);
|
||||
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
|
||||
inum.inum = inode_u.bi_inum;
|
||||
|
||||
bch2_vfs_inode_init(c, inum, inode, &inode_u);
|
||||
journal_seq_copy(c, inode, journal_seq);
|
||||
|
||||
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
|
||||
@ -322,8 +352,12 @@ err_before_quota:
|
||||
*/
|
||||
|
||||
inode->v.i_state |= I_CREATING;
|
||||
old = to_bch_ei(inode_insert5(&inode->v, inode->v.i_ino,
|
||||
inum_test, NULL, &inode->v.i_ino));
|
||||
|
||||
old = to_bch_ei(inode_insert5(&inode->v,
|
||||
bch2_inode_hash(inum),
|
||||
bch2_iget5_test,
|
||||
bch2_iget5_set,
|
||||
&inum));
|
||||
BUG_ON(!old);
|
||||
|
||||
if (unlikely(old != inode)) {
|
||||
@ -350,7 +384,7 @@ err:
|
||||
posix_acl_release(acl);
|
||||
return inode;
|
||||
err_trans:
|
||||
if (!tmpfile)
|
||||
if (!(flags & BCH_CREATE_TMPFILE))
|
||||
mutex_unlock(&dir->ei_update_lock);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
@ -369,12 +403,13 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
|
||||
struct bch_inode_info *dir = to_bch_ei(vdir);
|
||||
struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
|
||||
struct inode *vinode = NULL;
|
||||
u64 inum;
|
||||
subvol_inum inum = { .subvol = 1 };
|
||||
int ret;
|
||||
|
||||
inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash,
|
||||
&dentry->d_name);
|
||||
ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
|
||||
&dentry->d_name, &inum);
|
||||
|
||||
if (inum)
|
||||
if (!ret)
|
||||
vinode = bch2_vfs_inode_get(c, inum);
|
||||
|
||||
return d_splice_alias(vinode, dentry);
|
||||
@ -385,7 +420,8 @@ static int bch2_mknod(struct user_namespace *mnt_userns,
|
||||
umode_t mode, dev_t rdev)
|
||||
{
|
||||
struct bch_inode_info *inode =
|
||||
__bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev, false);
|
||||
__bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev,
|
||||
(subvol_inum) { 0 }, 0);
|
||||
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
@ -415,8 +451,8 @@ static int __bch2_link(struct bch_fs *c,
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0,
|
||||
bch2_link_trans(&trans,
|
||||
dir->v.i_ino,
|
||||
inode->v.i_ino, &dir_u, &inode_u,
|
||||
inode_inum(dir), &dir_u,
|
||||
inode_inum(inode), &inode_u,
|
||||
&dentry->d_name));
|
||||
|
||||
if (likely(!ret)) {
|
||||
@ -452,7 +488,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
||||
int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
|
||||
int deleting_snapshot)
|
||||
{
|
||||
struct bch_fs *c = vdir->i_sb->s_fs_info;
|
||||
struct bch_inode_info *dir = to_bch_ei(vdir);
|
||||
@ -467,8 +504,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
||||
ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
bch2_unlink_trans(&trans,
|
||||
dir->v.i_ino, &dir_u,
|
||||
&inode_u, &dentry->d_name));
|
||||
inode_inum(dir), &dir_u,
|
||||
&inode_u, &dentry->d_name,
|
||||
deleting_snapshot));
|
||||
|
||||
if (likely(!ret)) {
|
||||
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
|
||||
@ -486,6 +524,11 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
||||
{
|
||||
return __bch2_unlink(vdir, dentry, -1);
|
||||
}
|
||||
|
||||
static int bch2_symlink(struct user_namespace *mnt_userns,
|
||||
struct inode *vdir, struct dentry *dentry,
|
||||
const char *symname)
|
||||
@ -494,7 +537,8 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
|
||||
struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
|
||||
int ret;
|
||||
|
||||
inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
|
||||
inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
|
||||
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
|
||||
if (unlikely(IS_ERR(inode)))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
@ -587,8 +631,8 @@ static int bch2_rename2(struct user_namespace *mnt_userns,
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0,
|
||||
bch2_rename_trans(&trans,
|
||||
src_dir->v.i_ino, &src_dir_u,
|
||||
dst_dir->v.i_ino, &dst_dir_u,
|
||||
inode_inum(src_dir), &src_dir_u,
|
||||
inode_inum(dst_dir), &dst_dir_u,
|
||||
&src_inode_u,
|
||||
&dst_inode_u,
|
||||
&src_dentry->d_name,
|
||||
@ -711,7 +755,7 @@ retry:
|
||||
kfree(acl);
|
||||
acl = NULL;
|
||||
|
||||
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino,
|
||||
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
@ -719,7 +763,8 @@ retry:
|
||||
bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
|
||||
|
||||
if (attr->ia_valid & ATTR_MODE) {
|
||||
ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl);
|
||||
ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
|
||||
inode_u.bi_mode, &acl);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
}
|
||||
@ -810,7 +855,8 @@ static int bch2_tmpfile(struct user_namespace *mnt_userns,
|
||||
struct inode *vdir, struct dentry *dentry, umode_t mode)
|
||||
{
|
||||
struct bch_inode_info *inode =
|
||||
__bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0, true);
|
||||
__bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0,
|
||||
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
|
||||
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
@ -885,6 +931,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
|
||||
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
|
||||
unsigned offset_into_extent, sectors;
|
||||
bool have_extent = false;
|
||||
u32 snapshot;
|
||||
int ret = 0;
|
||||
|
||||
ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
|
||||
@ -894,15 +941,21 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
|
||||
if (start + len < start)
|
||||
return -EINVAL;
|
||||
|
||||
start >>= 9;
|
||||
|
||||
bch2_bkey_buf_init(&cur);
|
||||
bch2_bkey_buf_init(&prev);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
POS(ei->v.i_ino, start >> 9), 0);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
SPOS(ei->v.i_ino, start, snapshot), 0);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = bkey_err(k)) &&
|
||||
bkey_cmp(iter.pos, end) < 0) {
|
||||
@ -951,7 +1004,9 @@ retry:
|
||||
bch2_btree_iter_set_pos(&iter,
|
||||
POS(iter.pos.inode, iter.pos.offset + sectors));
|
||||
}
|
||||
|
||||
start = iter.pos.offset;
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
@ -959,7 +1014,6 @@ retry:
|
||||
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
|
||||
FIEMAP_EXTENT_LAST);
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
ret = bch2_trans_exit(&trans) ?: ret;
|
||||
bch2_bkey_buf_exit(&cur, c);
|
||||
bch2_bkey_buf_exit(&prev, c);
|
||||
@ -996,7 +1050,7 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
|
||||
if (!dir_emit_dots(file, ctx))
|
||||
return 0;
|
||||
|
||||
return bch2_readdir(c, inode->v.i_ino, ctx);
|
||||
return bch2_readdir(c, inode_inum(inode), ctx);
|
||||
}
|
||||
|
||||
static const struct file_operations bch_file_operations = {
|
||||
@ -1096,6 +1150,7 @@ static const struct address_space_operations bch_address_space_operations = {
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
};
|
||||
|
||||
#if 0
|
||||
static struct inode *bch2_nfs_get_inode(struct super_block *sb,
|
||||
u64 ino, u32 generation)
|
||||
{
|
||||
@ -1129,14 +1184,15 @@ static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid,
|
||||
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
|
||||
bch2_nfs_get_inode);
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct export_operations bch_export_ops = {
|
||||
.fh_to_dentry = bch2_fh_to_dentry,
|
||||
.fh_to_parent = bch2_fh_to_parent,
|
||||
//.fh_to_dentry = bch2_fh_to_dentry,
|
||||
//.fh_to_parent = bch2_fh_to_parent,
|
||||
//.get_parent = bch2_get_parent,
|
||||
};
|
||||
|
||||
static void bch2_vfs_inode_init(struct bch_fs *c,
|
||||
static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum,
|
||||
struct bch_inode_info *inode,
|
||||
struct bch_inode_unpacked *bi)
|
||||
{
|
||||
@ -1152,6 +1208,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
|
||||
inode->ei_journal_seq = 0;
|
||||
inode->ei_quota_reserved = 0;
|
||||
inode->ei_qid = bch_qid(bi);
|
||||
inode->ei_subvol = inum.subvol;
|
||||
|
||||
inode->v.i_mapping->a_ops = &bch_address_space_operations;
|
||||
|
||||
@ -1249,7 +1306,7 @@ static void bch2_evict_inode(struct inode *vinode)
|
||||
KEY_TYPE_QUOTA_WARN);
|
||||
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
|
||||
KEY_TYPE_QUOTA_WARN);
|
||||
bch2_inode_rm(c, inode->v.i_ino, true);
|
||||
bch2_inode_rm(c, inode_inum(inode), true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1593,7 +1650,7 @@ got_sb:
|
||||
sb->s_flags |= SB_POSIXACL;
|
||||
#endif
|
||||
|
||||
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
|
||||
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
|
||||
if (IS_ERR(vinode)) {
|
||||
bch_err(c, "error mounting: error getting root inode %i",
|
||||
(int) PTR_ERR(vinode));
|
||||
|
@ -45,10 +45,20 @@ struct bch_inode_info {
struct mutex ei_quota_lock;
struct bch_qid ei_qid;

u32 ei_subvol;

/* copy of inode in btree: */
struct bch_inode_unpacked ei_inode;
};

static inline subvol_inum inode_inum(struct bch_inode_info *inode)
{
return (subvol_inum) {
.subvol = inode->ei_subvol,
.inum = inode->ei_inode.bi_inum,
};
}

/*
* Set if we've gotten a btree error for this inode, and thus the vfs inode and
* btree inode may be inconsistent:
@ -135,6 +145,10 @@ struct bch_inode_unpacked;

#ifndef NO_BCACHEFS_FS

struct bch_inode_info *
__bch2_create(struct user_namespace *, struct bch_inode_info *,
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);

int bch2_fs_quota_transfer(struct bch_fs *,
struct bch_inode_info *,
struct bch_qid,
@ -154,7 +168,7 @@ static inline int bch2_set_projid(struct bch_fs *c,
KEY_TYPE_QUOTA_PREALLOC);
}

struct inode *bch2_vfs_inode_get(struct bch_fs *, u64);
struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);

/* returns 0 if we want to do the update, or error is passed up */
typedef int (*inode_set_fn)(struct bch_inode_info *,
@ -170,6 +184,7 @@ int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
int bch2_setattr_nonsize(struct user_namespace *,
struct bch_inode_info *,
struct iattr *);
int __bch2_unlink(struct inode *, struct dentry *, int);

void bch2_vfs_exit(void);
int bch2_vfs_init(void);
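The fs.h hunk above introduces the subvol_inum pair and the inode_inum() accessor that the rest of this diff threads through the VFS paths. A minimal illustration of the intended calling convention (a sketch, not code from the commit): a lookup that previously took a bare u64 inode number now takes the (subvolume, inode number) pair.

/* Sketch only, assuming a struct bch_inode_info *inode as declared above. */
struct bch_inode_unpacked u;
int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
if (ret)
	return ret;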
|
libbcachefs/fsck.c (1376): file diff suppressed because it is too large
@ -6,8 +6,10 @@
|
||||
#include "btree_update.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "extent_update.h"
|
||||
#include "inode.h"
|
||||
#include "str_hash.h"
|
||||
#include "subvolume.h"
|
||||
#include "varint.h"
|
||||
|
||||
#include <linux/random.h>
|
||||
@ -295,15 +297,21 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
|
||||
int bch2_inode_peek(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bch_inode_unpacked *inode,
|
||||
u64 inum, unsigned flags)
|
||||
subvol_inum inum, unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
if (trans->c->opts.inodes_use_key_cache)
|
||||
if (0 && trans->c->opts.inodes_use_key_cache)
|
||||
flags |= BTREE_ITER_CACHED;
|
||||
|
||||
bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, inum), flags);
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_trans_iter_init(trans, iter, BTREE_ID_inodes,
|
||||
SPOS(0, inum.inum, snapshot), flags);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
@ -340,8 +348,8 @@ int bch2_inode_write(struct btree_trans *trans,
|
||||
|
||||
const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
|
||||
struct bch_inode_unpacked unpacked;
|
||||
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
|
||||
struct bch_inode_unpacked unpacked;
|
||||
|
||||
if (k.k->p.inode)
|
||||
return "nonzero k.p.inode";
|
||||
@ -368,6 +376,9 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
unpacked.bi_nlink != 0)
|
||||
return "flagged as unlinked but bi_nlink != 0";
|
||||
|
||||
if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode))
|
||||
return "subvolume root but not a directory";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -482,6 +493,9 @@ static inline u32 bkey_generation(struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This just finds an empty slot:
|
||||
*/
|
||||
int bch2_inode_create(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
@ -581,19 +595,77 @@ found_slot:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
static int bch2_inode_delete_keys(struct btree_trans *trans,
subvol_inum inum, enum btree_id id)
{
u64 offset = 0;
int ret = 0;

while (!ret || ret == -EINTR) {
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i delete;
u32 snapshot;

bch2_trans_begin(trans);

ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
continue;

bch2_trans_iter_init(trans, &iter, id,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek(&iter);

if (!k.k || iter.pos.inode != inum.inum) {
bch2_trans_iter_exit(trans, &iter);
break;
}

ret = bkey_err(k);
if (ret)
goto err;

bkey_init(&delete.k);
delete.k.p = iter.pos;

if (btree_node_type_is_extents(iter.btree_id)) {
unsigned max_sectors =
min_t(u64, U64_MAX - iter.pos.offset,
KEY_SIZE_MAX & (~0 << trans->c->block_bits));

/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);

ret = bch2_extent_trim_atomic(trans, &iter, &delete);
if (ret)
goto err;
}

ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
err:
offset = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
}

return ret;
}

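bch2_inode_delete_keys() above, like the other helpers added in this commit, follows one transaction-restart idiom: begin the transaction, resolve the subvolume to its snapshot ID, do the btree work, and treat -EINTR as "locks were dropped, retry". A minimal sketch of that shape (illustrative only; do_work() is a hypothetical stand-in for the per-call btree operations):

int ret;
do {
	bch2_trans_begin(trans);

	ret =   bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
		do_work(trans) ?:	/* hypothetical per-call btree work */
		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
} while (ret == -EINTR);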
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum, bool cached)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter = { NULL };
|
||||
struct bkey_i_inode_generation delete;
|
||||
struct bpos start = POS(inode_nr, 0);
|
||||
struct bpos end = POS(inode_nr + 1, 0);
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct bkey_s_c k;
|
||||
unsigned iter_flags = BTREE_ITER_INTENT;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
if (cached && c->opts.inodes_use_key_cache)
|
||||
if (0 && cached && c->opts.inodes_use_key_cache)
|
||||
iter_flags |= BTREE_ITER_CACHED;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 1024);
|
||||
@ -606,19 +678,20 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
|
||||
* XXX: the dirent could ideally would delete whiteouts when they're no
|
||||
* longer needed
|
||||
*/
|
||||
ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
|
||||
start, end, NULL) ?:
|
||||
bch2_btree_delete_range_trans(&trans, BTREE_ID_xattrs,
|
||||
start, end, NULL) ?:
|
||||
bch2_btree_delete_range_trans(&trans, BTREE_ID_dirents,
|
||||
start, end, NULL);
|
||||
ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?:
|
||||
bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?:
|
||||
bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents);
|
||||
if (ret)
|
||||
goto err;
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes,
|
||||
POS(0, inode_nr), iter_flags);
|
||||
SPOS(0, inum.inum, snapshot), iter_flags);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
|
||||
ret = bkey_err(k);
|
||||
@ -628,13 +701,20 @@ retry:
|
||||
if (k.k->type != KEY_TYPE_inode) {
|
||||
bch2_fs_inconsistent(trans.c,
|
||||
"inode %llu not found when deleting",
|
||||
inode_nr);
|
||||
inum.inum);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
|
||||
|
||||
/* Subvolume root? */
|
||||
if (inode_u.bi_subvol) {
|
||||
ret = bch2_subvolume_delete(&trans, inode_u.bi_subvol, -1);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bkey_inode_generation_init(&delete.k_i);
|
||||
delete.k.p = iter.pos;
|
||||
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
|
||||
@ -651,20 +731,22 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
|
||||
static int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
|
||||
subvol_inum inum,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct btree_iter iter = { NULL };
|
||||
struct btree_iter iter;
|
||||
int ret;
|
||||
|
||||
ret = bch2_inode_peek(trans, &iter, inode, inode_nr, 0);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
|
||||
if (!ret)
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
|
||||
int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
|
||||
struct bch_inode_unpacked *inode)
|
||||
{
|
||||
return bch2_trans_do(c, NULL, NULL, 0,
|
||||
bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
|
||||
bch2_inode_find_by_inum_trans(&trans, inum, inode));
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
|
||||
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
|
||||
|
||||
int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
|
||||
struct bch_inode_unpacked *, u64, unsigned);
|
||||
struct bch_inode_unpacked *, subvol_inum, unsigned);
|
||||
int bch2_inode_write(struct btree_trans *, struct btree_iter *,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
@ -74,9 +74,10 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
int bch2_inode_create(struct btree_trans *, struct btree_iter *,
|
||||
struct bch_inode_unpacked *, u32, u64);
|
||||
|
||||
int bch2_inode_rm(struct bch_fs *, u64, bool);
|
||||
int bch2_inode_rm(struct bch_fs *, subvol_inum, bool);
|
||||
|
||||
int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
|
||||
int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
|
||||
{
|
||||
|
libbcachefs/io.c (128)
@ -27,6 +27,7 @@
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "rebalance.h"
|
||||
#include "subvolume.h"
|
||||
#include "super.h"
|
||||
#include "super-io.h"
|
||||
|
||||
@ -220,7 +221,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
|
||||
: 0;
|
||||
|
||||
if (!*usage_increasing &&
|
||||
(new_replicas > bch2_bkey_replicas(c, old) ||
|
||||
(new->k.p.snapshot != old.k->p.snapshot ||
|
||||
new_replicas > bch2_bkey_replicas(c, old) ||
|
||||
(!new_compressed && bch2_bkey_sectors_compressed(old))))
|
||||
*usage_increasing = true;
|
||||
|
||||
@ -256,6 +258,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
int bch2_extent_update(struct btree_trans *trans,
|
||||
subvol_inum inum,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_i *k,
|
||||
struct disk_reservation *disk_res,
|
||||
@ -314,8 +317,8 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
struct btree_iter inode_iter;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
|
||||
ret = bch2_inode_peek(trans, &inode_iter, &inode_u,
|
||||
k->k.p.inode, BTREE_ITER_INTENT);
|
||||
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -371,22 +374,37 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns -EINTR if we had to drop locks:
|
||||
*/
|
||||
int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bpos end, u64 *journal_seq,
|
||||
s64 *i_sectors_delta)
|
||||
subvol_inum inum, u64 end,
|
||||
u64 *journal_seq, s64 *i_sectors_delta)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
|
||||
struct bpos end_pos = POS(inum.inum, end);
|
||||
struct bkey_s_c k;
|
||||
int ret = 0, ret2 = 0;
|
||||
u32 snapshot;
|
||||
|
||||
while ((bch2_trans_begin(trans),
|
||||
(k = bch2_btree_iter_peek(iter)).k) &&
|
||||
bkey_cmp(iter->pos, end) < 0) {
|
||||
while (1) {
|
||||
struct disk_reservation disk_res =
|
||||
bch2_disk_reservation_init(c, 0);
|
||||
struct bkey_i delete;
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
|
||||
bch2_btree_iter_set_snapshot(iter, snapshot);
|
||||
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
if (bkey_cmp(iter->pos, end_pos) >= 0)
|
||||
break;
|
||||
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto btree_err;
|
||||
@ -396,9 +414,9 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
|
||||
|
||||
/* create the biggest key we can */
|
||||
bch2_key_resize(&delete.k, max_sectors);
|
||||
bch2_cut_back(end, &delete);
|
||||
bch2_cut_back(end_pos, &delete);
|
||||
|
||||
ret = bch2_extent_update(trans, iter, &delete,
|
||||
ret = bch2_extent_update(trans, inum, iter, &delete,
|
||||
&disk_res, journal_seq,
|
||||
0, i_sectors_delta, false);
|
||||
bch2_disk_reservation_put(c, &disk_res);
|
||||
@ -411,36 +429,31 @@ btree_err:
|
||||
break;
|
||||
}
|
||||
|
||||
if (bkey_cmp(iter->pos, end) > 0) {
|
||||
bch2_btree_iter_set_pos(iter, end);
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
}
|
||||
if (bkey_cmp(iter->pos, end_pos) > 0)
|
||||
bch2_btree_iter_set_pos(iter, end_pos);
|
||||
|
||||
return ret ?: ret2;
|
||||
}
|
||||
|
||||
int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
|
||||
int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
|
||||
u64 *journal_seq, s64 *i_sectors_delta)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
POS(inum, start),
|
||||
BTREE_ITER_INTENT);
|
||||
POS(inum.inum, start),
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_fpunch_at(&trans, &iter, POS(inum, end),
|
||||
ret = bch2_fpunch_at(&trans, &iter, inum, end,
|
||||
journal_seq, i_sectors_delta);
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret == -EINTR)
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
return ret == -EINTR ? 0 : ret;
|
||||
}
|
||||
|
||||
int bch2_write_index_default(struct bch_write_op *op)
|
||||
@ -451,40 +464,51 @@ int bch2_write_index_default(struct bch_write_op *op)
|
||||
struct bkey_i *k = bch2_keylist_front(keys);
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
subvol_inum inum = {
|
||||
.subvol = op->subvol,
|
||||
.inum = k->k.p.inode,
|
||||
};
|
||||
int ret;
|
||||
|
||||
BUG_ON(!inum.subvol);
|
||||
|
||||
bch2_bkey_buf_init(&sk);
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
bkey_start_pos(&k->k),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
do {
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
k = bch2_keylist_front(keys);
|
||||
bch2_bkey_buf_copy(&sk, c, k);
|
||||
|
||||
k->k.p.snapshot = iter.snapshot;
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol,
|
||||
&sk.k->k.p.snapshot);
|
||||
if (ret == -EINTR)
|
||||
continue;
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
|
||||
bkey_copy(sk.k, k);
|
||||
bch2_cut_front(iter.pos, sk.k);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
bkey_start_pos(&sk.k->k),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_extent_update(&trans, &iter, sk.k,
|
||||
ret = bch2_extent_update(&trans, inum, &iter, sk.k,
|
||||
&op->res, op_journal_seq(op),
|
||||
op->new_i_size, &op->i_sectors_delta,
|
||||
op->flags & BCH_WRITE_CHECK_ENOSPC);
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret == -EINTR)
|
||||
continue;
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (bkey_cmp(iter.pos, k->k.p) >= 0)
|
||||
bch2_keylist_pop_front(keys);
|
||||
bch2_keylist_pop_front(&op->insert_keys);
|
||||
else
|
||||
bch2_cut_front(iter.pos, k);
|
||||
} while (!bch2_keylist_empty(keys));
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
|
||||
@ -1645,7 +1669,7 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
|
||||
}
|
||||
|
||||
static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
struct bvec_iter bvec_iter, u64 inode,
|
||||
struct bvec_iter bvec_iter,
|
||||
struct bch_io_failures *failed,
|
||||
unsigned flags)
|
||||
{
|
||||
@ -1709,7 +1733,10 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
struct bch_fs *c = rbio->c;
|
||||
struct bvec_iter iter = rbio->bvec_iter;
|
||||
unsigned flags = rbio->flags;
|
||||
u64 inode = rbio->read_pos.inode;
|
||||
subvol_inum inum = {
|
||||
.subvol = rbio->subvol,
|
||||
.inum = rbio->read_pos.inode,
|
||||
};
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
|
||||
trace_read_retry(&rbio->bio);
|
||||
@ -1725,12 +1752,12 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
flags &= ~BCH_READ_MAY_PROMOTE;
|
||||
|
||||
if (flags & BCH_READ_NODECODE) {
|
||||
bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
|
||||
bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
|
||||
} else {
|
||||
flags &= ~BCH_READ_LAST_FRAGMENT;
|
||||
flags |= BCH_READ_MUST_CLONE;
|
||||
|
||||
__bch2_read(c, rbio, iter, inode, &failed, flags);
|
||||
__bch2_read(c, rbio, iter, inum, &failed, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1804,7 +1831,8 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
|
||||
if (!bch2_bkey_narrow_crcs(new, new_crc))
|
||||
goto out;
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, new, 0);
|
||||
ret = bch2_trans_update(trans, &iter, new,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
@ -2172,6 +2200,7 @@ get_bio:
|
||||
/* XXX: only initialize this if needed */
|
||||
rbio->devs_have = bch2_bkey_devs(k);
|
||||
rbio->pick = pick;
|
||||
rbio->subvol = orig->subvol;
|
||||
rbio->read_pos = read_pos;
|
||||
rbio->data_btree = data_btree;
|
||||
rbio->data_pos = data_pos;
|
||||
@ -2274,25 +2303,31 @@ out_read_done:
|
||||
}
|
||||
|
||||
void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
struct bvec_iter bvec_iter, u64 inode,
|
||||
struct bvec_iter bvec_iter, subvol_inum inum,
|
||||
struct bch_io_failures *failed, unsigned flags)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_buf sk;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
BUG_ON(flags & BCH_READ_NODECODE);
|
||||
|
||||
bch2_bkey_buf_init(&sk);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
POS(inode, bvec_iter.bi_sector),
|
||||
BTREE_ITER_SLOTS);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
iter = (struct btree_iter) { NULL };
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
|
||||
SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS);
|
||||
while (1) {
|
||||
unsigned bytes, sectors, offset_into_extent;
|
||||
enum btree_id data_btree = BTREE_ID_extents;
|
||||
@ -2307,7 +2342,7 @@ retry:
|
||||
}
|
||||
|
||||
bch2_btree_iter_set_pos(&iter,
|
||||
POS(inode, bvec_iter.bi_sector));
|
||||
POS(inum.inum, bvec_iter.bi_sector));
|
||||
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
@ -2357,16 +2392,17 @@ retry:
|
||||
swap(bvec_iter.bi_size, bytes);
|
||||
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
|
||||
goto retry;
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
|
||||
if (ret) {
|
||||
bch_err_inum_ratelimited(c, inode,
|
||||
bch_err_inum_ratelimited(c, inum.inum,
|
||||
"read error %i from btree lookup", ret);
|
||||
rbio->bio.bi_status = BLK_STS_IOERR;
|
||||
bch2_rbio_done(rbio);
|
||||
|
@ -63,12 +63,13 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
|
||||
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, bool *, bool *, s64 *, s64 *);
|
||||
int bch2_extent_update(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, struct disk_reservation *,
|
||||
u64 *, u64, s64 *, bool);
|
||||
int bch2_extent_update(struct btree_trans *, subvol_inum,
|
||||
struct btree_iter *, struct bkey_i *,
|
||||
struct disk_reservation *, u64 *, u64, s64 *, bool);
|
||||
|
||||
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
|
||||
struct bpos, u64 *, s64 *);
|
||||
int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *);
|
||||
subvol_inum, u64, u64 *, s64 *);
|
||||
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, u64 *, s64 *);
|
||||
|
||||
int bch2_write_index_default(struct bch_write_op *);
|
||||
|
||||
@ -90,6 +91,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
|
||||
op->devs_have.nr = 0;
|
||||
op->target = 0;
|
||||
op->opts = opts;
|
||||
op->subvol = 0;
|
||||
op->pos = POS_MAX;
|
||||
op->version = ZERO_VERSION;
|
||||
op->write_point = (struct write_point_specifier) { 0 };
|
||||
@ -157,10 +159,10 @@ static inline void bch2_read_extent(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
|
||||
u64, struct bch_io_failures *, unsigned flags);
|
||||
subvol_inum, struct bch_io_failures *, unsigned flags);
|
||||
|
||||
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
u64 inode)
|
||||
subvol_inum inum)
|
||||
{
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
|
||||
@ -168,8 +170,9 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
|
||||
rbio->c = c;
|
||||
rbio->start_time = local_clock();
|
||||
rbio->subvol = inum.subvol;
|
||||
|
||||
__bch2_read(c, rbio, rbio->bio.bi_iter, inode, &failed,
|
||||
__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
|
||||
BCH_READ_RETRY_IF_STALE|
|
||||
BCH_READ_MAY_PROMOTE|
|
||||
BCH_READ_USER_MAPPED);
|
||||
|
@ -62,6 +62,7 @@ struct bch_read_bio {
|
||||
/*
|
||||
* pos we read from - different from data_pos for indirect extents:
|
||||
*/
|
||||
u32 subvol;
|
||||
struct bpos read_pos;
|
||||
|
||||
/*
|
||||
@ -122,6 +123,7 @@ struct bch_write_op {
|
||||
u16 nonce;
|
||||
struct bch_io_opts opts;
|
||||
|
||||
u32 subvol;
|
||||
struct bpos pos;
|
||||
struct bversion version;
|
||||
|
||||
|
@ -48,7 +48,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
|
||||
BTREE_ITER_PREFETCH);
|
||||
BTREE_ITER_PREFETCH|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = bkey_err(k))) {
|
||||
@ -74,7 +75,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
|
||||
bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||
bch2_trans_update(&trans, &iter, sk.k, 0) ?:
|
||||
bch2_trans_update(&trans, &iter, sk.k,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "journal_reclaim.h"
|
||||
#include "move.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
#include "super-io.h"
|
||||
#include "keylist.h"
|
||||
|
||||
@ -53,6 +54,81 @@ struct moving_context {
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
static int insert_snapshot_whiteouts(struct btree_trans *trans,
|
||||
enum btree_id id,
|
||||
struct bpos old_pos,
|
||||
struct bpos new_pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter, update_iter;
|
||||
struct bkey_s_c k;
|
||||
struct snapshots_seen s;
|
||||
int ret;
|
||||
|
||||
if (!btree_type_has_snapshots(id))
|
||||
return 0;
|
||||
|
||||
snapshots_seen_init(&s);
|
||||
|
||||
if (!bkey_cmp(old_pos, new_pos))
|
||||
return 0;
|
||||
|
||||
if (!snapshot_t(c, old_pos.snapshot)->children[0])
|
||||
return 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, id, old_pos,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
while (1) {
|
||||
next:
|
||||
k = bch2_btree_iter_prev(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (bkey_cmp(old_pos, k.k->p))
|
||||
break;
|
||||
|
||||
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
|
||||
struct bkey_i *update;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < s.nr; i++)
|
||||
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, s.d[i]))
|
||||
goto next;
|
||||
|
||||
update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(update);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bkey_init(&update->k);
|
||||
update->k.p = new_pos;
|
||||
update->k.p.snapshot = k.k->p.snapshot;
|
||||
|
||||
bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(&update_iter) ?:
|
||||
bch2_trans_update(trans, &update_iter, update,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||
bch2_trans_iter_exit(trans, &update_iter);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
kfree(s.d);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
@ -166,7 +242,10 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
|
||||
next_pos = insert->k.p;
|
||||
|
||||
ret = bch2_trans_update(&trans, &iter, insert, 0) ?:
|
||||
ret = insert_snapshot_whiteouts(&trans, m->btree_id,
|
||||
k.k->p, insert->k.p) ?:
|
||||
bch2_trans_update(&trans, &iter, insert,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(&trans, &op->res,
|
||||
op_journal_seq(op),
|
||||
BTREE_INSERT_NOFAIL|
|
||||
@ -581,7 +660,8 @@ static int __bch2_move_data(struct bch_fs *c,
|
||||
stats->pos = start;
|
||||
|
||||
bch2_trans_iter_init(&trans, &iter, btree_id, start,
|
||||
BTREE_ITER_PREFETCH);
|
||||
BTREE_ITER_PREFETCH|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
|
||||
if (rate)
|
||||
bch2_ratelimit_reset(rate);
|
||||
|
@ -63,7 +63,7 @@ const char * const bch2_member_states[] = {
|
||||
|
||||
#undef x
|
||||
|
||||
const char * const bch2_d_types[DT_MAX] = {
|
||||
const char * const bch2_d_types[BCH_DT_MAX] = {
|
||||
[DT_UNKNOWN] = "unknown",
|
||||
[DT_FIFO] = "fifo",
|
||||
[DT_CHR] = "chr",
|
||||
@ -73,6 +73,7 @@ const char * const bch2_d_types[DT_MAX] = {
|
||||
[DT_LNK] = "lnk",
|
||||
[DT_SOCK] = "sock",
|
||||
[DT_WHT] = "whiteout",
|
||||
[DT_SUBVOL] = "subvol",
|
||||
};
|
||||
|
||||
void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
|
||||
|
@ -215,19 +215,19 @@ enum opt_type {
|
||||
BCH_SB_POSIX_ACL, true, \
|
||||
NULL, "Enable POSIX acls") \
|
||||
x(usrquota, u8, \
|
||||
OPT_FORMAT|OPT_MOUNT, \
|
||||
0, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_USRQUOTA, false, \
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Enable user quotas") \
|
||||
x(grpquota, u8, \
|
||||
OPT_FORMAT|OPT_MOUNT, \
|
||||
0, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_GRPQUOTA, false, \
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Enable group quotas") \
|
||||
x(prjquota, u8, \
|
||||
OPT_FORMAT|OPT_MOUNT, \
|
||||
0, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_PRJQUOTA, false, \
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Enable project quotas") \
|
||||
x(degraded, u8, \
|
||||
OPT_MOUNT, \
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "quota.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
#include "super-io.h"
|
||||
|
||||
#include <linux/sort.h>
|
||||
@ -961,6 +962,81 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
|
||||
{
|
||||
struct bkey_i_snapshot root_snapshot;
|
||||
struct bkey_i_subvolume root_volume;
|
||||
int ret;
|
||||
|
||||
bkey_snapshot_init(&root_snapshot.k_i);
|
||||
root_snapshot.k.p.offset = U32_MAX;
|
||||
root_snapshot.v.flags = 0;
|
||||
root_snapshot.v.parent = 0;
|
||||
root_snapshot.v.subvol = BCACHEFS_ROOT_SUBVOL;
|
||||
root_snapshot.v.pad = 0;
|
||||
SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_snapshots,
|
||||
&root_snapshot.k_i,
|
||||
NULL, NULL, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
||||
bkey_subvolume_init(&root_volume.k_i);
|
||||
root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
|
||||
root_volume.v.flags = 0;
|
||||
root_volume.v.snapshot = cpu_to_le32(U32_MAX);
|
||||
root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_subvolumes,
|
||||
&root_volume.k_i,
|
||||
NULL, NULL, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_inode_unpacked inode;
|
||||
struct bkey_inode_buf *packed;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
|
||||
POS(0, BCACHEFS_ROOT_INO), 0);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_inode) {
|
||||
bch_err(c, "root inode not found");
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode);
|
||||
BUG_ON(ret);
|
||||
|
||||
inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
|
||||
|
||||
packed = bch2_trans_kmalloc(trans, sizeof(*packed));
|
||||
ret = PTR_ERR_OR_ZERO(packed);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_inode_pack(c, packed, &inode);
|
||||
ret = bch2_trans_update(trans, &iter, &packed->inode.k_i, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_fs_recovery(struct bch_fs *c)
|
||||
{
|
||||
const char *err = "cannot allocate memory";
|
||||
@ -1017,11 +1093,12 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
c->opts.version_upgrade = true;
|
||||
c->opts.fsck = true;
|
||||
c->opts.fix_errors = FSCK_OPT_YES;
|
||||
}
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
|
||||
} else if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
|
||||
bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
|
||||
c->opts.version_upgrade = true;
|
||||
} else if (c->sb.version < bcachefs_metadata_version_snapshot) {
|
||||
bch_info(c, "filesystem version is prior to snapshot field - upgrading");
|
||||
c->opts.version_upgrade = true;
|
||||
}
|
||||
|
||||
ret = bch2_blacklist_table_initialize(c);
|
||||
@ -1190,6 +1267,29 @@ use_clean:
|
||||
bch_verbose(c, "alloc write done");
|
||||
}
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot) {
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch_verbose(c, "reading snapshots table");
|
||||
err = "error reading snapshots table";
|
||||
ret = bch2_fs_snapshots_start(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "reading snapshots done");
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot) {
|
||||
/* set bi_subvol on root inode */
|
||||
err = "error upgrade root inode for subvolumes";
|
||||
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
|
||||
bch2_fs_upgrade_for_subvolumes(&trans));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (c->opts.fsck) {
|
||||
bch_info(c, "starting fsck");
|
||||
err = "error in fsck";
|
||||
@ -1350,9 +1450,22 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch_verbose(c, "reading snapshots table");
|
||||
err = "error reading snapshots table";
|
||||
ret = bch2_fs_snapshots_start(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "reading snapshots done");
|
||||
|
||||
bch2_inode_init(c, &root_inode, 0, 0,
|
||||
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
|
||||
root_inode.bi_inum = BCACHEFS_ROOT_INO;
|
||||
root_inode.bi_inum = BCACHEFS_ROOT_INO;
|
||||
root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
|
||||
bch2_inode_pack(c, &packed_inode, &root_inode);
|
||||
packed_inode.inode.k.p.snapshot = U32_MAX;
|
||||
|
||||
@ -1367,11 +1480,12 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
|
||||
err = "error creating lost+found";
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
bch2_create_trans(&trans, BCACHEFS_ROOT_INO,
|
||||
bch2_create_trans(&trans,
|
||||
BCACHEFS_ROOT_SUBVOL_INUM,
|
||||
&root_inode, &lostfound_inode,
|
||||
&lostfound,
|
||||
0, 0, S_IFDIR|0700, 0,
|
||||
NULL, NULL));
|
||||
NULL, NULL, (subvol_inum) { 0 }, 0));
|
||||
if (ret) {
|
||||
bch_err(c, "error creating lost+found");
|
||||
goto err;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "reflink.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
#include <linux/sched/signal.h>
|
||||
|
||||
@ -197,7 +198,8 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
|
||||
}
|
||||
|
||||
s64 bch2_remap_range(struct bch_fs *c,
|
||||
struct bpos dst_start, struct bpos src_start,
|
||||
subvol_inum dst_inum, u64 dst_offset,
|
||||
subvol_inum src_inum, u64 src_offset,
|
||||
u64 remap_sectors, u64 *journal_seq,
|
||||
u64 new_i_size, s64 *i_sectors_delta)
|
||||
{
|
||||
@ -205,9 +207,12 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
struct btree_iter dst_iter, src_iter;
|
||||
struct bkey_s_c src_k;
|
||||
struct bkey_buf new_dst, new_src;
|
||||
struct bpos dst_start = POS(dst_inum.inum, dst_offset);
|
||||
struct bpos src_start = POS(src_inum.inum, src_offset);
|
||||
struct bpos dst_end = dst_start, src_end = src_start;
|
||||
struct bpos src_want;
|
||||
u64 dst_done;
|
||||
u32 dst_snapshot, src_snapshot;
|
||||
int ret = 0, ret2 = 0;
|
||||
|
||||
if (!percpu_ref_tryget(&c->writes))
|
||||
@ -238,6 +243,20 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, src_inum.subvol,
|
||||
&src_snapshot);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(&trans, dst_inum.subvol,
|
||||
&dst_snapshot);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
|
||||
|
||||
dst_done = dst_iter.pos.offset - dst_start.offset;
|
||||
src_want = POS(src_start.inode, src_start.offset + dst_done);
|
||||
bch2_btree_iter_set_pos(&src_iter, src_want);
|
||||
@ -248,11 +267,11 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
continue;
|
||||
|
||||
if (bkey_cmp(src_want, src_iter.pos) < 0) {
|
||||
ret = bch2_fpunch_at(&trans, &dst_iter,
|
||||
bpos_min(dst_end,
|
||||
POS(dst_iter.pos.inode, dst_iter.pos.offset +
|
||||
src_iter.pos.offset - src_want.offset)),
|
||||
journal_seq, i_sectors_delta);
|
||||
ret = bch2_fpunch_at(&trans, &dst_iter, dst_inum,
|
||||
min(dst_end.offset,
|
||||
dst_iter.pos.offset +
|
||||
src_iter.pos.offset - src_want.offset),
|
||||
journal_seq, i_sectors_delta);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -289,8 +308,9 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
bch2_key_resize(&new_dst.k->k,
|
||||
min(src_k.k->p.offset - src_want.offset,
|
||||
dst_end.offset - dst_iter.pos.offset));
|
||||
ret = bch2_extent_update(&trans, &dst_iter, new_dst.k,
|
||||
&disk_res, journal_seq,
|
||||
|
||||
ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
|
||||
new_dst.k, &disk_res, journal_seq,
|
||||
new_i_size, i_sectors_delta,
|
||||
true);
|
||||
bch2_disk_reservation_put(c, &disk_res);
|
||||
@ -311,7 +331,7 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u,
|
||||
dst_start.inode, BTREE_ITER_INTENT);
|
||||
dst_inum, BTREE_ITER_INTENT);
|
||||
|
||||
if (!ret2 &&
|
||||
inode_u.bi_size < new_i_size) {
|
||||
|
@ -57,7 +57,7 @@ static inline __le64 *bkey_refcount(struct bkey_i *k)
|
||||
}
|
||||
}
|
||||
|
||||
s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
|
||||
u64, u64 *, u64, s64 *);
|
||||
s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
|
||||
subvol_inum, u64, u64, u64 *, u64, s64 *);
|
||||
|
||||
#endif /* _BCACHEFS_REFLINK_H */
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "error.h"
|
||||
#include "inode.h"
|
||||
#include "siphash.h"
|
||||
#include "subvolume.h"
|
||||
#include "super.h"
|
||||
|
||||
#include <linux/crc32c.h>
|
||||
@ -144,16 +145,21 @@ bch2_hash_lookup(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
u64 inode, const void *key,
|
||||
subvol_inum inum, const void *key,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key(trans, *iter, desc.btree_id,
|
||||
POS(inode, desc.hash_key(info, key)),
|
||||
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
|
||||
BTREE_ITER_SLOTS|flags, k, ret) {
|
||||
if (iter->pos.inode != inode)
|
||||
if (iter->pos.inode != inum.inum)
|
||||
break;
|
||||
|
||||
if (k.k->type == desc.key_type) {
|
||||
@ -176,15 +182,20 @@ bch2_hash_hole(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
u64 inode, const void *key)
|
||||
subvol_inum inum, const void *key)
|
||||
{
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key(trans, *iter, desc.btree_id,
|
||||
POS(inode, desc.hash_key(info, key)),
|
||||
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
|
||||
if (iter->pos.inode != inode)
|
||||
if (iter->pos.inode != inum.inum)
|
||||
break;
|
||||
|
||||
if (k.k->type != desc.key_type)
|
||||
@ -229,17 +240,25 @@ static __always_inline
|
||||
int bch2_hash_set(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
u64 inode, struct bkey_i *insert, int flags)
|
||||
subvol_inum inum,
|
||||
struct bkey_i *insert, int flags)
|
||||
{
|
||||
struct btree_iter iter, slot = { NULL };
|
||||
struct bkey_s_c k;
|
||||
bool found = false;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key(trans, iter, desc.btree_id,
|
||||
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
|
||||
SPOS(inum.inum,
|
||||
desc.hash_bkey(info, bkey_i_to_s_c(insert)),
|
||||
snapshot),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
|
||||
if (iter.pos.inode != inode)
|
||||
if (iter.pos.inode != inum.inum)
|
||||
break;
|
||||
|
||||
if (k.k->type == desc.key_type) {
|
||||
@ -288,7 +307,8 @@ static __always_inline
|
||||
int bch2_hash_delete_at(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
struct btree_iter *iter)
|
||||
struct btree_iter *iter,
|
||||
unsigned update_flags)
|
||||
{
|
||||
struct bkey_i *delete;
|
||||
int ret;
|
||||
@ -306,24 +326,24 @@ int bch2_hash_delete_at(struct btree_trans *trans,
|
||||
delete->k.p = iter->pos;
|
||||
delete->k.type = ret ? KEY_TYPE_hash_whiteout : KEY_TYPE_deleted;
|
||||
|
||||
return bch2_trans_update(trans, iter, delete, 0);
|
||||
return bch2_trans_update(trans, iter, delete, update_flags);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
int bch2_hash_delete(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
u64 inode, const void *key)
|
||||
subvol_inum inum, const void *key)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
int ret;
|
||||
|
||||
ret = bch2_hash_lookup(trans, &iter, desc, info, inode, key,
|
||||
ret = bch2_hash_lookup(trans, &iter, desc, info, inum, key,
|
||||
BTREE_ITER_INTENT);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = bch2_hash_delete_at(trans, desc, info, &iter);
|
||||
ret = bch2_hash_delete_at(trans, desc, info, &iter, 0);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
libbcachefs/subvolume.c (981, new file)
@ -0,0 +1,981 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "btree_key_cache.h"
|
||||
#include "btree_update.h"
|
||||
#include "error.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
/* Snapshot tree: */
|
||||
|
||||
static void bch2_delete_dead_snapshots_work(struct work_struct *);
|
||||
static void bch2_delete_dead_snapshots(struct bch_fs *);
|
||||
|
||||
void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
|
||||
|
||||
pr_buf(out, "is_subvol %llu deleted %llu parent %u children %u %u subvol %u",
|
||||
BCH_SNAPSHOT_SUBVOL(s.v),
|
||||
BCH_SNAPSHOT_DELETED(s.v),
|
||||
le32_to_cpu(s.v->parent),
|
||||
le32_to_cpu(s.v->children[0]),
|
||||
le32_to_cpu(s.v->children[1]),
|
||||
le32_to_cpu(s.v->subvol));
|
||||
}
|
||||
|
||||
const char *bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_snapshot s;
|
||||
u32 i, id;
|
||||
|
||||
if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0 ||
|
||||
bkey_cmp(k.k->p, POS(0, 1)) < 0)
|
||||
return "bad pos";
|
||||
|
||||
if (bkey_val_bytes(k.k) != sizeof(struct bch_snapshot))
|
||||
return "bad val size";
|
||||
|
||||
s = bkey_s_c_to_snapshot(k);
|
||||
|
||||
id = le32_to_cpu(s.v->parent);
|
||||
if (id && id <= k.k->p.offset)
|
||||
return "bad parent node";
|
||||
|
||||
if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]))
|
||||
return "children not normalized";
|
||||
|
||||
if (s.v->children[0] &&
|
||||
s.v->children[0] == s.v->children[1])
|
||||
return "duplicate child nodes";
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
id = le32_to_cpu(s.v->children[i]);
|
||||
|
||||
if (id >= k.k->p.offset)
|
||||
return "bad child node";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int bch2_mark_snapshot(struct bch_fs *c,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
struct snapshot_t *t;
|
||||
|
||||
t = genradix_ptr_alloc(&c->snapshots,
|
||||
U32_MAX - new.k->p.offset,
|
||||
GFP_KERNEL);
|
||||
if (!t)
|
||||
return -ENOMEM;
|
||||
|
||||
if (new.k->type == KEY_TYPE_snapshot) {
|
||||
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
|
||||
|
||||
t->parent = le32_to_cpu(s.v->parent);
|
||||
t->children[0] = le32_to_cpu(s.v->children[0]);
|
||||
t->children[1] = le32_to_cpu(s.v->children[1]);
|
||||
t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
|
||||
} else {
|
||||
t->parent = 0;
|
||||
t->children[0] = 0;
|
||||
t->children[1] = 0;
|
||||
t->subvol = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int subvol_lookup(struct btree_trans *trans, unsigned id, struct bch_subvolume *s)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, POS(0, id), 0);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT;
|
||||
|
||||
if (!ret)
|
||||
*s = *bkey_s_c_to_subvolume(k).v;
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int snapshot_lookup(struct btree_trans *trans, u32 id,
|
||||
struct bch_snapshot *s)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k) ?: k.k->type == KEY_TYPE_snapshot ? 0 : -ENOENT;
|
||||
|
||||
if (!ret)
|
||||
*s = *bkey_s_c_to_snapshot(k).v;
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int snapshot_live(struct btree_trans *trans, u32 id)
|
||||
{
|
||||
struct bch_snapshot v;
|
||||
int ret;
|
||||
|
||||
if (!id)
|
||||
return 0;
|
||||
|
||||
ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
|
||||
if (ret == -ENOENT)
|
||||
bch_err(trans->c, "snapshot node %u not found", id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return !BCH_SNAPSHOT_DELETED(&v);
|
||||
}
|
||||
|
||||
static int bch2_snapshots_set_equiv(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_snapshot snap;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
|
||||
POS_MIN, 0, k, ret) {
|
||||
u32 id = k.k->p.offset, child[2];
|
||||
unsigned nr_live = 0, live_idx;
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot)
|
||||
continue;
|
||||
|
||||
snap = bkey_s_c_to_snapshot(k);
|
||||
child[0] = le32_to_cpu(snap.v->children[0]);
|
||||
child[1] = le32_to_cpu(snap.v->children[1]);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
ret = snapshot_live(trans, child[i]);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (ret)
|
||||
live_idx = i;
|
||||
nr_live += ret;
|
||||
}
|
||||
|
||||
snapshot_t(c, id)->equiv = nr_live == 1
|
||||
? snapshot_t(c, child[live_idx])->equiv
|
||||
: id;
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error walking snapshots: %i", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* fsck: */
|
||||
static int bch2_snapshot_check(struct btree_trans *trans,
|
||||
struct bkey_s_c_snapshot s)
|
||||
{
|
||||
struct bch_subvolume subvol;
|
||||
struct bch_snapshot v;
|
||||
u32 i, id;
|
||||
int ret;
|
||||
|
||||
id = le32_to_cpu(s.v->subvol);
|
||||
ret = lockrestart_do(trans, subvol_lookup(trans, id, &subvol));
|
||||
if (ret == -ENOENT)
|
||||
bch_err(trans->c, "snapshot node %llu has nonexistent subvolume %u",
|
||||
s.k->p.offset, id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (BCH_SNAPSHOT_SUBVOL(s.v) != (le32_to_cpu(subvol.snapshot) == s.k->p.offset)) {
|
||||
bch_err(trans->c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
|
||||
s.k->p.offset);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
id = le32_to_cpu(s.v->parent);
|
||||
if (id) {
|
||||
ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
|
||||
if (ret == -ENOENT)
|
||||
bch_err(trans->c, "snapshot node %llu has nonexistent parent %u",
|
||||
s.k->p.offset, id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (le32_to_cpu(v.children[0]) != s.k->p.offset &&
|
||||
le32_to_cpu(v.children[1]) != s.k->p.offset) {
|
||||
bch_err(trans->c, "snapshot parent %u missing pointer to child %llu",
|
||||
id, s.k->p.offset);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 && s.v->children[i]; i++) {
|
||||
id = le32_to_cpu(s.v->children[i]);
|
||||
|
||||
ret = lockrestart_do(trans, snapshot_lookup(trans, id, &v));
|
||||
if (ret == -ENOENT)
|
||||
bch_err(trans->c, "snapshot node %llu has nonexistent child %u",
|
||||
s.k->p.offset, id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (le32_to_cpu(v.parent) != s.k->p.offset) {
|
||||
bch_err(trans->c, "snapshot child %u has wrong parent (got %u should be %llu)",
|
||||
id, le32_to_cpu(v.parent), s.k->p.offset);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_fs_snapshots_check(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_snapshot s;
|
||||
unsigned id;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
|
||||
POS_MIN, 0, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_snapshot)
|
||||
continue;
|
||||
|
||||
ret = bch2_snapshot_check(&trans, bkey_s_c_to_snapshot(k));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "error %i checking snapshots", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_subvolumes,
|
||||
POS_MIN, 0, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_subvolume)
|
||||
continue;
|
||||
again_2:
|
||||
id = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
|
||||
ret = snapshot_lookup(&trans, id, &s);
|
||||
|
||||
if (ret == -EINTR) {
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
goto again_2;
|
||||
} else if (ret == -ENOENT)
|
||||
bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
|
||||
k.k->p.offset, id);
|
||||
else if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
err:
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_fs_snapshots_exit(struct bch_fs *c)
|
||||
{
|
||||
genradix_free(&c->snapshots);
|
||||
}
|
||||
|
||||
int bch2_fs_snapshots_start(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
bool have_deleted = false;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
|
||||
POS_MIN, 0, k, ret) {
|
||||
if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
|
||||
break;
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot) {
|
||||
bch_err(c, "found wrong key type %u in snapshot node table",
|
||||
k.k->type);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v))
|
||||
have_deleted = true;
|
||||
|
||||
ret = bch2_mark_snapshot(c, bkey_s_c_null, k, 0, 0);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_snapshots_set_equiv(&trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
err:
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (!ret && have_deleted) {
|
||||
bch_info(c, "restarting deletion of dead snapshots");
|
||||
if (c->opts.fsck) {
|
||||
bch2_delete_dead_snapshots_work(&c->snapshot_delete_work);
|
||||
} else {
|
||||
bch2_delete_dead_snapshots(c);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a snapshot as deleted, for future cleanup:
|
||||
*/
|
||||
static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_i_snapshot *s;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
|
||||
BTREE_ITER_INTENT);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot) {
|
||||
bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* already deleted? */
|
||||
if (BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v))
|
||||
goto err;
|
||||
|
||||
s = bch2_trans_kmalloc(trans, sizeof(*s));
|
||||
ret = PTR_ERR_OR_ZERO(s);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&s->k_i, k);
|
||||
|
||||
SET_BCH_SNAPSHOT_DELETED(&s->v, true);
|
||||
ret = bch2_trans_update(trans, &iter, &s->k_i, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
|
||||
{
|
||||
struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_snapshot s;
|
||||
struct bkey_i_snapshot *parent;
|
||||
u32 parent_id;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, POS(0, id),
|
||||
BTREE_ITER_INTENT);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot) {
|
||||
bch2_fs_inconsistent(trans->c, "missing snapshot %u", id);
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
s = bkey_s_c_to_snapshot(k);
|
||||
|
||||
BUG_ON(!BCH_SNAPSHOT_DELETED(s.v));
|
||||
parent_id = le32_to_cpu(s.v->parent);
|
||||
|
||||
if (parent_id) {
|
||||
bch2_trans_iter_init(trans, &p_iter, BTREE_ID_snapshots,
|
||||
POS(0, parent_id),
|
||||
BTREE_ITER_INTENT);
|
||||
k = bch2_btree_iter_peek_slot(&p_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot) {
|
||||
bch2_fs_inconsistent(trans->c, "missing snapshot %u", parent_id);
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
parent = bch2_trans_kmalloc(trans, sizeof(*parent));
|
||||
ret = PTR_ERR_OR_ZERO(parent);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&parent->k_i, k);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
if (le32_to_cpu(parent->v.children[i]) == id)
|
||||
break;
|
||||
|
||||
if (i == 2)
|
||||
bch_err(trans->c, "snapshot %u missing child pointer to %u",
|
||||
parent_id, id);
|
||||
else
|
||||
parent->v.children[i] = 0;
|
||||
|
||||
if (le32_to_cpu(parent->v.children[0]) <
|
||||
le32_to_cpu(parent->v.children[1]))
|
||||
swap(parent->v.children[0],
|
||||
parent->v.children[1]);
|
||||
|
||||
ret = bch2_trans_update(trans, &p_iter, &parent->k_i, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_at(trans, &iter, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &p_iter);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
				     u32 *new_snapids,
				     u32 *snapshot_subvols,
				     unsigned nr_snapids)
{
	struct btree_iter iter;
	struct bkey_i_snapshot *n;
	struct bkey_s_c k;
	unsigned i;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
			     POS_MIN, BTREE_ITER_INTENT);
	k = bch2_btree_iter_peek(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	for (i = 0; i < nr_snapids; i++) {
		k = bch2_btree_iter_prev_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (!k.k || !k.k->p.offset) {
			ret = -ENOSPC;
			goto err;
		}

		n = bch2_trans_kmalloc(trans, sizeof(*n));
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			return ret;

		bkey_snapshot_init(&n->k_i);
		n->k.p		= iter.pos;
		n->v.flags	= 0;
		n->v.parent	= cpu_to_le32(parent);
		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
		n->v.pad	= 0;
		SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);

		bch2_trans_update(trans, &iter, &n->k_i, 0);

		ret = bch2_mark_snapshot(trans->c, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0, 0);
		if (ret)
			break;

		new_snapids[i] = iter.pos.offset;
	}

	if (parent) {
		bch2_btree_iter_set_pos(&iter, POS(0, parent));
		k = bch2_btree_iter_peek(&iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (k.k->type != KEY_TYPE_snapshot) {
			bch_err(trans->c, "snapshot %u not found", parent);
			ret = -ENOENT;
			goto err;
		}

		n = bch2_trans_kmalloc(trans, sizeof(*n));
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			return ret;

		bkey_reassemble(&n->k_i, k);

		if (n->v.children[0] || n->v.children[1]) {
			bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
			ret = -EINVAL;
			goto err;
		}

		n->v.children[0] = cpu_to_le32(new_snapids[0]);
		n->v.children[1] = cpu_to_le32(new_snapids[1]);
		SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
		bch2_trans_update(trans, &iter, &n->k_i, 0);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* List of snapshot IDs that are being deleted: */
struct snapshot_id_list {
	u32		nr;
	u32		size;
	u32		*d;
};

static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id)
{
	unsigned i;

	for (i = 0; i < s->nr; i++)
		if (id == s->d[i])
			return true;
	return false;
}

static int snapshot_id_add(struct snapshot_id_list *s, u32 id)
{
	BUG_ON(snapshot_list_has_id(s, id));

	if (s->nr == s->size) {
		size_t new_size = max(8U, s->size * 2);
		void *n = krealloc(s->d,
				   new_size * sizeof(s->d[0]),
				   GFP_KERNEL);
		if (!n) {
			pr_err("error allocating snapshot ID list");
			return -ENOMEM;
		}

		s->d	= n;
		s->size	= new_size;
	};

	s->d[s->nr++] = id;
	return 0;
}

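Not part of the commit: a minimal userspace sketch of the grow-by-doubling ID list above, with plain realloc() and printf() standing in for krealloc() and pr_err(); those substitutions and the main() driver are assumptions for illustration only.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct id_list { unsigned nr, size; unsigned *d; };

static bool id_list_has(struct id_list *s, unsigned id)
{
	for (unsigned i = 0; i < s->nr; i++)
		if (s->d[i] == id)
			return true;
	return false;
}

static int id_list_add(struct id_list *s, unsigned id)
{
	if (s->nr == s->size) {
		/* grow by doubling, starting from 8 entries */
		unsigned new_size = s->size ? s->size * 2 : 8;
		unsigned *n = realloc(s->d, new_size * sizeof(*n));

		if (!n)
			return -1;
		s->d	= n;
		s->size	= new_size;
	}

	s->d[s->nr++] = id;
	return 0;
}

int main(void)
{
	struct id_list deleted = { 0 };

	for (unsigned id = 10; id < 20; id++)
		id_list_add(&deleted, id);

	printf("has 12: %d, has 42: %d\n",
	       id_list_has(&deleted, 12), id_list_has(&deleted, 42));
	free(deleted.d);
	return 0;
}
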
static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
					   struct snapshot_id_list *deleted,
					   enum btree_id btree_id)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct snapshot_id_list equiv_seen = { 0 };
	struct bpos last_pos = POS_MIN;
	int ret = 0;

	/*
	 * XXX: We should also delete whiteouts that no longer overwrite
	 * anything
	 */

	bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN,
			     BTREE_ITER_INTENT|
			     BTREE_ITER_PREFETCH|
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);

	while ((bch2_trans_begin(trans),
		(k = bch2_btree_iter_peek(&iter)).k) &&
	       !(ret = bkey_err(k))) {
		u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;

		if (bkey_cmp(k.k->p, last_pos))
			equiv_seen.nr = 0;
		last_pos = k.k->p;

		if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
		    snapshot_list_has_id(&equiv_seen, equiv)) {
			if (btree_id == BTREE_ID_inodes &&
			    bch2_btree_key_cache_flush(trans, btree_id, iter.pos))
				continue;

			ret = __bch2_trans_do(trans, NULL, NULL,
					      BTREE_INSERT_NOFAIL,
				bch2_btree_iter_traverse(&iter) ?:
				bch2_btree_delete_at(trans, &iter,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
			if (ret)
				break;
		} else {
			ret = snapshot_id_add(&equiv_seen, equiv);
			if (ret)
				break;
		}

		bch2_btree_iter_advance(&iter);
	}
	bch2_trans_iter_exit(trans, &iter);

	kfree(equiv_seen.d);

	return ret;
}

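Not part of the commit: a small userspace model of the decision the loop above makes for each key. Keys arrive sorted by position and then by snapshot; a key is dropped if its snapshot is on the deleted list, or if another key at the same position has already covered the same equivalence class. The toy equiv[] table, key list, and main() driver are assumptions for illustration only.

#include <stdbool.h>
#include <stdio.h>

/* Toy model: a "key" is a (pos, snapshot) pair; equiv[] collapses
 * snapshot IDs that become indistinguishable once some are deleted. */
struct key { unsigned pos, snapshot; };

static bool list_has(const unsigned *d, unsigned nr, unsigned id)
{
	for (unsigned i = 0; i < nr; i++)
		if (d[i] == id)
			return true;
	return false;
}

int main(void)
{
	/* snapshot 3 is being deleted; 2 and 4 now share equivalence class 2 */
	const unsigned deleted[]	= { 3 };
	const unsigned equiv[]		= { 0, 1, 2, 2, 2 };	/* indexed by snapshot ID */

	/* keys in the order the btree iterator would return them */
	const struct key keys[] = {
		{ 10, 2 }, { 10, 3 }, { 10, 4 },
		{ 11, 4 },
	};

	unsigned seen[8], seen_nr = 0, last_pos = (unsigned)-1;

	for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		unsigned e = equiv[keys[i].snapshot];

		if (keys[i].pos != last_pos)
			seen_nr = 0;	/* new position: reset classes seen */
		last_pos = keys[i].pos;

		if (list_has(deleted, 1, keys[i].snapshot) ||
		    list_has(seen, seen_nr, e)) {
			printf("delete key (%u, %u)\n", keys[i].pos, keys[i].snapshot);
		} else {
			seen[seen_nr++] = e;
			printf("keep   key (%u, %u)\n", keys[i].pos, keys[i].snapshot);
		}
	}
	return 0;
}
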
static void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_snapshot snap;
	struct snapshot_id_list deleted = { 0 };
	u32 i, id, children[2];
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	/*
	 * For every snapshot node: If we have no live children and it's not
	 * pointed to by a subvolume, delete it:
	 */
	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
			   POS_MIN, 0, k, ret) {
		if (k.k->type != KEY_TYPE_snapshot)
			continue;

		snap = bkey_s_c_to_snapshot(k);
		if (BCH_SNAPSHOT_DELETED(snap.v) ||
		    BCH_SNAPSHOT_SUBVOL(snap.v))
			continue;

		children[0] = le32_to_cpu(snap.v->children[0]);
		children[1] = le32_to_cpu(snap.v->children[1]);

		ret = snapshot_live(&trans, children[0]) ?:
		      snapshot_live(&trans, children[1]);
		if (ret < 0)
			break;
		if (ret)
			continue;

		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
			bch2_snapshot_node_set_deleted(&trans, iter.pos.offset));
		if (ret) {
			bch_err(c, "error deleting snapshot %llu: %i", iter.pos.offset, ret);
			break;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	if (ret) {
		bch_err(c, "error walking snapshots: %i", ret);
		goto err;
	}

	ret = bch2_snapshots_set_equiv(&trans);
	if (ret)
		goto err;

	for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
			   POS_MIN, 0, k, ret) {
		if (k.k->type != KEY_TYPE_snapshot)
			continue;

		snap = bkey_s_c_to_snapshot(k);
		if (BCH_SNAPSHOT_DELETED(snap.v)) {
			ret = snapshot_id_add(&deleted, k.k->p.offset);
			if (ret)
				break;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	if (ret) {
		bch_err(c, "error walking snapshots: %i", ret);
		goto err;
	}

	for (id = 0; id < BTREE_ID_NR; id++) {
		if (!btree_type_has_snapshots(id))
			continue;

		ret = bch2_snapshot_delete_keys_btree(&trans, &deleted, id);
		if (ret) {
			bch_err(c, "error deleting snapshot keys: %i", ret);
			goto err;
		}
	}

	for (i = 0; i < deleted.nr; i++) {
		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
			bch2_snapshot_node_delete(&trans, deleted.d[i]));
		if (ret) {
			bch_err(c, "error deleting snapshot %u: %i",
				deleted.d[i], ret);
			goto err;
		}
	}
err:
	kfree(deleted.d);
	bch2_trans_exit(&trans);
	percpu_ref_put(&c->writes);
}

static void bch2_delete_dead_snapshots(struct bch_fs *c)
{
	if (unlikely(!percpu_ref_tryget(&c->writes)))
		return;

	if (!queue_work(system_long_wq, &c->snapshot_delete_work))
		percpu_ref_put(&c->writes);
}

static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
					   struct btree_trans_commit_hook *h)
{
	bch2_delete_dead_snapshots(trans->c);
	return 0;
}

/* Subvolumes: */

const char *bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	if (bkey_cmp(k.k->p, SUBVOL_POS_MIN) < 0)
		return "invalid pos";

	if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
		return "invalid pos";

	if (bkey_val_bytes(k.k) != sizeof(struct bch_subvolume))
		return "bad val size";

	return NULL;
}

void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

	pr_buf(out, "root %llu snapshot id %u",
	       le64_to_cpu(s.v->inode),
	       le32_to_cpu(s.v->snapshot));
}

int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol,
				u32 *snapid)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
			     POS(0, subvol),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_WITH_UPDATES);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_subvolume) {
		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol);
		ret = -EIO;
		goto err;
	}

	*snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* XXX: mark snapshot id for deletion, walk btree and delete: */
int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid,
			  int deleting_snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_subvolume subvol;
	struct btree_trans_commit_hook *h;
	struct bkey_i *delete;
	u32 snapid;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes,
			     POS(0, subvolid),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_subvolume) {
		bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid);
		ret = -EIO;
		goto err;
	}

	subvol = bkey_s_c_to_subvolume(k);
	snapid = le32_to_cpu(subvol.v->snapshot);

	if (deleting_snapshot >= 0 &&
	    deleting_snapshot != BCH_SUBVOLUME_SNAP(subvol.v)) {
		ret = -ENOENT;
		goto err;
	}

	delete = bch2_trans_kmalloc(trans, sizeof(*delete));
	ret = PTR_ERR_OR_ZERO(delete);
	if (ret)
		goto err;

	bkey_init(&delete->k);
	delete->k.p = iter.pos;
	ret = bch2_trans_update(trans, &iter, delete, 0);
	if (ret)
		goto err;

	ret = bch2_snapshot_node_set_deleted(trans, snapid);

	h = bch2_trans_kmalloc(trans, sizeof(*h));
	ret = PTR_ERR_OR_ZERO(h);
	if (ret)
		goto err;

	h->fn = bch2_delete_dead_snapshots_hook;
	bch2_trans_commit_hook(trans, h);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
			  u32 src_subvolid,
			  u32 *new_subvolid,
			  u32 *new_snapshotid,
			  bool ro)
{
	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
	struct bkey_i_subvolume *new_subvol = NULL;
	struct bkey_i_subvolume *src_subvol = NULL;
	struct bkey_s_c k;
	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
	int ret = 0;

	for_each_btree_key(trans, dst_iter, BTREE_ID_subvolumes, SUBVOL_POS_MIN,
			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
		if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
			break;
		if (bkey_deleted(k.k))
			goto found_slot;
	}

	if (!ret)
		ret = -ENOSPC;
	goto err;
found_slot:
	snapshot_subvols[0] = dst_iter.pos.offset;
	snapshot_subvols[1] = src_subvolid;

	if (src_subvolid) {
		/* Creating a snapshot: */
		src_subvol = bch2_trans_kmalloc(trans, sizeof(*src_subvol));
		ret = PTR_ERR_OR_ZERO(src_subvol);
		if (ret)
			goto err;

		bch2_trans_iter_init(trans, &src_iter, BTREE_ID_subvolumes,
				     POS(0, src_subvolid),
				     BTREE_ITER_CACHED|
				     BTREE_ITER_INTENT);
		k = bch2_btree_iter_peek_slot(&src_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (k.k->type != KEY_TYPE_subvolume) {
			bch_err(trans->c, "subvolume %u not found", src_subvolid);
			ret = -ENOENT;
			goto err;
		}

		bkey_reassemble(&src_subvol->k_i, k);
		parent = le32_to_cpu(src_subvol->v.snapshot);
	}

	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
					snapshot_subvols,
					src_subvolid ? 2 : 1);
	if (ret)
		goto err;

	if (src_subvolid) {
		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
		bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
	}

	new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
	ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		goto err;

	bkey_subvolume_init(&new_subvol->k_i);
	new_subvol->v.flags	= 0;
	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
	new_subvol->v.inode	= cpu_to_le64(inode);
	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
	new_subvol->k.p		= dst_iter.pos;
	bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0);

	*new_subvolid	= new_subvol->k.p.offset;
	*new_snapshotid	= new_nodes[0];
err:
	bch2_trans_iter_exit(trans, &src_iter);
	bch2_trans_iter_exit(trans, &dst_iter);
	return ret;
}

int bch2_fs_subvolumes_init(struct bch_fs *c)
{
	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
	return 0;
}

115 libbcachefs/subvolume.h Normal file

@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SUBVOLUME_H
#define _BCACHEFS_SUBVOLUME_H

void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
const char *bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_snapshot (struct bkey_ops) {	\
	.key_invalid	= bch2_snapshot_invalid,	\
	.val_to_text	= bch2_snapshot_to_text,	\
}

int bch2_mark_snapshot(struct bch_fs *, struct bkey_s_c,
		       struct bkey_s_c, u64, unsigned);

static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
{
	return genradix_ptr(&c->snapshots, U32_MAX - id);
}

static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
	return snapshot_t(c, id)->parent;
}

static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
{
	struct snapshot_t *s = snapshot_t(c, id);

	return s->children[0] || s->children[1];
}

static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
{
	struct snapshot_t *s;
	u32 parent = bch2_snapshot_parent(c, id);

	if (!parent)
		return 0;

	s = snapshot_t(c, bch2_snapshot_parent(c, id));
	if (id == s->children[0])
		return s->children[1];
	if (id == s->children[1])
		return s->children[0];
	return 0;
}

static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	while (id && id < ancestor)
		id = bch2_snapshot_parent(c, id);

	return id == ancestor;
}

struct snapshots_seen {
	struct bpos		pos;
	size_t			nr;
	size_t			size;
	u32			*d;
};

static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
	kfree(s->d);
	s->d = NULL;
}

static inline void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(*s));
}

static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
	if (s->nr == s->size) {
		size_t new_size = max(s->size, 128UL) * 2;
		u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);

		if (!d) {
			bch_err(c, "error reallocating snapshots_seen table (new size %zu)",
				new_size);
			return -ENOMEM;
		}

		s->size = new_size;
		s->d	= d;
	}

	s->d[s->nr++] = id;
	return 0;
}

int bch2_fs_snapshots_check(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
int bch2_fs_snapshots_start(struct bch_fs *);

const char *bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_subvolume (struct bkey_ops) {	\
	.key_invalid	= bch2_subvolume_invalid,	\
	.val_to_text	= bch2_subvolume_to_text,	\
}

int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);

int bch2_subvolume_delete(struct btree_trans *, u32, int);
int bch2_subvolume_create(struct btree_trans *, u64, u32,
			  u32 *, u32 *, bool);

int bch2_fs_subvolumes_init(struct bch_fs *);

#endif /* _BCACHEFS_SUBVOLUME_H */

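Not part of the commit: a userspace model of the bch2_snapshot_is_ancestor() walk above. The loop assumes a child node's ID is numerically smaller than its parent's, which is what the slot allocation in bch2_snapshot_node_create() appears to guarantee, so walking parents while id < ancestor stops at or above the candidate ancestor. The parent_of[] table and main() driver are assumptions for illustration only.

#include <stdbool.h>
#include <stdio.h>

/* Toy snapshot table indexed by ID; parent == 0 means "no parent".
 * IDs shrink toward the leaves, mirroring how new nodes are
 * allocated below the lowest existing ID. */
static unsigned parent_of[16] = {
	[10] = 0,	/* 10 is the root */
	[8]  = 10,
	[9]  = 10,
	[5]  = 8,
	[6]  = 8,
};

static bool is_ancestor(unsigned id, unsigned ancestor)
{
	while (id && id < ancestor)
		id = parent_of[id];

	return id == ancestor;
}

int main(void)
{
	printf("%d\n", is_ancestor(5, 10));	/* 1: 5 -> 8 -> 10 */
	printf("%d\n", is_ancestor(5, 9));	/* 0: 5's path to the root skips 9 */
	printf("%d\n", is_ancestor(10, 10));	/* 1: a node is its own ancestor */
	return 0;
}
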
@ -39,6 +39,7 @@
#include "rebalance.h"
#include "recovery.h"
#include "replicas.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"
#include "sysfs.h"
@ -468,6 +469,7 @@ static void __bch2_fs_free(struct bch_fs *c)
	for (i = 0; i < BCH_TIME_STAT_NR; i++)
		bch2_time_stats_exit(&c->times[i]);

	bch2_fs_snapshots_exit(c);
	bch2_fs_quota_exit(c);
	bch2_fs_fsio_exit(c);
	bch2_fs_ec_exit(c);
@ -686,6 +688,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	mutex_init(&c->usage_scratch_lock);

	mutex_init(&c->bio_bounce_pages_lock);
	mutex_init(&c->snapshot_table_lock);

	spin_lock_init(&c->btree_write_error_lock);

@ -789,6 +792,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	    bch2_fs_btree_key_cache_init(&c->btree_key_cache) ||
	    bch2_fs_btree_iter_init(c) ||
	    bch2_fs_btree_interior_update_init(c) ||
	    bch2_fs_subvolumes_init(c) ||
	    bch2_fs_io_init(c) ||
	    bch2_fs_encryption_init(c) ||
	    bch2_fs_compress_init(c) ||

@ -128,7 +128,7 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info
	int ret;

	ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash,
			       inode->v.i_ino,
			       inode_inum(inode),
			       &X_SEARCH(type, name, strlen(name)),
			       0);
	if (ret)
@ -160,7 +160,7 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
		bch2_xattr_get_trans(&trans, inode, name, buffer, size, type));
}

int bch2_xattr_set(struct btree_trans *trans, u64 inum,
int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
		   const struct bch_hash_info *hash_info,
		   const char *name, const void *value, size_t size,
		   int type, int flags)
@ -282,13 +282,21 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
	struct btree_iter iter;
	struct bkey_s_c k;
	struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
	u64 inum = dentry->d_inode->i_ino;
	u64 offset = 0, inum = inode->ei_inode.bi_inum;
	u32 snapshot;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);
	iter = (struct btree_iter) { NULL };

	ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
			   POS(inum, 0), 0, k, ret) {
			   SPOS(inum, offset, snapshot), 0, k, ret) {
		BUG_ON(k.k->p.inode < inum);

		if (k.k->p.inode > inum)
@ -301,7 +309,12 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
		if (ret)
			break;
	}

	offset = iter.pos.offset;
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (ret == -EINTR)
		goto retry;

	ret = bch2_trans_exit(&trans) ?: ret;

@ -340,7 +353,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
	struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);

	return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0,
			bch2_xattr_set(&trans, inode->v.i_ino, &hash,
			bch2_xattr_set(&trans, inode_inum(inode), &hash,
				       name, value, size,
				       handler->flags, flags));
}

@ -39,7 +39,8 @@ struct bch_inode_info;
int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *,
		   const char *, void *, size_t, int);

int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *,
int bch2_xattr_set(struct btree_trans *, subvol_inum,
		   const struct bch_hash_info *,
		   const char *, const void *, size_t, int, int);

ssize_t bch2_xattr_list(struct dentry *, char *, size_t);