Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-02 00:00:03 +03:00)

Update bcachefs sources to 940d6ca657 bcachefs: acl code improvements

commit 75c7148e0a
parent 17e2f2775b
@@ -1 +1 @@
2cb70a82bc0ca05d8c3cf666d221badd5724e339
940d6ca657ea70758f3f43323bfd531019a40d3c
@@ -239,8 +239,8 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,

const struct xattr_handler *h = xattr_resolve_name(&attr);

int ret = __bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
val, val_size, 0, h->flags, NULL);
int ret = bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
val, val_size, 0, h->flags, NULL);
if (ret < 0)
die("error creating xattr: %s", strerror(-ret));
}
@@ -4,25 +4,6 @@
struct super_block;
struct inode;

/* The hash is always the low bits of hash_len */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define HASH_LEN_DECLARE u32 hash; u32 len
#else
#define HASH_LEN_DECLARE u32 len; u32 hash
#endif

struct qstr {
union {
struct {
HASH_LEN_DECLARE;
};
u64 hash_len;
};
const unsigned char *name;
};

#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }

struct dentry {
struct super_block *d_sb;
struct inode *d_inode;
@@ -222,4 +222,23 @@ static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *
BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))

/* The hash is always the low bits of hash_len */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define HASH_LEN_DECLARE u32 hash; u32 len
#else
#define HASH_LEN_DECLARE u32 len; u32 hash
#endif

struct qstr {
union {
struct {
HASH_LEN_DECLARE;
};
u64 hash_len;
};
const unsigned char *name;
};

#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }

#endif
@@ -12,96 +12,175 @@
#include "fs.h"
#include "xattr.h"

static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
{
return sizeof(bch_acl_header) +
sizeof(bch_acl_entry_short) * nr_short +
sizeof(bch_acl_entry) * nr_long;
}

static inline int acl_to_xattr_type(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
return BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
default:
BUG();
}
}

/*
* Convert from filesystem to in-memory representation.
*/
static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size)
{
const char *end = (char *)value + size;
int n, count;
const void *p, *end = value + size;
struct posix_acl *acl;
struct posix_acl_entry *out;
unsigned count = 0;

if (!value)
return NULL;
if (size < sizeof(bch_acl_header))
return ERR_PTR(-EINVAL);
goto invalid;
if (((bch_acl_header *)value)->a_version !=
cpu_to_le32(BCH_ACL_VERSION))
return ERR_PTR(-EINVAL);
value = (char *)value + sizeof(bch_acl_header);
count = bch2_acl_count(size);
if (count < 0)
return ERR_PTR(-EINVAL);
if (count == 0)
return NULL;
acl = posix_acl_alloc(count, GFP_KERNEL);
if (!acl)
return ERR_PTR(-ENOMEM);
for (n = 0; n < count; n++) {
bch_acl_entry *entry =
(bch_acl_entry *)value;
if ((char *)value + sizeof(bch_acl_entry_short) > end)
goto fail;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
switch (acl->a_entries[n].e_tag) {
goto invalid;

p = value + sizeof(bch_acl_header);
while (p < end) {
const bch_acl_entry *entry = p;

if (p + sizeof(bch_acl_entry_short) > end)
goto invalid;

switch (le16_to_cpu(entry->e_tag)) {
case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
value = (char *)value +
sizeof(bch_acl_entry_short);
p += sizeof(bch_acl_entry_short);
break;

case ACL_USER:
value = (char *)value + sizeof(bch_acl_entry);
if ((char *)value > end)
goto fail;
acl->a_entries[n].e_uid =
make_kuid(&init_user_ns,
le32_to_cpu(entry->e_id));
case ACL_GROUP:
p += sizeof(bch_acl_entry);
break;
default:
goto invalid;
}

count++;
}

if (p > end)
goto invalid;

if (!count)
return NULL;

acl = posix_acl_alloc(count, GFP_KERNEL);
if (!acl)
return ERR_PTR(-ENOMEM);

out = acl->a_entries;

p = value + sizeof(bch_acl_header);
while (p < end) {
const bch_acl_entry *in = p;

out->e_tag = le16_to_cpu(in->e_tag);
out->e_perm = le16_to_cpu(in->e_perm);

switch (out->e_tag) {
case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
p += sizeof(bch_acl_entry_short);
break;
case ACL_USER:
out->e_uid = make_kuid(&init_user_ns,
le32_to_cpu(in->e_id));
p += sizeof(bch_acl_entry);
break;
case ACL_GROUP:
value = (char *)value + sizeof(bch_acl_entry);
if ((char *)value > end)
goto fail;
acl->a_entries[n].e_gid =
make_kgid(&init_user_ns,
le32_to_cpu(entry->e_id));
out->e_gid = make_kgid(&init_user_ns,
le32_to_cpu(in->e_id));
p += sizeof(bch_acl_entry);
break;

default:
goto fail;
}
}
if (value != end)
goto fail;
return acl;

fail:
posix_acl_release(acl);
out++;
}

BUG_ON(out != acl->a_entries + acl->a_count);

return acl;
invalid:
pr_err("invalid acl entry");
return ERR_PTR(-EINVAL);
}

#define acl_for_each_entry(acl, acl_e) \
for (acl_e = acl->a_entries; \
acl_e < acl->a_entries + acl->a_count; \
acl_e++)

/*
* Convert from in-memory to filesystem representation.
*/
static void *bch2_acl_to_disk(const struct posix_acl *acl, size_t *size)
static struct bkey_i_xattr *
bch2_acl_to_xattr(const struct posix_acl *acl,
int type)
{
bch_acl_header *ext_acl;
char *e;
size_t n;
struct bkey_i_xattr *xattr;
bch_acl_header *acl_header;
const struct posix_acl_entry *acl_e;
void *outptr;
unsigned nr_short = 0, nr_long = 0, acl_len, u64s;

*size = bch2_acl_size(acl->a_count);
ext_acl = kmalloc(sizeof(bch_acl_header) + acl->a_count *
sizeof(bch_acl_entry), GFP_KERNEL);
if (!ext_acl)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(BCH_ACL_VERSION);
e = (char *)ext_acl + sizeof(bch_acl_header);
for (n = 0; n < acl->a_count; n++) {
const struct posix_acl_entry *acl_e = &acl->a_entries[n];
bch_acl_entry *entry = (bch_acl_entry *)e;
acl_for_each_entry(acl, acl_e) {
switch (acl_e->e_tag) {
case ACL_USER:
case ACL_GROUP:
nr_long++;
break;
case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
nr_short++;
break;
default:
return ERR_PTR(-EINVAL);
}
}

acl_len = bch2_acl_size(nr_short, nr_long);
u64s = BKEY_U64s + xattr_val_u64s(0, acl_len);

if (u64s > U8_MAX)
return ERR_PTR(-E2BIG);

xattr = kmalloc(u64s * sizeof(u64), GFP_KERNEL);
if (IS_ERR(xattr))
return xattr;

bkey_xattr_init(&xattr->k_i);
xattr->k.u64s = u64s;
xattr->v.x_type = acl_to_xattr_type(type);
xattr->v.x_name_len = 0,
xattr->v.x_val_len = cpu_to_le16(acl_len);

acl_header = xattr_val(&xattr->v);
acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION);

outptr = (void *) acl_header + sizeof(*acl_header);

acl_for_each_entry(acl, acl_e) {
bch_acl_entry *entry = outptr;

entry->e_tag = cpu_to_le16(acl_e->e_tag);
entry->e_perm = cpu_to_le16(acl_e->e_perm);
@@ -109,70 +188,54 @@ static void *bch2_acl_to_disk(const struct posix_acl *acl, size_t *size)
case ACL_USER:
entry->e_id = cpu_to_le32(
from_kuid(&init_user_ns, acl_e->e_uid));
e += sizeof(bch_acl_entry);
outptr += sizeof(bch_acl_entry);
break;
case ACL_GROUP:
entry->e_id = cpu_to_le32(
from_kgid(&init_user_ns, acl_e->e_gid));
e += sizeof(bch_acl_entry);
outptr += sizeof(bch_acl_entry);
break;

case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
e += sizeof(bch_acl_entry_short);
outptr += sizeof(bch_acl_entry_short);
break;

default:
goto fail;
}
}
return (char *)ext_acl;

fail:
kfree(ext_acl);
return ERR_PTR(-EINVAL);
BUG_ON(outptr != xattr_val(&xattr->v) + acl_len);

return xattr;
}

struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int name_index;
char *value = NULL;
struct posix_acl *acl;
int ret;
struct btree_iter iter;
struct bkey_s_c_xattr xattr;
struct bkey_s_c k;
struct posix_acl *acl = NULL;
int name_index = acl_to_xattr_type(type);

switch (type) {
case ACL_TYPE_ACCESS:
name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
break;
default:
BUG();
k = bch2_xattr_get_iter(c, &iter, inode, "", name_index);
if (IS_ERR(k.k)) {
if (PTR_ERR(k.k) != -ENOENT)
acl = ERR_CAST(k.k);
goto out;
}
ret = bch2_xattr_get(c, inode, "", NULL, 0, name_index);
if (ret > 0) {
value = kmalloc(ret, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
ret = bch2_xattr_get(c, inode, "", value,
ret, name_index);
}
if (ret > 0)
acl = bch2_acl_from_disk(value, ret);
else if (ret == -ENODATA || ret == -ENOSYS)
acl = NULL;
else
acl = ERR_PTR(ret);
kfree(value);

xattr = bkey_s_c_to_xattr(k);

acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));

if (!IS_ERR(acl))
set_cached_acl(&inode->v, type, acl);

out:
bch2_btree_iter_unlock(&iter);
return acl;
}

@@ -180,37 +243,31 @@ int __bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int name_index;
void *value = NULL;
size_t size = 0;
int ret;

switch (type) {
case ACL_TYPE_ACCESS:
name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->v.i_mode))
return acl ? -EACCES : 0;
break;

default:
return -EINVAL;
}
if (type == ACL_TYPE_DEFAULT &&
!S_ISDIR(inode->v.i_mode))
return acl ? -EACCES : 0;

if (acl) {
value = bch2_acl_to_disk(acl, &size);
if (IS_ERR(value))
return (int)PTR_ERR(value);
struct bkey_i_xattr *xattr =
bch2_acl_to_xattr(acl, type);
if (IS_ERR(xattr))
return PTR_ERR(xattr);

ret = bch2_hash_set(bch2_xattr_hash_desc, &inode->ei_str_hash,
c, inode->v.i_ino, &inode->ei_journal_seq,
&xattr->k_i, 0);
kfree(xattr);
} else {
struct xattr_search_key search =
X_SEARCH(acl_to_xattr_type(type), "", 0);

ret = bch2_hash_delete(bch2_xattr_hash_desc, &inode->ei_str_hash,
c, inode->v.i_ino, &inode->ei_journal_seq,
&search);
}

ret = bch2_xattr_set(c, inode, "", value, size, 0, name_index);
kfree(value);

if (ret == -ERANGE)
ret = -E2BIG;

if (!ret)
set_cached_acl(&inode->v, type, acl);
@@ -20,35 +20,6 @@ typedef struct {
__le32 a_version;
} bch_acl_header;

static inline size_t bch2_acl_size(int count)
{
if (count <= 4) {
return sizeof(bch_acl_header) +
count * sizeof(bch_acl_entry_short);
} else {
return sizeof(bch_acl_header) +
4 * sizeof(bch_acl_entry_short) +
(count - 4) * sizeof(bch_acl_entry);
}
}

static inline int bch2_acl_count(size_t size)
{
ssize_t s;

size -= sizeof(bch_acl_header);
s = size - 4 * sizeof(bch_acl_entry_short);
if (s < 0) {
if (size % sizeof(bch_acl_entry_short))
return -1;
return size / sizeof(bch_acl_entry_short);
} else {
if (s % sizeof(bch_acl_entry))
return -1;
return s / sizeof(bch_acl_entry) + 4;
}
}

struct posix_acl;

extern struct posix_acl *bch2_get_acl(struct inode *, int);
@@ -52,21 +52,6 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
}

/*
* Mark a key as deleted without changing the size of the value (i.e. modifying
* keys in the btree in place)
*/
static inline void __set_bkey_deleted(struct bkey *k)
{
k->type = KEY_TYPE_DELETED;
}

static inline void set_bkey_deleted(struct bkey *k)
{
__set_bkey_deleted(k);
set_bkey_val_u64s(k, 0);
}

#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED)

#define bkey_whiteout(_k) \
@@ -284,6 +269,16 @@ static inline struct bpos bkey_successor(struct bpos p)
return ret;
}

static inline struct bpos bkey_predecessor(struct bpos p)
{
struct bpos ret = p;

if (!ret.offset--)
BUG_ON(!ret.inode--);

return ret;
}

static inline u64 bkey_start_offset(const struct bkey *k)
{
return k->p.offset - k->size;
@@ -987,6 +987,10 @@ void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
set_btree_bset(b, t, i);
}

/*
* find _some_ key in the same bset as @k that precedes @k - not necessarily the
* immediate predecessor:
*/
static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
struct bkey_packed *k)
{
@@ -1025,40 +1029,31 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
return p;
}

struct bkey_packed *bch2_bkey_prev_all(struct btree *b, struct bset_tree *t,
struct bkey_packed *k)
struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
struct bset_tree *t,
struct bkey_packed *k,
unsigned min_key_type)
{
struct bkey_packed *p;

p = __bkey_prev(b, t, k);
if (!p)
return NULL;

while (bkey_next(p) != k)
p = bkey_next(p);

return p;
}

struct bkey_packed *bch2_bkey_prev(struct btree *b, struct bset_tree *t,
struct bkey_packed *k)
{
while (1) {
struct bkey_packed *p, *i, *ret = NULL;

p = __bkey_prev(b, t, k);
if (!p)
return NULL;
struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;

while ((p = __bkey_prev(b, t, k)) && !ret) {
for (i = p; i != k; i = bkey_next(i))
if (!bkey_deleted(i))
if (i->type >= min_key_type)
ret = i;

if (ret)
return ret;

k = p;
}

if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
BUG_ON(ret >= orig_k);

for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
i != orig_k;
i = bkey_next(i))
BUG_ON(i->type >= min_key_type);
}

return ret;
}

/* Insert */
@@ -1677,7 +1672,7 @@ void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
#endif
}

static inline bool __btree_node_iter_used(struct btree_node_iter *iter)
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
{
unsigned n = ARRAY_SIZE(iter->data);

@@ -1690,69 +1685,68 @@ static inline bool __btree_node_iter_used(struct btree_node_iter *iter)
/*
* Expensive:
*/
struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
struct btree *b)
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
struct btree *b,
unsigned min_key_type)
{
struct bkey_packed *k, *prev = NULL;
struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
struct btree_node_iter_set *set;
struct bset_tree *t;
struct bset_tree *prev_t;
unsigned end, used;
unsigned end;

bch2_btree_node_iter_verify(iter, b);

for_each_bset(b, t) {
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t));
k = bch2_bkey_prev_filter(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t),
min_key_type);
if (k &&
(!prev || __btree_node_iter_cmp(iter->is_extents, b,
k, prev) > 0)) {
prev = k;
prev_t = t;
end = t->end_offset;
}
}

if (!prev)
return NULL;
goto out;

/*
* We're manually memmoving instead of just calling sort() to ensure the
* prev we picked ends up in slot 0 - sort won't necessarily put it
* there because of duplicate deleted keys:
*/
end = __btree_node_key_to_offset(b, btree_bkey_last(b, prev_t));
btree_node_iter_for_each(iter, set)
if (set->end == end) {
memmove(&iter->data[1],
&iter->data[0],
(void *) set - (void *) &iter->data[0]);
goto out;
}
if (set->end == end)
goto found;

used = __btree_node_iter_used(iter);
BUG_ON(used >= ARRAY_SIZE(iter->data));
BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]);
found:
BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data));

memmove(&iter->data[1],
&iter->data[0],
(void *) &iter->data[used] - (void *) &iter->data[0]);
out:
(void *) set - (void *) &iter->data[0]);

iter->data[0].k = __btree_node_key_to_offset(b, prev);
iter->data[0].end = end;
out:
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct btree_node_iter iter2 = *iter;

if (prev)
bch2_btree_node_iter_advance(&iter2, b);

while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
BUG_ON(k->type >= min_key_type);
bch2_btree_node_iter_advance(&iter2, b);
}
}

return prev;
}

struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter,
struct btree *b)
{
struct bkey_packed *k;

do {
k = bch2_btree_node_iter_prev_all(iter, b);
} while (k && bkey_deleted(k));

return k;
}

struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
struct btree *b,
struct bkey *u)
@@ -393,10 +393,21 @@ static inline bool btree_iter_pos_cmp_p_or_unp(const struct btree *b,
}

struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
struct bkey_packed *bch2_bkey_prev_all(struct btree *, struct bset_tree *,
struct bkey_packed *);
struct bkey_packed *bch2_bkey_prev(struct btree *, struct bset_tree *,
struct bkey_packed *);

struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
struct bkey_packed *, unsigned);

static inline struct bkey_packed *
bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
return bch2_bkey_prev_filter(b, t, k, 0);
}

static inline struct bkey_packed *
bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1);
}

enum bch_extent_overlap {
BCH_EXTENT_OVERLAP_ALL = 0,
@@ -471,9 +482,11 @@ static inline int __btree_node_iter_cmp(bool is_extents,
* For extents, bkey_deleted() is used as a proxy for k->size == 0, so
* deleted keys have to sort last.
*/
return bkey_cmp_packed(b, l, r) ?: is_extents
? (int) bkey_deleted(l) - (int) bkey_deleted(r)
: (int) bkey_deleted(r) - (int) bkey_deleted(l);
return bkey_cmp_packed(b, l, r)
?: (is_extents
? (int) bkey_deleted(l) - (int) bkey_deleted(r)
: (int) bkey_deleted(r) - (int) bkey_deleted(l))
?: (l > r) - (l < r);
}

static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
@@ -512,25 +525,34 @@ __bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
return __btree_node_offset_to_key(b, iter->data->k);
}

static inline struct bkey_packed *
bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter,
struct btree *b,
unsigned min_key_type)
{
while (!bch2_btree_node_iter_end(iter)) {
struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b);

if (k->type >= min_key_type)
return k;

bch2_btree_node_iter_advance(iter, b);
}

return NULL;
}

static inline struct bkey_packed *
bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
struct btree *b)
{
return bch2_btree_node_iter_end(iter)
? NULL
: __bch2_btree_node_iter_peek_all(iter, b);
return bch2_btree_node_iter_peek_filter(iter, b, 0);
}

static inline struct bkey_packed *
bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
{
struct bkey_packed *ret;

while ((ret = bch2_btree_node_iter_peek_all(iter, b)) &&
bkey_deleted(ret))
bch2_btree_node_iter_advance(iter, b);

return ret;
return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1);
}

static inline struct bkey_packed *
@@ -544,10 +566,20 @@ bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
return ret;
}

struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
struct btree *);
struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
struct btree *);
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
struct btree *, unsigned);

static inline struct bkey_packed *
bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b)
{
return bch2_btree_node_iter_prev_filter(iter, b, 0);
}

static inline struct bkey_packed *
bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
{
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
}

/*
* Iterates over all _live_ keys - skipping deleted (and potentially
@@ -577,10 +577,11 @@ err:

/* Slowpath, don't want it inlined into btree_iter_traverse() */
static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
struct btree_iter *iter,
const struct bkey_i *k,
unsigned level,
enum six_lock_type lock_type)
struct btree_iter *iter,
const struct bkey_i *k,
unsigned level,
enum six_lock_type lock_type,
bool sync)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@@ -590,6 +591,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* been freed:
*/
BUG_ON(!btree_node_locked(iter, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);

b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b))
@@ -623,9 +625,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);

bch2_btree_node_read(c, b, true);
bch2_btree_node_read(c, b, sync);

six_unlock_write(&b->lock);

if (!sync) {
six_unlock_intent(&b->lock);
return NULL;
}

if (lock_type == SIX_LOCK_read)
six_lock_downgrade(&b->lock);

@@ -643,7 +651,8 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
*/
struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type)
enum six_lock_type lock_type,
bool may_drop_locks)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@@ -670,7 +679,7 @@ retry:
* else we could read in a btree node from disk that's been
* freed:
*/
b = bch2_btree_node_fill(c, iter, k, level, lock_type);
b = bch2_btree_node_fill(c, iter, k, level, lock_type, true);

/* We raced and found the btree node in the cache */
if (!b)
@@ -710,7 +719,8 @@ retry:
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);

if (!btree_node_lock(b, k->k.p, level, iter, lock_type))
if (!btree_node_lock(b, k->k.p, level, iter,
lock_type, may_drop_locks))
return ERR_PTR(-EINTR);

if (unlikely(PTR_HASH(&b->key) != PTR_HASH(k) ||
@@ -778,18 +788,17 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
k = bch2_btree_node_iter_peek_all(&node_iter, parent);
BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));

do {
k = sib == btree_prev_sib
? bch2_btree_node_iter_prev_all(&node_iter, parent)
: (bch2_btree_node_iter_advance(&node_iter, parent),
bch2_btree_node_iter_peek_all(&node_iter, parent));
if (!k)
goto out;
} while (bkey_deleted(k));
k = sib == btree_prev_sib
? bch2_btree_node_iter_prev(&node_iter, parent)
: (bch2_btree_node_iter_advance(&node_iter, parent),
bch2_btree_node_iter_peek(&node_iter, parent));
if (!k)
goto out;

bch2_bkey_unpack(parent, &tmp.k, k);

ret = bch2_btree_node_get(c, iter, &tmp.k, level, SIX_LOCK_intent);
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
SIX_LOCK_intent, may_drop_locks);

if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
struct btree_iter *linked;
@@ -809,7 +818,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
btree_node_unlock(iter, level);

ret = bch2_btree_node_get(c, iter, &tmp.k, level,
SIX_LOCK_intent);
SIX_LOCK_intent, may_drop_locks);

/*
* before btree_iter_relock() calls btree_iter_verify_locks():
@@ -838,20 +847,32 @@ out:
(iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
!btree_node_locked(iter, level)));

if (!IS_ERR_OR_NULL(ret)) {
struct btree *n1 = ret, *n2 = b;

if (sib != btree_prev_sib)
swap(n1, n2);

BUG_ON(bkey_cmp(btree_type_successor(n1->btree_id,
n1->key.k.p),
n2->data->min_key));
}

return ret;
out_upgrade:
if (may_drop_locks)
bch2_btree_iter_upgrade(iter, level + 2);
bch2_btree_iter_upgrade(iter, level + 2, true);
ret = ERR_PTR(-EINTR);
goto out;
}

void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
unsigned level, enum btree_id btree_id)
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;

BUG_ON(!btree_node_locked(iter, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);

rcu_read_lock();
@@ -861,27 +882,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
if (b)
return;

b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b))
return;

bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */

/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;

mutex_lock(&bc->lock);
list_add(&b->list, &bc->freeable);
mutex_unlock(&bc->lock);
goto out;
}

bch2_btree_node_read(c, b, false);
out:
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false);
}

int bch2_print_btree_node(struct bch_fs *c, struct btree *b,
@@ -23,14 +23,14 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);

struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned,
enum six_lock_type);
enum six_lock_type, bool);

struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
struct btree *, bool,
enum btree_node_sibling);

void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
unsigned, enum btree_id);
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, unsigned);

void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
@@ -1547,7 +1547,7 @@ static void bch2_btree_node_write_error(struct bch_fs *c,

__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
BTREE_MAX_DEPTH,
b->level, 0);
b->level, BTREE_ITER_NODES);
retry:
ret = bch2_btree_iter_traverse(&iter);
if (ret)
@@ -18,7 +18,9 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,

static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return iter->l[l].b && iter->l[l].b != BTREE_ITER_NOT_END;
return l < BTREE_MAX_DEPTH &&
iter->l[l].b &&
iter->l[l].b != BTREE_ITER_NOT_END;
}

/* Btree node locking: */
@@ -88,10 +90,10 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,

bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
{
struct btree *b = iter->l[level].b;
struct btree *b = btree_iter_node(iter, level);
int want = __btree_lock_want(iter, level);

if (!is_btree_node(iter, level))
if (!b || b == BTREE_ITER_NOT_END)
return false;

if (race_fault())
@@ -115,12 +117,12 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
if (!is_btree_node(iter, level))
return false;

if (race_fault())
return false;

if (btree_node_intent_locked(iter, level))
return true;

if (race_fault())
return false;

if (btree_node_locked(iter, level)
? six_lock_tryupgrade(&b->lock)
: six_relock_type(&b->lock, SIX_LOCK_intent, iter->lock_seq[level]))
@@ -180,7 +182,8 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
unsigned level,
struct btree_iter *iter,
enum six_lock_type type)
enum six_lock_type type,
bool may_drop_locks)
{
struct bch_fs *c = iter->c;
struct btree_iter *linked;
@@ -231,10 +234,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
btree_iter_get_locks(linked, true);
if (may_drop_locks) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
__fls(linked->nodes_locked) + 1);
btree_iter_get_locks(linked, true);
}
ret = false;
}

@@ -245,10 +250,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
*/
if (linked->btree_id == iter->btree_id &&
level > __fls(linked->nodes_locked)) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
iter->locks_want);
btree_iter_get_locks(linked, true);
if (may_drop_locks) {
linked->locks_want = max_t(unsigned,
linked->locks_want,
iter->locks_want);
btree_iter_get_locks(linked, true);
}
ret = false;
}
}
@@ -265,11 +272,6 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;

if (iter->uptodate == BTREE_ITER_END) {
BUG_ON(iter->nodes_locked);
return;
}

for (l = 0; btree_iter_node(iter, l); l++) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
!btree_node_locked(iter, l))
@@ -284,13 +286,9 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
__flatten
static bool __bch2_btree_iter_relock(struct btree_iter *iter)
{
if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
return true;

if (iter->uptodate > BTREE_ITER_NEED_TRAVERSE)
return false;

return btree_iter_get_locks(iter, false);
return iter->uptodate >= BTREE_ITER_NEED_RELOCK
? btree_iter_get_locks(iter, false)
: true;
}

bool bch2_btree_iter_relock(struct btree_iter *iter)
@@ -332,6 +330,30 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
return false;
}

bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter,
unsigned new_locks_want)
{
unsigned l = iter->level;

EBUG_ON(iter->locks_want >= new_locks_want);

iter->locks_want = new_locks_want;

do {
if (!btree_iter_node(iter, l))
break;

if (!bch2_btree_node_upgrade(iter, l)) {
iter->locks_want = l;
return false;
}

l++;
} while (l < iter->locks_want);

return true;
}

void __bch2_btree_iter_downgrade(struct btree_iter *iter,
unsigned downgrade_to)
{
@@ -419,6 +441,12 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
iter->pos.inode, iter->pos.offset, buf);
}

if (iter->uptodate == BTREE_ITER_UPTODATE &&
(iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
BUG_ON(!bkey_whiteout(&iter->k) &&
bch2_btree_node_iter_end(&l->iter));
}
}

void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
@@ -453,6 +481,8 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
if (new_u64s &&
btree_iter_pos_cmp_packed(b, &iter->pos, where,
iter->flags & BTREE_ITER_IS_EXTENTS)) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);

bch2_btree_node_iter_push(node_iter, b, where, end);

if (!b->level &&
@@ -482,6 +512,8 @@ found:
goto iter_current_key_not_modified;
}

btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);

bch2_btree_node_iter_sort(node_iter, b);
if (!b->level && node_iter == &iter->l[0].iter)
__btree_iter_peek_all(iter, &iter->l[0], &iter->k);
@@ -666,7 +698,8 @@ static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
return !btree_iter_pos_cmp(iter, &b->key.k);
return !btree_iter_pos_cmp(iter, &b->key.k) &&
bkey_cmp(b->key.k.p, POS_MAX);
}

static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
@@ -788,7 +821,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,

lock_type = __btree_lock_want(iter, iter->level);
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
iter, lock_type)))
iter, lock_type, true)))
return -EINTR;

if (likely(b == c->btree_roots[iter->btree_id].b &&
@@ -830,9 +863,8 @@ static void btree_iter_prefetch(struct btree_iter *iter)
break;

bch2_bkey_unpack(l->b, &tmp.k, k);
bch2_btree_node_prefetch(iter->c, &tmp.k,
iter->level - 1,
iter->btree_id);
bch2_btree_node_prefetch(iter->c, iter, &tmp.k,
iter->level - 1);
}

if (!was_locked)
@@ -852,7 +884,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
bch2_bkey_unpack(l->b, &tmp.k,
bch2_btree_node_iter_peek(&l->iter, l->b));

b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type);
b = bch2_btree_node_get(iter->c, iter, &tmp.k, level, lock_type, true);
if (unlikely(IS_ERR(b)))
return PTR_ERR(b);

@@ -872,12 +904,6 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}

static void btree_iter_set_end(struct btree_iter *iter)
{
iter->uptodate = BTREE_ITER_END;
__bch2_btree_iter_unlock(iter);
}

int __must_check __bch2_btree_iter_traverse(struct btree_iter *);

static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
@@ -954,6 +980,24 @@ io_error:
goto out;
}

static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
bool check_pos)
{
unsigned l = iter->level;

while (btree_iter_node(iter, l) &&
!(is_btree_node(iter, l) &&
bch2_btree_node_relock(iter, l) &&
(!check_pos ||
btree_iter_pos_in_node(iter, iter->l[l].b)))) {
btree_node_unlock(iter, l);
iter->l[l].b = BTREE_ITER_NOT_END;
l++;
}

return l;
}

/*
* This is the main state machine for walking down the btree - walks down to a
* specified depth
@@ -967,45 +1011,19 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
unsigned depth_want = iter->level;

if (unlikely(iter->uptodate == BTREE_ITER_END))
if (unlikely(iter->level >= BTREE_MAX_DEPTH))
return 0;

BUG_ON(iter->level >= BTREE_MAX_DEPTH);
BUG_ON(!iter->l[iter->level].b);
if (__bch2_btree_iter_relock(iter))
return 0;

iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF;

/* make sure we have all the intent locks we need - ugh */
if (unlikely(iter->l[iter->level].b &&
iter->level + 1 < iter->locks_want)) {
unsigned i;

for (i = iter->level + 1;
i < iter->locks_want && iter->l[i].b;
i++)
if (!bch2_btree_node_relock(iter, i)) {
while (iter->level < BTREE_MAX_DEPTH &&
iter->l[iter->level].b &&
iter->level + 1 < iter->locks_want)
btree_iter_up(iter);
break;
}
}

/*
* If the current node isn't locked, go up until we have a locked node
* or run out of nodes:
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
* here unnecessary
*/
while (btree_iter_node(iter, iter->level) &&
!(is_btree_node(iter, iter->level) &&
bch2_btree_node_relock(iter, iter->level) &&

/*
* XXX: correctly using BTREE_ITER_UPTODATE should make
* comparing iter->pos against node's key unnecessary
*/
btree_iter_pos_in_node(iter, iter->l[iter->level].b)))
btree_iter_up(iter);
iter->level = btree_iter_up_until_locked(iter, true);

/*
* If we've got a btree node locked (i.e. we aren't about to relock the
@@ -1049,9 +1067,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;

if (__bch2_btree_iter_relock(iter))
return 0;

ret = __bch2_btree_iter_traverse(iter);
if (unlikely(ret))
ret = btree_iter_traverse_error(iter, ret);
@@ -1061,6 +1076,18 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
return ret;
}

static inline void bch2_btree_iter_checks(struct btree_iter *iter,
enum btree_iter_type type)
{
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
EBUG_ON((iter->flags & BTREE_ITER_TYPE) != type);
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS &&
type != BTREE_ITER_NODES));

bch2_btree_iter_verify_locks(iter);
}

/* Iterate across nodes (leaf and interior nodes) */

struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
@@ -1068,24 +1095,18 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
struct btree *b;
int ret;

EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_NODES);

if (iter->uptodate == BTREE_ITER_UPTODATE)
return iter->l[iter->level].b;

if (unlikely(iter->uptodate == BTREE_ITER_END))
return NULL;

ret = bch2_btree_iter_traverse(iter);
if (ret)
return ERR_PTR(ret);

b = iter->l[iter->level].b;
if (!b) {
btree_iter_set_end(iter);
return NULL;
}

b = btree_iter_node(iter, iter->level);
if (!b)
return NULL;

BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);

@@ -1100,25 +1121,25 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
struct btree *b;
int ret;

EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_NODES);

/* already got to end? */
if (!btree_iter_node(iter, iter->level))
return NULL;

btree_iter_up(iter);

if (!btree_iter_node(iter, iter->level)) {
btree_iter_set_end(iter);
if (!bch2_btree_node_relock(iter, iter->level))
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);

ret = bch2_btree_iter_traverse(iter);
if (ret)
return NULL;
}

if (!bch2_btree_node_relock(iter, iter->level)) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
ret = bch2_btree_iter_traverse(iter);
if (ret)
return NULL;
}

b = iter->l[iter->level].b;
BUG_ON(!b);
/* got to end? */
b = btree_iter_node(iter, iter->level);
if (!b)
return NULL;

if (bkey_cmp(iter->pos, b->key.k.p) < 0) {
/*
@@ -1150,6 +1171,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
}

iter->pos = b->key.k.p;
iter->uptodate = BTREE_ITER_UPTODATE;

return b;
}
@@ -1182,10 +1204,68 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_

void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); /* XXX handle this */
int cmp = bkey_cmp(new_pos, iter->pos);
unsigned level;

if (!cmp)
return;

iter->pos = new_pos;

btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
level = btree_iter_up_until_locked(iter, true);

if (btree_iter_node(iter, level)) {
unsigned nr_advanced = 0;
struct btree_iter_level *l = &iter->l[level];
struct bkey_s_c k;
struct bkey u;

/*
* We might have to skip over many keys, or just a few: try
* advancing the node iterator, and if we have to skip over too
* many keys just reinit it (or if we're rewinding, since that
* is expensive).
*/
if (cmp > 0) {
while ((k = __btree_iter_peek_all(iter, l, &u)).k &&
!btree_iter_pos_cmp(iter, k.k)) {
if (nr_advanced > 8)
goto reinit_node;

__btree_iter_advance(l);
nr_advanced++;
}
} else {
reinit_node:
__btree_iter_init(iter, iter->l[level].b);
}

/* Don't leave it locked if we're not supposed to: */
if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
btree_node_unlock(iter, level);
}

if (level != iter->level)
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
else
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}

static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c ret = { .k = &iter->k };

if (!bkey_deleted(&iter->k)) {
EBUG_ON(bch2_btree_node_iter_end(&l->iter));
ret.v = bkeyp_val(&l->b->format,
__bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

if (debug_check_bkeys(iter->c) &&
!bkey_deleted(ret.k))
bch2_bkey_debugcheck(iter->c, l->b, ret);
return ret;
}

struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
@@ -1194,26 +1274,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
struct bkey_s_c k;
int ret;

EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);

if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_packed *k =
__bch2_btree_node_iter_peek_all(&l->iter, l->b);
struct bkey_s_c ret = {
.k = &iter->k,
.v = bkeyp_val(&l->b->format, k)
};

if (debug_check_bkeys(iter->c))
bch2_bkey_debugcheck(iter->c, l->b, ret);
return ret;
}

if (iter->uptodate == BTREE_ITER_END)
return bkey_s_c_null;
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);

while (1) {
ret = bch2_btree_iter_traverse(iter);
@@ -1225,14 +1289,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
break;

/* got to the end of the leaf, iterator needs to be traversed: */
iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
btree_iter_set_end(iter);
iter->pos = l->b->key.k.p;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;

if (!bkey_cmp(iter->pos, POS_MAX))
return bkey_s_c_null;
}

iter->pos = btree_type_successor(iter->btree_id, iter->pos);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
}

/*
@@ -1252,14 +1315,13 @@ struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];

iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
btree_iter_set_end(iter);
iter->pos = l->b->key.k.p;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;

if (!bkey_cmp(iter->pos, POS_MAX))
return bkey_s_c_null;
}

iter->pos = btree_type_successor(iter->btree_id, iter->pos);
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;

return bch2_btree_iter_peek(iter);
}
@@ -1270,10 +1332,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
struct bkey_packed *p;
struct bkey_s_c k;

EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);

if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
@@ -1286,7 +1345,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
if (unlikely(!p))
return bch2_btree_iter_peek_next_leaf(iter);
} while (bkey_deleted(p));
} while (bkey_whiteout(p));

k = __btree_iter_unpack(iter, l, &iter->k, p);

@@ -1295,6 +1354,51 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
return k;
}

struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *p;
struct bkey_s_c k;
int ret;

bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);

if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
if (IS_ERR(k.k))
return k;
}

while (1) {
p = bch2_btree_node_iter_prev(&l->iter, l->b);
if (likely(p))
break;

iter->pos = l->b->data->min_key;
if (!bkey_cmp(iter->pos, POS_MIN))
return bkey_s_c_null;

bch2_btree_iter_set_pos(iter,
btree_type_predecessor(iter->btree_id, iter->pos));

ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);

p = bch2_btree_node_iter_peek(&l->iter, l->b);
if (p)
break;
}

k = __btree_iter_unpack(iter, l, &iter->k, p);

EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);

iter->pos = bkey_start_pos(k.k);
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}

static inline struct bkey_s_c
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
@@ -1309,13 +1413,6 @@ recheck:
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
__btree_iter_advance(l);

if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}

/*
* If we got to the end of the node, check if we need to traverse to the
* next node:
@@ -1329,21 +1426,35 @@ recheck:
goto recheck;
}

if (k.k &&
!bkey_whiteout(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}

/* hole */
bkey_init(&n);
n.p = iter->pos;

if (iter->flags & BTREE_ITER_IS_EXTENTS) {
if (n.p.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX) {
btree_iter_set_end(iter);
if (n.p.inode == KEY_INODE_MAX)
return bkey_s_c_null;
}

iter->pos = bkey_successor(iter->pos);
goto recheck;
}

if (k.k && bkey_whiteout(k.k)) {
struct btree_node_iter node_iter = l->iter;

k = __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek(&node_iter, l->b));
}

if (!k.k)
k.k = &l->b->key.k;

@@ -1357,35 +1468,19 @@ recheck:
EBUG_ON(!n.size);
}

iter->k = n;
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
}

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
int ret;

EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_SLOTS);

if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_s_c ret = { .k = &iter->k };

if (!bkey_deleted(&iter->k))
ret.v = bkeyp_val(&l->b->format,
__bch2_btree_node_iter_peek_all(&l->iter, l->b));

if (debug_check_bkeys(iter->c))
bch2_bkey_debugcheck(iter->c, l->b, ret);
return ret;
}

if (iter->uptodate == BTREE_ITER_END)
return bkey_s_c_null;
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);

ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
@@ -1396,10 +1491,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)

struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
{
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
bch2_btree_iter_verify_locks(iter);
bch2_btree_iter_checks(iter, BTREE_ITER_SLOTS);

iter->pos = btree_type_successor(iter->btree_id, iter->k.p);

@@ -1417,6 +1509,8 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
if (!bkey_deleted(&iter->k))
__btree_iter_advance(&iter->l[0]);

btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);

return __bch2_btree_iter_peek_slot(iter);
}

@@ -1446,10 +1540,6 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
iter->l[iter->level].b = BTREE_ITER_NOT_END;
iter->next = iter;

if (unlikely((flags & BTREE_ITER_IS_EXTENTS) &&
!bkey_cmp(pos, POS_MAX)))
iter->uptodate = BTREE_ITER_END;

prefetch(c->btree_roots[btree_id].b);
}
@ -106,14 +106,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
|
||||
int bch2_btree_iter_unlock(struct btree_iter *);
|
||||
|
||||
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
|
||||
bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
|
||||
|
||||
static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
|
||||
unsigned new_locks_want)
|
||||
unsigned new_locks_want,
|
||||
bool may_drop_locks)
|
||||
{
|
||||
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
|
||||
|
||||
return iter->locks_want < new_locks_want
|
||||
? __bch2_btree_iter_upgrade(iter, new_locks_want)
|
||||
? (may_drop_locks
|
||||
? __bch2_btree_iter_upgrade(iter, new_locks_want)
|
||||
: __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
|
||||
: iter->uptodate <= BTREE_ITER_NEED_PEEK;
|
||||
}
|
||||
|
||||
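A hedged sketch of the intended caller pattern for the new may_drop_locks argument, copied from the bch2_btree_split_leaf() hunk further down; flags, ret and the out label are assumed caller context:

	/*
	 * Upgrade to intent locks on every level we might touch; when the
	 * caller can't tolerate its locks being dropped (BTREE_INSERT_NOUNLOCK)
	 * pass may_drop_locks == false and back off with -EINTR instead:
	 */
	if (!bch2_btree_iter_upgrade(iter, U8_MAX,
				     !(flags & BTREE_INSERT_NOUNLOCK))) {
		ret = -EINTR;
		goto out;
	}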
@ -137,6 +141,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
|
||||
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
|
||||
@ -175,6 +180,19 @@ static inline struct bpos btree_type_successor(enum btree_id id,
|
||||
return pos;
|
||||
}
|
||||
|
||||
static inline struct bpos btree_type_predecessor(enum btree_id id,
|
||||
struct bpos pos)
|
||||
{
|
||||
if (id == BTREE_ID_INODES) {
|
||||
--pos.inode;
|
||||
pos.offset = 0;
|
||||
} else /* if (id != BTREE_ID_EXTENTS) */ {
|
||||
pos = bkey_predecessor(pos);
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
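A hedged illustration (not from the commit) of what the predecessor helper above does for the two kinds of btrees; the positions are made-up values:

	/* BTREE_ID_INODES: positions step by whole inode numbers */
	struct bpos a = btree_type_predecessor(BTREE_ID_INODES,  POS(42, 123));	/* POS(41, 0)   */

	/* every other btree: an ordinary bkey position step */
	struct bpos b = btree_type_predecessor(BTREE_ID_DIRENTS, POS(42, 123));	/* POS(42, 122) */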
static inline int __btree_iter_cmp(enum btree_id id,
|
||||
struct bpos pos,
|
||||
const struct btree_iter *r)
|
||||
@ -207,7 +225,8 @@ static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter)
|
||||
#define __for_each_btree_node(_iter, _c, _btree_id, _start, \
|
||||
_locks_want, _depth, _flags, _b) \
|
||||
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \
|
||||
_locks_want, _depth, _flags), \
|
||||
_locks_want, _depth, \
|
||||
_flags|BTREE_ITER_NODES), \
|
||||
_b = bch2_btree_iter_peek_node(_iter); \
|
||||
(_b); \
|
||||
(_b) = bch2_btree_iter_next_node(_iter, _depth))
|
||||
|
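A hedged usage sketch of the node-walking macro above, with locks_want 0, depth 0 and no extra flags; the btree id and the pr_info() body are just for illustration:

	struct btree_iter iter;
	struct btree *b;

	__for_each_btree_node(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
			      0, 0, 0, b)
		pr_info("node %llu:%llu at level %u",
			b->key.k.p.inode, b->key.k.p.offset, b->level);

	bch2_btree_iter_unlock(&iter);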
@ -147,17 +147,19 @@ static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
|
||||
struct btree_iter *, enum six_lock_type);
|
||||
struct btree_iter *, enum six_lock_type, bool);
|
||||
|
||||
static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
unsigned level,
|
||||
struct btree_iter *iter,
|
||||
enum six_lock_type type)
|
||||
enum six_lock_type type,
|
||||
bool may_drop_locks)
|
||||
{
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
return likely(six_trylock_type(&b->lock, type)) ||
|
||||
__bch2_btree_node_lock(b, pos, level, iter, type);
|
||||
__bch2_btree_node_lock(b, pos, level, iter,
|
||||
type, may_drop_locks);
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
|
||||
|
@ -182,26 +182,32 @@ struct btree_node_iter {
|
||||
} data[MAX_BSETS];
|
||||
};
|
||||
|
||||
#define BTREE_ITER_SLOTS (1 << 0)
|
||||
#define BTREE_ITER_INTENT (1 << 1)
|
||||
#define BTREE_ITER_PREFETCH (1 << 2)
|
||||
enum btree_iter_type {
|
||||
BTREE_ITER_KEYS,
|
||||
BTREE_ITER_SLOTS,
|
||||
BTREE_ITER_NODES,
|
||||
};
|
||||
|
||||
#define BTREE_ITER_TYPE ((1 << 2) - 1)
|
||||
|
||||
#define BTREE_ITER_INTENT (1 << 2)
|
||||
#define BTREE_ITER_PREFETCH (1 << 3)
|
||||
/*
|
||||
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
|
||||
* @pos or the first key strictly greater than @pos
|
||||
*/
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 3)
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 4)
|
||||
/*
|
||||
* indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
|
||||
*/
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 4)
|
||||
#define BTREE_ITER_ERROR (1 << 5)
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 5)
|
||||
#define BTREE_ITER_ERROR (1 << 6)
|
||||
|
||||
enum btree_iter_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
BTREE_ITER_NEED_PEEK = 1,
|
||||
BTREE_ITER_NEED_RELOCK = 2,
|
||||
BTREE_ITER_NEED_TRAVERSE = 3,
|
||||
BTREE_ITER_END = 4,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -216,7 +222,7 @@ struct btree_iter {
|
||||
struct bpos pos;
|
||||
|
||||
u8 flags;
|
||||
unsigned uptodate:4;
|
||||
enum btree_iter_uptodate uptodate:4;
|
||||
enum btree_id btree_id:4;
|
||||
unsigned level:4,
|
||||
locks_want:4,
|
||||
|
@ -1586,7 +1586,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
|
||||
* XXX: figure out how far we might need to split,
|
||||
* instead of locking/reserving all the way to the root:
|
||||
*/
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX,
|
||||
!(flags & BTREE_INSERT_NOUNLOCK))) {
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
@ -1694,7 +1695,8 @@ retry:
|
||||
if (!down_read_trylock(&c->gc_lock))
|
||||
goto err_cycle_gc_lock;
|
||||
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX,
|
||||
!(flags & BTREE_INSERT_NOUNLOCK))) {
|
||||
ret = -EINTR;
|
||||
goto err_unlock;
|
||||
}
|
||||
@ -1857,7 +1859,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
bch2_btree_iter_upgrade(iter, U8_MAX);
|
||||
bch2_btree_iter_upgrade(iter, U8_MAX, true);
|
||||
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
@ -2000,7 +2002,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX))
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX, true))
|
||||
return -EINTR;
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
|
@ -199,14 +199,17 @@ void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
|
||||
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
|
||||
struct btree *b)
|
||||
{
|
||||
unsigned depth = btree_node_root(c, b)->level - b->level + 1;
|
||||
unsigned depth = btree_node_root(c, b)->level + 1;
|
||||
|
||||
/*
|
||||
* Number of nodes we might have to allocate in a worst case btree
|
||||
* split operation - we split all the way up to the root, then allocate
|
||||
* a new root.
|
||||
* a new root, unless we're already at max depth:
|
||||
*/
|
||||
return depth * 2 + 1;
|
||||
if (depth < BTREE_MAX_DEPTH)
|
||||
return (depth - b->level) * 2 + 1;
|
||||
else
|
||||
return (depth - b->level) * 2 - 1;
|
||||
}
|
||||
|
||||
static inline void btree_node_reset_sib_u64s(struct btree *b)
|
||||
|
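A worked example of the new reservation math above (the tree shape is assumed for illustration):

/*
 * Root at level 3, update starting at a leaf (b->level == 0):
 *
 *   depth = btree_node_root(c, b)->level + 1 = 4,  depth < BTREE_MAX_DEPTH
 *   =>  reserve = (4 - 0) * 2 + 1 = 9 nodes
 *
 * i.e. two nodes per level if every level splits, plus one for a new root.
 * If the root is already at the maximum depth the tree can't grow, so the
 * worst case at the top is a single rewritten root rather than two halves
 * plus a new root: (depth - b->level) * 2 - 1.
 */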
@ -205,8 +205,6 @@ btree_insert_key_leaf(struct btree_insert *trans,
|
||||
int old_live_u64s = b->nr.live_u64s;
|
||||
int live_u64s_added, u64s_added;
|
||||
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
|
||||
|
||||
ret = !btree_node_is_extents(b)
|
||||
? bch2_insert_fixup_key(trans, insert)
|
||||
: bch2_insert_fixup_extent(trans, insert);
|
||||
@ -430,9 +428,9 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
|
||||
BUG_ON(i->iter->level);
|
||||
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
|
||||
BUG_ON(debug_check_bkeys(c) &&
|
||||
!bkey_deleted(&i->k->k) &&
|
||||
bch2_bkey_invalid(c, i->iter->btree_id,
|
||||
bkey_i_to_s_c(i->k)));
|
||||
BUG_ON(i->iter->uptodate == BTREE_ITER_END);
|
||||
}
|
||||
|
||||
bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
|
||||
@ -444,7 +442,7 @@ retry:
|
||||
cycle_gc_lock = false;
|
||||
|
||||
trans_for_each_entry(trans, i) {
|
||||
if (!bch2_btree_iter_upgrade(i->iter, 1)) {
|
||||
if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
}
|
||||
@ -647,11 +645,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
if (bkey_cmp(iter.pos, end) >= 0)
|
||||
break;
|
||||
|
||||
if (k.k->type == KEY_TYPE_DISCARD) {
|
||||
bch2_btree_iter_next(&iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
bkey_init(&delete.k);
|
||||
|
||||
/*
|
||||
@ -668,15 +661,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
delete.k.version = version;
|
||||
|
||||
if (iter.flags & BTREE_ITER_IS_EXTENTS) {
|
||||
/*
|
||||
* The extents btree is special - KEY_TYPE_DISCARD is
|
||||
* used for deletions, not KEY_TYPE_DELETED. This is an
|
||||
* internal implementation detail that probably
|
||||
* shouldn't be exposed (internally, KEY_TYPE_DELETED is
|
||||
* used as a proxy for k->size == 0):
|
||||
*/
|
||||
delete.k.type = KEY_TYPE_DISCARD;
|
||||
|
||||
/* create the biggest key we can */
|
||||
bch2_key_resize(&delete.k, max_sectors);
|
||||
bch2_cut_back(end, &delete.k);
|
||||
|
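A hedged usage sketch of bch2_btree_delete_range() with the extents-specific whiteout behaviour above; the call pattern is the same one delete_test_keys() uses in the tests.c hunk further down:

	int ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
					  POS(0, 0), POS(0, U64_MAX),
					  ZERO_VERSION, NULL, NULL, NULL);
	if (ret)
		pr_err("delete_range error: %i", ret);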
@ -97,7 +97,11 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
if (!len)
|
||||
return "empty name";
|
||||
|
||||
if (bkey_val_u64s(k.k) > dirent_val_u64s(len))
|
||||
/*
|
||||
* older versions of bcachefs were buggy and creating dirent
|
||||
* keys that were bigger than necessary:
|
||||
*/
|
||||
if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
|
||||
return "value too big";
|
||||
|
||||
if (len > BCH_NAME_MAX)
|
||||
|
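A hedged note on the relaxed bound above, assuming the usual dirent_val_u64s() definition (value = struct header plus name, rounded up to u64s):

/*
 * Assumed definition, for illustration:
 *   dirent_val_u64s(len) =
 *	DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, sizeof(u64))
 *
 * Checking against len + 7 adds at most one u64 of slack, so keys written
 * by the old buggy versions (one u64 too big) still validate, while
 * anything larger is still rejected as "value too big".
 */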
@ -778,7 +778,7 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
|
||||
* cause offset to point to the next bucket:
|
||||
*/
|
||||
if (!len)
|
||||
__set_bkey_deleted(k.k);
|
||||
k.k->type = KEY_TYPE_DELETED;
|
||||
else if (bkey_extent_is_data(k.k)) {
|
||||
struct bkey_s_extent e = bkey_s_to_extent(k);
|
||||
union bch_extent_entry *entry;
|
||||
@ -833,7 +833,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
|
||||
k->size = len;
|
||||
|
||||
if (!len)
|
||||
__set_bkey_deleted(k);
|
||||
k->type = KEY_TYPE_DELETED;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1103,7 +1103,7 @@ static void bch2_drop_subtract(struct extent_insert_state *s, struct bkey_s k)
|
||||
bch2_subtract_sectors(s, k.s_c,
|
||||
bkey_start_offset(k.k), k.k->size);
|
||||
k.k->size = 0;
|
||||
__set_bkey_deleted(k.k);
|
||||
k.k->type = KEY_TYPE_DELETED;
|
||||
}
|
||||
|
||||
static bool bch2_extent_merge_inline(struct bch_fs *,
|
||||
@ -1143,10 +1143,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
||||
struct bset_tree *t = bset_tree_last(l->b);
|
||||
struct bkey_packed *where =
|
||||
bch2_btree_node_iter_bset_pos(&l->iter, l->b, t);
|
||||
struct bkey_packed *prev = bch2_bkey_prev(l->b, t, where);
|
||||
struct bkey_packed *prev = bch2_bkey_prev_filter(l->b, t, where,
|
||||
KEY_TYPE_DISCARD);
|
||||
struct bkey_packed *next_live_key = where;
|
||||
unsigned clobber_u64s;
|
||||
|
||||
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
|
||||
|
||||
if (prev)
|
||||
where = bkey_next(prev);
|
||||
|
||||
@ -1188,6 +1191,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
|
||||
: &s->whiteout;
|
||||
BKEY_PADDED(k) split;
|
||||
|
||||
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
|
||||
EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
|
||||
EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);
|
||||
|
||||
@ -1246,8 +1250,6 @@ __extent_insert_advance_pos(struct extent_insert_state *s,
|
||||
else
|
||||
ret = BTREE_INSERT_OK;
|
||||
|
||||
EBUG_ON(bkey_deleted(&s->insert->k->k) || !s->insert->k->k.size);
|
||||
|
||||
if (ret == BTREE_INSERT_OK)
|
||||
s->committed = next_pos;
|
||||
|
||||
@ -1446,6 +1448,7 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
|
||||
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
|
||||
|
||||
s->whiteout = *insert;
|
||||
s->whiteout.k.type = KEY_TYPE_DISCARD;
|
||||
|
||||
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
|
||||
(ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
|
||||
@ -1488,6 +1491,8 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
|
||||
bset_written(b, bset(b, t))) {
|
||||
struct bkey_i discard = *insert;
|
||||
|
||||
discard.k.type = KEY_TYPE_DISCARD;
|
||||
|
||||
switch (overlap) {
|
||||
case BCH_EXTENT_OVERLAP_FRONT:
|
||||
bch2_cut_front(bkey_start_pos(k.k), &discard);
|
||||
@ -1634,7 +1639,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
|
||||
};
|
||||
|
||||
EBUG_ON(iter->level);
|
||||
EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
|
||||
EBUG_ON(!insert->k->k.size);
|
||||
|
||||
/*
|
||||
* As we process overlapping extents, we advance @iter->pos both to
|
||||
@ -1979,11 +1984,11 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
|
||||
return false;
|
||||
|
||||
case KEY_TYPE_DELETED:
|
||||
case KEY_TYPE_COOKIE:
|
||||
return true;
|
||||
|
||||
case KEY_TYPE_DISCARD:
|
||||
return bversion_zero(k.k->version);
|
||||
case KEY_TYPE_COOKIE:
|
||||
return false;
|
||||
|
||||
case BCH_EXTENT:
|
||||
case BCH_EXTENT_CACHED:
|
||||
@ -2051,11 +2056,6 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
|
||||
int ret;
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_DELETED:
|
||||
case KEY_TYPE_DISCARD:
|
||||
case KEY_TYPE_COOKIE:
|
||||
return 0;
|
||||
|
||||
case KEY_TYPE_ERROR:
|
||||
return -EIO;
|
||||
|
||||
@ -2069,11 +2069,8 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
return ret;
|
||||
|
||||
case BCH_RESERVATION:
|
||||
return 0;
|
||||
|
||||
default:
|
||||
BUG();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2099,7 +2096,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
|
||||
return BCH_MERGE_NOMERGE;
|
||||
|
||||
switch (l->k.type) {
|
||||
case KEY_TYPE_DELETED:
|
||||
case KEY_TYPE_DISCARD:
|
||||
case KEY_TYPE_ERROR:
|
||||
/* These types are mergeable, and no val to check */
|
||||
|
@ -2078,6 +2078,29 @@ out:
|
||||
|
||||
/* truncate: */
|
||||
|
||||
static inline int range_has_data(struct bch_fs *c,
|
||||
struct bpos start,
|
||||
struct bpos end)
|
||||
{
|
||||
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
|
||||
start, 0, k) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
|
||||
break;
|
||||
|
||||
if (bkey_extent_is_data(k.k)) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
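A hedged arithmetic note plus usage sketch for the helper above: with 4096-byte pages and 512-byte sectors, PAGE_SECTOR_SHIFT is 3, so page @index covers sectors [index << 3, (index + 1) << 3), which is exactly the range __bch2_truncate_page() passes in the hunk below:

	/* e.g. index == 5 covers sectors 40..47 of the file */
	ret = range_has_data(c,
			POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT),
			POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT));
	if (ret <= 0)
		return ret;	/* < 0: error, 0: no extents back this page */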
static int __bch2_truncate_page(struct bch_inode_info *inode,
|
||||
pgoff_t index, loff_t start, loff_t end)
|
||||
{
|
||||
@ -2099,30 +2122,16 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
|
||||
|
||||
page = find_lock_page(mapping, index);
|
||||
if (!page) {
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bkey_s_c_null;
|
||||
|
||||
/*
|
||||
* XXX: we're doing two index lookups when we end up reading the
|
||||
* page
|
||||
*/
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
|
||||
POS(inode->v.i_ino,
|
||||
index << PAGE_SECTOR_SHIFT), 0, k) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k),
|
||||
POS(inode->v.i_ino,
|
||||
(index + 1) << PAGE_SECTOR_SHIFT)) >= 0)
|
||||
break;
|
||||
ret = range_has_data(c,
|
||||
POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT),
|
||||
POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT));
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
if (k.k->type != KEY_TYPE_DISCARD &&
|
||||
k.k->type != BCH_RESERVATION) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
goto create;
|
||||
}
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return 0;
|
||||
create:
|
||||
page = find_or_create_page(mapping, index, GFP_KERNEL);
|
||||
if (unlikely(!page)) {
|
||||
ret = -ENOMEM;
|
||||
@ -2389,9 +2398,6 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
|
||||
|
||||
bkey_reassemble(©.k, k);
|
||||
|
||||
if (bkey_deleted(©.k.k))
|
||||
copy.k.k.type = KEY_TYPE_DISCARD;
|
||||
|
||||
bch2_cut_front(src.pos, ©.k);
|
||||
copy.k.k.p.offset -= len >> 9;
|
||||
|
||||
|
@ -252,9 +252,6 @@ static int check_extents(struct bch_fs *c)
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
|
||||
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
|
||||
if (k.k->type == KEY_TYPE_DISCARD)
|
||||
continue;
|
||||
|
||||
ret = walk_inode(c, &w, k.k->p.inode);
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -72,7 +72,8 @@ static void journal_seq_blacklist_flush(struct journal *j,
|
||||
n = bl->entries[i];
|
||||
mutex_unlock(&j->blacklist_lock);
|
||||
|
||||
__bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, 0, 0, 0);
|
||||
__bch2_btree_iter_init(&iter, c, n.btree_id, n.pos,
|
||||
0, 0, BTREE_ITER_NODES);
|
||||
|
||||
b = bch2_btree_iter_peek_node(&iter);
|
||||
|
||||
|
@ -2,11 +2,29 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "btree_update.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "tests.h"
|
||||
|
||||
#include "linux/kthread.h"
|
||||
#include "linux/random.h"
|
||||
|
||||
static void delete_test_keys(struct bch_fs *c)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
/* unit tests */
|
||||
|
||||
static void test_delete(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
@ -36,6 +54,224 @@ static void test_delete(struct bch_fs *c, u64 nr)
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
static void test_delete_written(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_cookie k;
|
||||
int ret;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &k.k_i));
|
||||
BUG_ON(ret);
|
||||
|
||||
bch2_journal_flush_all_pins(&c->journal);
|
||||
|
||||
ret = bch2_btree_delete_at(&iter, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
static void test_iterate(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 i;
|
||||
int ret;
|
||||
|
||||
delete_test_keys(c);
|
||||
|
||||
pr_info("inserting test keys");
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
struct bkey_i_cookie k;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p.offset = i;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
pr_info("iterating forwards");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k)
|
||||
BUG_ON(k.k->p.offset != i++);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i != nr);
|
||||
|
||||
pr_info("iterating backwards");
|
||||
|
||||
while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
|
||||
BUG_ON(k.k->p.offset != --i);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i);
|
||||
}
|
||||
|
||||
static void test_iterate_extents(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 i;
|
||||
int ret;
|
||||
|
||||
delete_test_keys(c);
|
||||
|
||||
pr_info("inserting test extents");
|
||||
|
||||
for (i = 0; i < nr; i += 8) {
|
||||
struct bkey_i_cookie k;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p.offset = i + 8;
|
||||
k.k.size = 8;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
pr_info("iterating forwards");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
|
||||
BUG_ON(bkey_start_offset(k.k) != i);
|
||||
i = k.k->p.offset;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i != nr);
|
||||
|
||||
pr_info("iterating backwards");
|
||||
|
||||
while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
|
||||
BUG_ON(k.k->p.offset != i);
|
||||
i = bkey_start_offset(k.k);
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i);
|
||||
}
|
||||
|
||||
static void test_iterate_slots(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 i;
|
||||
int ret;
|
||||
|
||||
delete_test_keys(c);
|
||||
|
||||
pr_info("inserting test keys");
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
struct bkey_i_cookie k;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p.offset = i * 2;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
pr_info("iterating forwards");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) {
|
||||
BUG_ON(k.k->p.offset != i);
|
||||
i += 2;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i != nr * 2);
|
||||
|
||||
pr_info("iterating forwards by slots");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0),
|
||||
BTREE_ITER_SLOTS, k) {
|
||||
BUG_ON(bkey_deleted(k.k) != (i & 1));
|
||||
BUG_ON(k.k->p.offset != i++);
|
||||
|
||||
if (i == nr * 2)
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 i;
|
||||
int ret;
|
||||
|
||||
delete_test_keys(c);
|
||||
|
||||
pr_info("inserting test keys");
|
||||
|
||||
for (i = 0; i < nr; i += 16) {
|
||||
struct bkey_i_cookie k;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p.offset = i + 16;
|
||||
k.k.size = 8;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
pr_info("iterating forwards");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) {
|
||||
BUG_ON(bkey_start_offset(k.k) != i + 8);
|
||||
BUG_ON(k.k->size != 8);
|
||||
i += 16;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
BUG_ON(i != nr);
|
||||
|
||||
pr_info("iterating forwards by slots");
|
||||
|
||||
i = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0),
|
||||
BTREE_ITER_SLOTS, k) {
|
||||
BUG_ON(bkey_deleted(k.k) != !(i % 16));
|
||||
|
||||
BUG_ON(bkey_start_offset(k.k) != i);
|
||||
BUG_ON(k.k->size != 8);
|
||||
i = k.k->p.offset;
|
||||
|
||||
if (i == nr)
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
/* perf tests */
|
||||
|
||||
static u64 test_rand(void)
|
||||
{
|
||||
u64 v;
|
||||
@ -183,7 +419,7 @@ static void seq_delete(struct bch_fs *c, u64 nr)
|
||||
int ret;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
POS_MIN, POS_MAX,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
@ -256,6 +492,11 @@ void bch2_btree_perf_test(struct bch_fs *c, const char *testname,
|
||||
|
||||
/* a unit test, not a perf test: */
|
||||
perf_test(test_delete);
|
||||
perf_test(test_delete_written);
|
||||
perf_test(test_iterate);
|
||||
perf_test(test_iterate_extents);
|
||||
perf_test(test_iterate_slots);
|
||||
perf_test(test_iterate_slots_extents);
|
||||
|
||||
if (!j.fn) {
|
||||
pr_err("unknown test %s", testname);
|
||||
|
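A hedged usage sketch: the new unit tests are dispatched through the same entry point as the perf tests, so (assuming the usual bch2_btree_perf_test(c, testname, nr, nr_threads) signature) one of them would be kicked off as:

	bch2_btree_perf_test(c, "test_iterate_slots_extents", 1024, 1);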
@ -13,24 +13,8 @@
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/xattr.h>
|
||||
|
||||
static unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
|
||||
{
|
||||
return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
|
||||
name_len + val_len, sizeof(u64));
|
||||
}
|
||||
|
||||
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
|
||||
|
||||
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
|
||||
|
||||
struct xattr_search_key {
|
||||
u8 type;
|
||||
struct qstr name;
|
||||
};
|
||||
|
||||
#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \
|
||||
{ .type = _type, .name = QSTR_INIT(_name, _len) })
|
||||
|
||||
static u64 bch2_xattr_hash(const struct bch_hash_info *info,
|
||||
const struct xattr_search_key *key)
|
||||
{
|
||||
@ -158,6 +142,17 @@ void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||
}
|
||||
}
|
||||
|
||||
struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *c,
|
||||
struct btree_iter *iter,
|
||||
struct bch_inode_info *inode,
|
||||
const char *name, int type)
|
||||
{
|
||||
return bch2_hash_lookup(bch2_xattr_hash_desc,
|
||||
&inode->ei_str_hash,
|
||||
c, inode->v.i_ino, iter,
|
||||
&X_SEARCH(type, name, strlen(name)));
|
||||
}
|
||||
|
||||
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
|
||||
const char *name, void *buffer, size_t size, int type)
|
||||
{
|
||||
@ -185,19 +180,15 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
const struct bch_hash_info *hash_info,
|
||||
const char *name, const void *value, size_t size,
|
||||
int flags, int type, u64 *journal_seq)
|
||||
int bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
const struct bch_hash_info *hash_info,
|
||||
const char *name, const void *value, size_t size,
|
||||
int flags, int type, u64 *journal_seq)
|
||||
{
|
||||
struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
|
||||
int ret;
|
||||
|
||||
if (!value) {
|
||||
ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
|
||||
c, inum,
|
||||
journal_seq, &search);
|
||||
} else {
|
||||
if (value) {
|
||||
struct bkey_i_xattr *xattr;
|
||||
unsigned u64s = BKEY_U64s +
|
||||
xattr_val_u64s(search.name.len, size);
|
||||
@ -223,6 +214,9 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
|
||||
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
|
||||
kfree(xattr);
|
||||
} else {
|
||||
ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
|
||||
c, inum, journal_seq, &search);
|
||||
}
|
||||
|
||||
if (ret == -ENOENT)
|
||||
@ -231,15 +225,6 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_xattr_set(struct bch_fs *c, struct bch_inode_info *inode,
|
||||
const char *name, const void *value, size_t size,
|
||||
int flags, int type)
|
||||
{
|
||||
return __bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
|
||||
name, value, size, flags, type,
|
||||
&inode->ei_journal_seq);
|
||||
}
|
||||
|
||||
static size_t bch2_xattr_emit(struct dentry *dentry,
|
||||
const struct bch_xattr *xattr,
|
||||
char *buffer, size_t buffer_size)
|
||||
@ -323,8 +308,9 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
|
||||
struct bch_inode_info *inode = to_bch_ei(vinode);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
|
||||
return bch2_xattr_set(c, inode, name, value, size, flags,
|
||||
handler->flags);
|
||||
return bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
|
||||
name, value, size, flags, handler->flags,
|
||||
&inode->ei_journal_seq);
|
||||
}
|
||||
|
||||
static const struct xattr_handler bch_xattr_user_handler = {
|
||||
|
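A hedged caller sketch: with the wrapper gone there is a single bch2_xattr_set() that takes the inode number, hash info and journal sequence pointer directly, exactly as the handler above now does:

	ret = bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
			     name, value, size, flags, handler->flags,
			     &inode->ei_journal_seq);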
@ -13,17 +13,37 @@ void bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
.val_to_text = bch2_xattr_to_text, \
|
||||
}
|
||||
|
||||
static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
|
||||
{
|
||||
return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
|
||||
name_len + val_len, sizeof(u64));
|
||||
}
|
||||
|
||||
#define xattr_val(_xattr) \
|
||||
((void *) (_xattr)->x_name + (_xattr)->x_name_len)
|
||||
|
||||
struct xattr_search_key {
|
||||
u8 type;
|
||||
struct qstr name;
|
||||
};
|
||||
|
||||
#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \
|
||||
{ .type = _type, .name = QSTR_INIT(_name, _len) })
|
||||
|
||||
struct dentry;
|
||||
struct xattr_handler;
|
||||
struct bch_hash_info;
|
||||
struct bch_inode_info;
|
||||
|
||||
struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *,
|
||||
struct btree_iter *,
|
||||
struct bch_inode_info *,
|
||||
const char *, int);
|
||||
int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *,
|
||||
const char *, void *, size_t, int);
|
||||
int __bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *,
|
||||
const char *, const void *, size_t, int, int, u64 *);
|
||||
int bch2_xattr_set(struct bch_fs *, struct bch_inode_info *,
|
||||
const char *, const void *, size_t, int, int);
|
||||
|
||||
int bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *,
|
||||
const char *, const void *, size_t, int, int, u64 *);
|
||||
ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
|
||||
|
||||
extern const struct xattr_handler *bch2_xattr_handlers[];
|
||||
|
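A worked example of the xattr sizing now exported from xattr.h; the 4-byte offsetof(struct bch_xattr, x_name) is an assumption for illustration:

/*
 * An 8-byte name with a 16-byte value:
 *
 *   xattr_val_u64s(8, 16) = DIV_ROUND_UP(4 + 8 + 16, 8) = 4 u64s
 *
 * so the bkey allocated in bch2_xattr_set() is BKEY_U64s + 4 u64s long,
 * and xattr_val() points just past the name, at x_name + x_name_len.
 */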