Update bcachefs sources to eab3b355cf bcachefs: trace transaction restarts

This commit is contained in:
Kent Overstreet 2018-07-16 03:58:54 -04:00
parent 75c7148e0a
commit 76a549d82d
32 changed files with 2105 additions and 1174 deletions

View File

@ -1 +1 @@
940d6ca657ea70758f3f43323bfd531019a40d3c
eab3b355cf6fcabbf07d7a9032c68e95cab37ad0

View File

@ -239,8 +239,9 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
const struct xattr_handler *h = xattr_resolve_name(&attr);
int ret = bch2_xattr_set(c, dst->bi_inum, &hash_info, attr,
val, val_size, 0, h->flags, NULL);
int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
val, val_size, h->flags, 0));
if (ret < 0)
die("error creating xattr: %s", strerror(-ret));
}
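
Roughly, the bch2_trans_do() call above expands to a retry loop of the following shape (this is a sketch of the macro defined in the btree_update.h hunk further down, with journal_seq NULL and flags BTREE_INSERT_ATOMIC):

	struct btree_trans trans;
	int ret;

	bch2_trans_init(&trans, c);
	do {
		bch2_trans_begin(&trans);
		ret = bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
				     val, val_size, h->flags, 0) ?:
		      bch2_trans_commit(&trans, NULL, NULL, NULL,
					BTREE_INSERT_ATOMIC);
	} while (ret == -EINTR);
	bch2_trans_exit(&trans);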

View File

@ -132,7 +132,8 @@ invalid:
* Convert from in-memory to filesystem representation.
*/
static struct bkey_i_xattr *
bch2_acl_to_xattr(const struct posix_acl *acl,
bch2_acl_to_xattr(struct btree_trans *trans,
const struct posix_acl *acl,
int type)
{
struct bkey_i_xattr *xattr;
@ -164,7 +165,7 @@ bch2_acl_to_xattr(const struct posix_acl *acl,
if (u64s > U8_MAX)
return ERR_PTR(-E2BIG);
xattr = kmalloc(u64s * sizeof(u64), GFP_KERNEL);
xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(xattr))
return xattr;
@ -214,20 +215,29 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
struct bkey_s_c k;
struct posix_acl *acl = NULL;
int name_index = acl_to_xattr_type(type);
k = bch2_xattr_get_iter(c, &iter, inode, "", name_index);
if (IS_ERR(k.k)) {
if (PTR_ERR(k.k) != -ENOENT)
acl = ERR_CAST(k.k);
bch2_trans_init(&trans, c);
retry:
bch2_trans_begin(&trans);
iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
&inode->ei_str_hash, inode->v.i_ino,
&X_SEARCH(acl_to_xattr_type(type), "", 0),
0);
if (IS_ERR(iter)) {
if (PTR_ERR(iter) == -EINTR)
goto retry;
if (PTR_ERR(iter) != -ENOENT)
acl = ERR_CAST(iter);
goto out;
}
xattr = bkey_s_c_to_xattr(k);
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
@ -235,49 +245,59 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
if (!IS_ERR(acl))
set_cached_acl(&inode->v, type, acl);
out:
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
return acl;
}
int __bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
int bch2_set_acl_trans(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u,
const struct bch_hash_info *hash_info,
struct posix_acl *acl, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret;
if (type == ACL_TYPE_DEFAULT &&
!S_ISDIR(inode->v.i_mode))
!S_ISDIR(inode_u->bi_mode))
return acl ? -EACCES : 0;
if (acl) {
struct bkey_i_xattr *xattr =
bch2_acl_to_xattr(acl, type);
bch2_acl_to_xattr(trans, acl, type);
if (IS_ERR(xattr))
return PTR_ERR(xattr);
ret = bch2_hash_set(bch2_xattr_hash_desc, &inode->ei_str_hash,
c, inode->v.i_ino, &inode->ei_journal_seq,
&xattr->k_i, 0);
kfree(xattr);
ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
inode_u->bi_inum, &xattr->k_i, 0);
} else {
struct xattr_search_key search =
X_SEARCH(acl_to_xattr_type(type), "", 0);
ret = bch2_hash_delete(bch2_xattr_hash_desc, &inode->ei_str_hash,
c, inode->v.i_ino, &inode->ei_journal_seq,
&search);
ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info,
inode_u->bi_inum, &search);
}
if (!ret)
set_cached_acl(&inode->v, type, acl);
return ret == -ENOENT ? 0 : ret;
}
return ret;
static int inode_update_for_set_acl_fn(struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct timespec now = current_time(&inode->v);
umode_t mode = (unsigned long) p;
bi->bi_ctime = timespec_to_bch2_time(c, now);
bi->bi_mode = mode;
return 0;
}
int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct bch_inode_unpacked inode_u;
umode_t mode = inode->v.i_mode;
int ret;
@ -287,20 +307,77 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
return ret;
}
ret = __bch2_set_acl(vinode, acl, type);
if (ret)
return ret;
bch2_trans_init(&trans, c);
retry:
bch2_trans_begin(&trans);
if (mode != inode->v.i_mode) {
mutex_lock(&inode->ei_update_lock);
inode->v.i_mode = mode;
inode->v.i_ctime = current_time(&inode->v);
ret = bch2_set_acl_trans(&trans,
&inode->ei_inode,
&inode->ei_str_hash,
acl, type) ?:
bch2_write_inode_trans(&trans, inode, &inode_u,
inode_update_for_set_acl_fn,
(void *)(unsigned long) mode) ?:
bch2_trans_commit(&trans, NULL, NULL,
&inode->ei_journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOUNLOCK);
if (ret == -EINTR)
goto retry;
if (unlikely(ret))
goto err;
ret = bch2_write_inode(c, inode);
mutex_unlock(&inode->ei_update_lock);
}
bch2_inode_update_after_write(c, inode, &inode_u,
ATTR_CTIME|ATTR_MODE);
set_cached_acl(&inode->v, type, acl);
err:
bch2_trans_exit(&trans);
return ret;
}
int bch2_acl_chmod(struct btree_trans *trans,
struct bch_inode_info *inode,
umode_t mode,
struct posix_acl **new_acl)
{
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new;
struct posix_acl *acl;
int ret = 0;
iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
&inode->ei_str_hash, inode->v.i_ino,
&X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0;
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
if (IS_ERR_OR_NULL(acl))
return PTR_ERR(acl);
ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
if (ret)
goto err;
new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS);
if (IS_ERR(new)) {
ret = PTR_ERR(new);
goto err;
}
bch2_trans_update(trans, iter, &new->k_i, 0);
*new_acl = acl;
acl = NULL;
err:
kfree(acl);
return ret;
}
#endif /* CONFIG_BCACHEFS_POSIX_ACL */

View File

@ -1,6 +1,11 @@
#ifndef _BCACHEFS_ACL_H
#define _BCACHEFS_ACL_H
struct bch_inode_unpacked;
struct bch_hash_info;
struct bch_inode_info;
struct posix_acl;
#ifdef CONFIG_BCACHEFS_POSIX_ACL
#define BCH_ACL_VERSION 0x0001
@ -20,20 +25,30 @@ typedef struct {
__le32 a_version;
} bch_acl_header;
struct posix_acl;
struct posix_acl *bch2_get_acl(struct inode *, int);
extern struct posix_acl *bch2_get_acl(struct inode *, int);
extern int __bch2_set_acl(struct inode *, struct posix_acl *, int);
extern int bch2_set_acl(struct inode *, struct posix_acl *, int);
int bch2_set_acl_trans(struct btree_trans *,
struct bch_inode_unpacked *,
const struct bch_hash_info *,
struct posix_acl *, int);
int bch2_set_acl(struct inode *, struct posix_acl *, int);
int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *,
umode_t, struct posix_acl **);
#else
static inline int __bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
static inline int bch2_set_acl_trans(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u,
const struct bch_hash_info *hash_info,
struct posix_acl *acl, int type)
{
return 0;
}
static inline int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
static inline int bch2_acl_chmod(struct btree_trans *trans,
struct bch_inode_info *inode,
umode_t mode,
struct posix_acl **new_acl)
{
return 0;
}

View File

@ -262,7 +262,11 @@ do { \
BCH_DEBUG_PARAM(journal_seq_verify, \
"Store the journal sequence number in the version " \
"number of every btree key, and verify that btree " \
"update ordering is preserved during recovery")
"update ordering is preserved during recovery") \
BCH_DEBUG_PARAM(inject_invalid_keys, \
"Store the journal sequence number in the version " \
"number of every btree key, and verify that btree " \
"update ordering is preserved during recovery") \
#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
@ -465,6 +469,7 @@ enum {
/* misc: */
BCH_FS_BDEV_MOUNTED,
BCH_FS_FSCK_FIXED_ERRORS,
BCH_FS_FSCK_UNFIXED_ERRORS,
BCH_FS_FIXED_GENS,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,

View File

@ -722,9 +722,7 @@ enum {
__BCH_INODE_I_SIZE_DIRTY = 5,
__BCH_INODE_I_SECTORS_DIRTY = 6,
/* not implemented yet: */
__BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
__BCH_INODE_UNLINKED = 7,
/* bits 20+ reserved for packed fields below: */
};
@ -736,7 +734,7 @@ enum {
#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32);
@ -1222,6 +1220,7 @@ enum bch_sb_features {
BCH_FEATURE_LZ4 = 0,
BCH_FEATURE_GZIP = 1,
BCH_FEATURE_ZSTD = 2,
BCH_FEATURE_ATOMIC_NLINK = 3,
};
/* options: */

View File

@ -206,14 +206,12 @@ void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
{
if (l.hi != r.hi)
return l.hi < r.hi ? -1 : 1;
if (l.lo != r.lo)
return l.lo < r.lo ? -1 : 1;
return 0;
return (l.hi > r.hi) - (l.hi < r.hi) ?:
(l.lo > r.lo) - (l.lo < r.lo);
}
#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })
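
The rewritten bversion_cmp() uses the branchless three-way comparison idiom: (a > b) - (a < b) evaluates to -1, 0 or 1, and chaining with the GNU ?: extension means the low word only breaks ties when the high words compare equal. A minimal standalone sketch of the idiom:

	#include <assert.h>

	/* returns -1, 0 or 1, with no branches: */
	static int cmp3(unsigned long long a, unsigned long long b)
	{
		return (a > b) - (a < b);
	}

	int main(void)
	{
		assert(cmp3(1, 2) == -1);
		assert(cmp3(2, 2) == 0);
		assert(cmp3(3, 2) == 1);
		return 0;
	}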
static __always_inline int bversion_zero(struct bversion v)
{

View File

@ -1449,7 +1449,7 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
!btree_iter_pos_cmp_packed(b, &search, m, strictly_greater))
m = bkey_next(m);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
if (btree_keys_expensive_checks(b)) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
BUG_ON(prev &&

View File

@ -730,6 +730,7 @@ retry:
if (bch2_btree_node_relock(iter, level + 1))
goto retry;
trans_restart();
return ERR_PTR(-EINTR);
}
}

View File

@ -1298,7 +1298,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
const char *invalid = bch2_bkey_val_invalid(c, type, u);
if (invalid) {
if (invalid ||
(inject_invalid_keys(c) &&
!bversion_cmp(u.k->version, MAX_VERSION))) {
char buf[160];
bch2_bkey_val_to_text(c, type, buf, sizeof(buf), u);
@ -1310,6 +1312,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
(u64 *) vstruct_end(i) - (u64 *) k);
set_btree_bset_end(b, b->set);
continue;
}

View File

@ -262,6 +262,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
if (ret)
__btree_node_lock_type(c, b, type);
else
trans_restart();
return ret;
}
@ -1555,6 +1558,7 @@ void bch2_btree_iter_unlink(struct btree_iter *iter)
for_each_linked_btree_iter(iter, linked)
if (linked->next == iter) {
linked->next = iter->next;
iter->next = iter;
return;
}
@ -1571,8 +1575,9 @@ void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
unsigned nr_iters = 0;
for_each_btree_iter(iter, new)
nr_iters++;
for_each_btree_iter(new, iter)
if (iter->btree_id == new->btree_id)
nr_iters++;
BUG_ON(nr_iters > SIX_LOCK_MAX_RECURSE);
}
@ -1580,8 +1585,278 @@ void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
{
unsigned i;
__bch2_btree_iter_unlock(dst);
memcpy(dst, src, offsetof(struct btree_iter, next));
dst->nodes_locked = dst->nodes_intent_locked = 0;
dst->uptodate = BTREE_ITER_NEED_RELOCK;
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_locked(dst, i))
six_lock_increment(&dst->l[i].b->lock,
__btree_lock_want(dst, i));
}
/* new transactional stuff: */
static void btree_trans_verify(struct btree_trans *trans)
{
unsigned i;
for (i = 0; i < trans->nr_iters; i++) {
struct btree_iter *iter = &trans->iters[i];
BUG_ON(btree_iter_linked(iter) !=
((trans->iters_linked & (1 << i)) &&
!is_power_of_2(trans->iters_linked)));
}
}
void bch2_trans_iter_free(struct btree_trans *trans,
struct btree_iter *iter)
{
unsigned idx;
for (idx = 0; idx < trans->nr_iters; idx++)
if (&trans->iters[idx] == iter)
goto found;
BUG();
found:
BUG_ON(!(trans->iters_linked & (1U << idx)));
trans->iters_live &= ~(1U << idx);
trans->iters_linked &= ~(1U << idx);
bch2_btree_iter_unlink(iter);
}
static int btree_trans_realloc_iters(struct btree_trans *trans)
{
struct btree_iter *new_iters;
unsigned i;
bch2_trans_unlock(trans);
new_iters = kmalloc(sizeof(struct btree_iter) * BTREE_ITER_MAX,
GFP_NOFS);
if (!new_iters)
return -ENOMEM;
memcpy(new_iters, trans->iters,
sizeof(struct btree_iter) * trans->nr_iters);
trans->iters = new_iters;
for (i = 0; i < trans->nr_iters; i++)
trans->iters[i].next = &trans->iters[i];
if (trans->iters_linked) {
unsigned first_linked = __ffs(trans->iters_linked);
for (i = first_linked + 1; i < trans->nr_iters; i++)
if (trans->iters_linked & (1 << i))
bch2_btree_iter_link(&trans->iters[first_linked],
&trans->iters[i]);
}
btree_trans_verify(trans);
if (trans->iters_live) {
trans_restart();
return -EINTR;
}
return 0;
}
int bch2_trans_preload_iters(struct btree_trans *trans)
{
if (trans->iters != trans->iters_onstack)
return 0;
return btree_trans_realloc_iters(trans);
}
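
Preloading matters because btree_trans_realloc_iters() has to return -EINTR (a transaction restart) whenever iterators are already live when the array moves; calling bch2_trans_preload_iters() right after bch2_trans_init(), as the fsck code below does, moves that reallocation to a point where no iterators exist yet, so it can never force a restart.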
static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
unsigned btree_id,
unsigned flags, u64 iter_id)
{
struct btree_iter *iter;
int idx;
BUG_ON(trans->nr_iters > BTREE_ITER_MAX);
for (idx = 0; idx < trans->nr_iters; idx++)
if (trans->iter_ids[idx] == iter_id)
goto found;
idx = -1;
found:
if (idx < 0) {
idx = ffz(trans->iters_linked);
if (idx < trans->nr_iters)
goto got_slot;
BUG_ON(trans->nr_iters == BTREE_ITER_MAX);
if (trans->iters == trans->iters_onstack &&
trans->nr_iters == ARRAY_SIZE(trans->iters_onstack)) {
int ret = btree_trans_realloc_iters(trans);
if (ret)
return ERR_PTR(ret);
}
idx = trans->nr_iters++;
got_slot:
trans->iter_ids[idx] = iter_id;
iter = &trans->iters[idx];
bch2_btree_iter_init(iter, trans->c, btree_id, POS_MIN, flags);
} else {
iter = &trans->iters[idx];
BUG_ON(iter->btree_id != btree_id);
BUG_ON((iter->flags ^ flags) &
(BTREE_ITER_SLOTS|BTREE_ITER_IS_EXTENTS));
iter->flags &= ~(BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
}
BUG_ON(trans->iters_live & (1 << idx));
trans->iters_live |= 1 << idx;
if (trans->iters_linked &&
!(trans->iters_linked & (1 << idx)))
bch2_btree_iter_link(&trans->iters[__ffs(trans->iters_linked)],
iter);
trans->iters_linked |= 1 << idx;
btree_trans_verify(trans);
return iter;
}
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos, unsigned flags,
u64 iter_id)
{
struct btree_iter *iter =
__btree_trans_get_iter(trans, btree_id, flags, iter_id);
if (!IS_ERR(iter))
bch2_btree_iter_set_pos(iter, pos);
return iter;
}
struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
struct btree_iter *src,
u64 iter_id)
{
struct btree_iter *iter =
__btree_trans_get_iter(trans, src->btree_id,
src->flags, iter_id);
if (!IS_ERR(iter))
bch2_btree_iter_copy(iter, src);
return iter;
}
void *bch2_trans_kmalloc(struct btree_trans *trans,
size_t size)
{
void *ret;
if (trans->mem_top + size > trans->mem_bytes) {
size_t old_bytes = trans->mem_bytes;
size_t new_bytes = roundup_pow_of_two(trans->mem_top + size);
void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
if (!new_mem)
return ERR_PTR(-ENOMEM);
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
if (old_bytes) {
trans_restart();
return ERR_PTR(-EINTR);
}
}
ret = trans->mem + trans->mem_top;
trans->mem_top += size;
return ret;
}
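
Allocations from this arena are freed en masse when the transaction is reset or exited, so callers may return pointers into it without a matching kfree(). The catch is that krealloc() can move the buffer: if the arena has to grow after earlier allocations (old_bytes nonzero), pointers already handed out may be stale, so the function forces a transaction restart by returning -EINTR. Callers just propagate the error, as in bch2_acl_to_xattr() above:

	xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
	if (IS_ERR(xattr))
		return xattr;	/* possibly -EINTR, restarting the transaction */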
int bch2_trans_unlock(struct btree_trans *trans)
{
unsigned iters = trans->iters_linked;
int ret = 0;
while (iters) {
unsigned idx = __ffs(iters);
struct btree_iter *iter = &trans->iters[idx];
if (iter->flags & BTREE_ITER_ERROR)
ret = -EIO;
__bch2_btree_iter_unlock(iter);
iters ^= 1 << idx;
}
return ret;
}
void __bch2_trans_begin(struct btree_trans *trans)
{
unsigned idx;
btree_trans_verify(trans);
/*
* On transaction restart, the transaction isn't required to allocate
* all the same iterators it allocated on the last iteration:
*
* Unlink any iterators it didn't use this iteration, assuming it got
* further (allocated an iter with a higher idx) than where the iter
* was originally allocated:
*/
while (trans->iters_linked &&
trans->iters_live &&
(idx = __fls(trans->iters_linked)) >
__fls(trans->iters_live)) {
trans->iters_linked ^= 1 << idx;
bch2_btree_iter_unlink(&trans->iters[idx]);
}
trans->iters_live = 0;
trans->nr_updates = 0;
trans->mem_top = 0;
btree_trans_verify(trans);
}
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
{
trans->c = c;
trans->nr_restarts = 0;
trans->nr_iters = 0;
trans->iters_live = 0;
trans->iters_linked = 0;
trans->nr_updates = 0;
trans->mem_top = 0;
trans->mem_bytes = 0;
trans->mem = NULL;
trans->iters = trans->iters_onstack;
}
int bch2_trans_exit(struct btree_trans *trans)
{
int ret = bch2_trans_unlock(trans);
kfree(trans->mem);
if (trans->iters != trans->iters_onstack)
kfree(trans->iters);
trans->mem = (void *) 0x1;
trans->iters = (void *) 0x1;
return ret;
}

View File

@ -269,4 +269,68 @@ static inline int btree_iter_err(struct bkey_s_c k)
return PTR_ERR_OR_ZERO(k.k);
}
/* new multiple iterator interface: */
int bch2_trans_preload_iters(struct btree_trans *);
void bch2_trans_iter_free(struct btree_trans *,
struct btree_iter *);
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
struct bpos, unsigned, u64);
struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *,
struct btree_iter *, u64);
static __always_inline u64 __btree_iter_id(void)
{
u64 ret = 0;
ret <<= 32;
ret |= _RET_IP_ & U32_MAX;
ret <<= 32;
ret |= _THIS_IP_ & U32_MAX;
return ret;
}
static __always_inline struct btree_iter *
bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
struct bpos pos, unsigned flags)
{
return __bch2_trans_get_iter(trans, btree_id, pos, flags,
__btree_iter_id());
}
static __always_inline struct btree_iter *
bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src)
{
return __bch2_trans_copy_iter(trans, src, __btree_iter_id());
}
void __bch2_trans_begin(struct btree_trans *);
void *bch2_trans_kmalloc(struct btree_trans *, size_t);
int bch2_trans_unlock(struct btree_trans *);
void bch2_trans_init(struct btree_trans *, struct bch_fs *);
int bch2_trans_exit(struct btree_trans *);
#ifdef TRACE_TRANSACTION_RESTARTS
#define bch2_trans_begin(_trans) \
do { \
if (is_power_of_2((_trans)->nr_restarts) && \
(_trans)->nr_restarts >= 8) \
pr_info("nr restarts: %zu", (_trans)->nr_restarts); \
\
(_trans)->nr_restarts++; \
__bch2_trans_begin(_trans); \
} while (0)
#else
#define bch2_trans_begin(_trans) __bch2_trans_begin(_trans)
#endif
#ifdef TRACE_TRANSACTION_RESTARTS_ALL
#define trans_restart(...) pr_info("transaction restart" __VA_ARGS__)
#else
#define trans_restart(...) no_printk("transaction restart" __VA_ARGS__)
#endif
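
This is the tracing the commit message refers to: the trans_restart() calls added throughout the insert path (" (race)", " (split)", " (can't unlock)", " (traverse)", and so on, in btree_update_leaf.c below) compile to pr_info() lines only when TRACE_TRANSACTION_RESTARTS_ALL is defined, and to no_printk() otherwise, so the hooks cost nothing in normal builds.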
#endif /* _BCACHEFS_BTREE_ITER_H */
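
Taken together, the usage pattern for the new interface looks roughly like this (a sketch assuming c is a struct bch_fs * and pos a position in the xattrs btree; compare bch2_get_acl() above). Because iterators are keyed by call site via __btree_iter_id(), re-executing the same line after a restart reuses the same iterator slot:

	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c);
retry:
	bch2_trans_begin(&trans);

	iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, pos,
				   BTREE_ITER_SLOTS);
	if (IS_ERR(iter)) {
		if (PTR_ERR(iter) == -EINTR)
			goto retry;
		ret = PTR_ERR(iter);
		goto out;
	}

	k = bch2_btree_iter_peek_slot(iter);
	ret = btree_iter_err(k);
	if (ret == -EINTR)
		goto retry;
out:
	bch2_trans_exit(&trans);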

View File

@ -253,6 +253,40 @@ struct btree_iter {
struct btree_iter *next;
};
#define BTREE_ITER_MAX 8
struct btree_insert_entry {
struct btree_iter *iter;
struct bkey_i *k;
unsigned extra_res;
/*
* true if entire key was inserted - can only be false for
* extents
*/
bool done;
};
struct btree_trans {
struct bch_fs *c;
size_t nr_restarts;
u8 nr_iters;
u8 iters_live;
u8 iters_linked;
u8 nr_updates;
unsigned mem_top;
unsigned mem_bytes;
void *mem;
struct btree_iter *iters;
u64 iter_ids[BTREE_ITER_MAX];
struct btree_insert_entry updates[BTREE_ITER_MAX];
struct btree_iter iters_onstack[2];
};
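
Note that iters_live and iters_linked are u8 bitmasks with one bit per iterator slot, matching BTREE_ITER_MAX of 8, and iter_ids[] records which call-site id (from __btree_iter_id()) owns each slot; that mapping is what lets a restarted transaction hand the same slot back to the same line of code.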
#define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \

View File

@ -27,16 +27,7 @@ struct btree_insert {
bool did_work;
unsigned short nr;
struct btree_insert_entry {
struct btree_iter *iter;
struct bkey_i *k;
unsigned extra_res;
/*
* true if entire key was inserted - can only be false for
* extents
*/
bool done;
} *entries;
struct btree_insert_entry *entries;
};
int __bch2_btree_insert_at(struct btree_insert *);
@ -149,4 +140,31 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
struct btree *, struct bkey_i_extent *);
/* new transactional interface: */
void bch2_trans_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, unsigned);
int bch2_trans_commit(struct btree_trans *,
struct disk_reservation *,
struct extent_insert_hook *,
u64 *, unsigned);
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \
({ \
struct btree_trans trans; \
int _ret; \
\
bch2_trans_init(&trans, (_c)); \
\
do { \
bch2_trans_begin(&trans); \
\
_ret = (_do) ?: bch2_trans_commit(&trans, NULL, NULL, \
(_journal_seq), (_flags)); \
} while (_ret == -EINTR); \
\
bch2_trans_exit(&trans); \
_ret; \
})
#endif /* _BCACHEFS_BTREE_UPDATE_H */

View File

@ -309,8 +309,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
unsigned u64s;
int ret;
trans_for_each_entry(trans, i)
trans_for_each_entry(trans, i) {
BUG_ON(i->done);
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
}
u64s = 0;
trans_for_each_entry(trans, i)
@ -330,6 +332,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
if (race_fault()) {
ret = -EINTR;
trans_restart(" (race)");
goto out;
}
@ -354,10 +357,14 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
}
}
if (journal_seq_verify(c) &&
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
trans_for_each_entry(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (journal_seq_verify(c))
trans_for_each_entry(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
else if (inject_invalid_keys(c))
trans_for_each_entry(trans, i)
i->k->k.version = MAX_VERSION;
}
trans_for_each_entry(trans, i) {
switch (btree_insert_key_leaf(trans, i)) {
@ -398,6 +405,17 @@ out:
return ret;
}
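
The inject_invalid_keys branch above is the write half of the fault-injection loop: every key inserted outside journal replay gets version MAX_VERSION, and the matching read-side change in btree_node_read_done() (earlier in this commit) then treats such keys as invalid and drops them, exercising the repair paths.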
static inline void btree_insert_entry_checks(struct bch_fs *c,
struct btree_insert_entry *i)
{
BUG_ON(i->iter->level);
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
BUG_ON(debug_check_bkeys(c) &&
!bkey_deleted(&i->k->k) &&
bch2_bkey_invalid(c, i->iter->btree_id,
bkey_i_to_s_c(i->k)));
}
/**
* __bch_btree_insert_at - insert keys at given iterator positions
*
@ -418,20 +436,16 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
unsigned flags;
int ret;
BUG_ON(!trans->nr);
for_each_btree_iter(trans->entries[0].iter, linked)
bch2_btree_iter_verify_locks(linked);
/* for the sake of sanity: */
BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
trans_for_each_entry(trans, i) {
BUG_ON(i->iter->level);
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
BUG_ON(debug_check_bkeys(c) &&
!bkey_deleted(&i->k->k) &&
bch2_bkey_invalid(c, i->iter->btree_id,
bkey_i_to_s_c(i->k)));
}
trans_for_each_entry(trans, i)
btree_insert_entry_checks(c, i);
bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
@ -442,7 +456,12 @@ retry:
cycle_gc_lock = false;
trans_for_each_entry(trans, i) {
unsigned old_locks_want = i->iter->locks_want;
unsigned old_uptodate = i->iter->uptodate;
if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
old_locks_want, old_uptodate);
ret = -EINTR;
goto err;
}
@ -515,8 +534,10 @@ err:
* don't care if we got ENOSPC because we told split it
* couldn't block:
*/
if (!ret || (flags & BTREE_INSERT_NOUNLOCK))
if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)");
ret = -EINTR;
}
}
if (cycle_gc_lock) {
@ -531,13 +552,16 @@ err:
}
if (ret == -EINTR) {
if (flags & BTREE_INSERT_NOUNLOCK)
if (flags & BTREE_INSERT_NOUNLOCK) {
trans_restart(" (can't unlock)");
goto out;
}
trans_for_each_entry(trans, i) {
int ret2 = bch2_btree_iter_traverse(i->iter);
if (ret2) {
ret = ret2;
trans_restart(" (traverse)");
goto out;
}
@ -550,11 +574,56 @@ err:
*/
if (!(flags & BTREE_INSERT_ATOMIC))
goto retry;
trans_restart(" (atomic)");
}
goto out;
}
void bch2_trans_update(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *k,
unsigned extra_journal_res)
{
struct btree_insert_entry *i;
BUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates));
i = &trans->updates[trans->nr_updates++];
*i = (struct btree_insert_entry) {
.iter = iter,
.k = k,
.extra_res = extra_journal_res,
};
btree_insert_entry_checks(trans->c, i);
}
int bch2_trans_commit(struct btree_trans *trans,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq,
unsigned flags)
{
struct btree_insert insert = {
.c = trans->c,
.disk_res = disk_res,
.journal_seq = journal_seq,
.flags = flags,
.nr = trans->nr_updates,
.entries = trans->updates,
};
if (!trans->nr_updates)
return 0;
trans->nr_updates = 0;
return __bch2_btree_insert_at(&insert);
}
int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
{
struct bkey_i k;

View File

@ -141,8 +141,8 @@ void bch2_dirent_to_text(struct bch_fs *c, char *buf,
}
}
static struct bkey_i_dirent *dirent_create_key(u8 type,
const struct qstr *name, u64 dst)
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
u8 type, const struct qstr *name, u64 dst)
{
struct bkey_i_dirent *dirent;
unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
@ -152,9 +152,9 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
BUG_ON(u64s > U8_MAX);
dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (!dirent)
return ERR_PTR(-ENOMEM);
dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(dirent))
return dirent;
bkey_dirent_init(&dirent->k_i);
dirent->k.u64s = u64s;
@ -172,23 +172,31 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
return dirent;
}
int __bch2_dirent_create(struct btree_trans *trans,
u64 dir_inum, const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum,
int flags)
{
struct bkey_i_dirent *dirent;
int ret;
dirent = dirent_create_key(trans, type, name, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
return __bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
dir_inum, &dirent->k_i, flags);
}
int bch2_dirent_create(struct bch_fs *c, u64 dir_inum,
const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum,
u64 *journal_seq, int flags)
{
struct bkey_i_dirent *dirent;
int ret;
dirent = dirent_create_key(type, name, dst_inum);
if (IS_ERR(dirent))
return PTR_ERR(dirent);
ret = bch2_hash_set(bch2_dirent_hash_desc, hash_info, c, dir_inum,
journal_seq, &dirent->k_i, flags);
kfree(dirent);
return ret;
return bch2_trans_do(c, journal_seq, flags,
__bch2_dirent_create(&trans, dir_inum, hash_info,
type, name, dst_inum, flags));
}
static void dirent_copy_target(struct bkey_i_dirent *dst,
@ -204,151 +212,117 @@ static struct bpos bch2_dirent_pos(struct bch_inode_info *inode,
return POS(inode->v.i_ino, bch2_dirent_hash(&inode->ei_str_hash, name));
}
int bch2_dirent_rename(struct bch_fs *c,
int bch2_dirent_rename(struct btree_trans *trans,
struct bch_inode_info *src_dir, const struct qstr *src_name,
struct bch_inode_info *dst_dir, const struct qstr *dst_name,
u64 *journal_seq, enum bch_rename_mode mode)
enum bch_rename_mode mode)
{
struct btree_iter src_iter, dst_iter, whiteout_iter;
struct btree_iter *src_iter, *dst_iter;
struct bkey_s_c old_src, old_dst;
struct bkey delete;
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
struct bpos src_pos = bch2_dirent_pos(src_dir, src_name);
struct bpos dst_pos = bch2_dirent_pos(dst_dir, dst_name);
bool need_whiteout;
int ret;
bch2_btree_iter_init(&src_iter, c, BTREE_ID_DIRENTS, src_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_init(&dst_iter, c, BTREE_ID_DIRENTS, dst_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_link(&src_iter, &dst_iter);
bch2_btree_iter_init(&whiteout_iter, c, BTREE_ID_DIRENTS, src_pos,
BTREE_ITER_SLOTS);
bch2_btree_iter_link(&src_iter, &whiteout_iter);
if (mode == BCH_RENAME_EXCHANGE) {
new_src = dirent_create_key(0, src_name, 0);
if (IS_ERR(new_src)) {
ret = PTR_ERR(new_src);
goto err;
}
} else {
new_src = (void *) &delete;
}
new_dst = dirent_create_key(0, dst_name, 0);
if (IS_ERR(new_dst)) {
ret = PTR_ERR(new_dst);
goto err;
}
retry:
/*
* Note that on -EINTR/dropped locks we're not restarting the lookup
* from the original hashed position (like we do when creating dirents,
* in bch_hash_set) - we never move existing dirents to a different slot:
*/
old_src = bch2_hash_lookup_at(bch2_dirent_hash_desc,
&src_dir->ei_str_hash,
&src_iter, src_name);
if ((ret = btree_iter_err(old_src)))
goto err;
ret = bch2_hash_needs_whiteout(bch2_dirent_hash_desc,
&src_dir->ei_str_hash,
&whiteout_iter, &src_iter);
if (ret < 0)
goto err;
need_whiteout = ret;
/*
* Lookup dst:
*
* Note that in BCH_RENAME mode, we're _not_ checking if
* the target already exists - we're relying on the VFS
* to do that check for us for correctness:
*/
old_dst = mode == BCH_RENAME
? bch2_hash_hole_at(bch2_dirent_hash_desc, &dst_iter)
: bch2_hash_lookup_at(bch2_dirent_hash_desc,
&dst_dir->ei_str_hash,
&dst_iter, dst_name);
if ((ret = btree_iter_err(old_dst)))
goto err;
dst_iter = mode == BCH_RENAME
? bch2_hash_hole(trans, bch2_dirent_hash_desc,
&dst_dir->ei_str_hash,
dst_dir->v.i_ino, dst_name)
: bch2_hash_lookup(trans, bch2_dirent_hash_desc,
&dst_dir->ei_str_hash,
dst_dir->v.i_ino, dst_name,
BTREE_ITER_INTENT);
if (IS_ERR(dst_iter))
return PTR_ERR(dst_iter);
old_dst = bch2_btree_iter_peek_slot(dst_iter);
switch (mode) {
case BCH_RENAME:
bkey_init(&new_src->k);
dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
/* Lookup src: */
src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc,
&src_dir->ei_str_hash,
src_dir->v.i_ino, src_name,
BTREE_ITER_INTENT);
if (IS_ERR(src_iter))
return PTR_ERR(src_iter);
old_src = bch2_btree_iter_peek_slot(src_iter);
if (bkey_cmp(dst_pos, src_iter.pos) <= 0 &&
bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
/*
* If we couldn't insert new_dst at its hashed
* position (dst_pos) due to a hash collision,
* and we're going to be deleting in
* between the hashed position and first empty
* slot we found - just overwrite the pos we
* were going to delete:
*
* Note: this is a correctness issue, in this
* situation bch2_hash_needs_whiteout() could
* return false when the whiteout would have
* been needed if we inserted at the pos
* __dirent_find_hole() found
*/
new_dst->k.p = src_iter.pos;
ret = bch2_btree_insert_at(c, NULL, NULL,
journal_seq,
BTREE_INSERT_ATOMIC,
BTREE_INSERT_ENTRY(&src_iter,
&new_dst->k_i));
goto err;
}
/* Create new dst key: */
new_dst = dirent_create_key(trans, 0, dst_name, 0);
if (IS_ERR(new_dst))
return PTR_ERR(new_dst);
if (need_whiteout)
new_src->k.type = BCH_DIRENT_WHITEOUT;
break;
case BCH_RENAME_OVERWRITE:
bkey_init(&new_src->k);
dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
new_dst->k.p = dst_iter->pos;
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
new_src = dirent_create_key(trans, 0, src_name, 0);
if (IS_ERR(new_src))
return PTR_ERR(new_src);
if (bkey_cmp(dst_pos, src_iter.pos) <= 0 &&
bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
/*
* Same case described above -
* bch_hash_needs_whiteout could spuriously
* return false, but we have to insert at
* dst_iter.pos because we're overwriting
* another dirent:
*/
new_src->k.type = BCH_DIRENT_WHITEOUT;
} else if (need_whiteout)
new_src->k.type = BCH_DIRENT_WHITEOUT;
break;
case BCH_RENAME_EXCHANGE:
dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
break;
new_src->k.p = src_iter->pos;
} else {
new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
if (IS_ERR(new_src))
return PTR_ERR(new_src);
bkey_init(&new_src->k);
new_src->k.p = src_iter->pos;
if (bkey_cmp(dst_pos, src_iter->pos) <= 0 &&
bkey_cmp(src_iter->pos, dst_iter->pos) < 0) {
/*
* We have a hash collision for the new dst key,
* and new_src - the key we're deleting - is between
* new_dst's hashed slot and the slot we're going to be
* inserting it into - oops. This will break the hash
* table if we don't deal with it:
*/
if (mode == BCH_RENAME) {
/*
* If we're not overwriting, we can just insert
* new_dst at the src position:
*/
new_dst->k.p = src_iter->pos;
bch2_trans_update(trans, src_iter, &new_dst->k_i, 0);
return 0;
} else {
/* If we're overwriting, we can't insert new_dst
* at a different slot because it has to
* overwrite old_dst - just make sure to use a
* whiteout when deleting src:
*/
new_src->k.type = BCH_DIRENT_WHITEOUT;
}
} else {
/* Check if we need a whiteout to delete src: */
ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
&src_dir->ei_str_hash,
src_iter);
if (ret < 0)
return ret;
if (ret)
new_src->k.type = BCH_DIRENT_WHITEOUT;
}
}
new_src->k.p = src_iter.pos;
new_dst->k.p = dst_iter.pos;
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
BTREE_INSERT_ATOMIC,
BTREE_INSERT_ENTRY(&src_iter, &new_src->k_i),
BTREE_INSERT_ENTRY(&dst_iter, &new_dst->k_i));
err:
if (ret == -EINTR)
goto retry;
bch2_trans_update(trans, src_iter, &new_src->k_i, 0);
bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0);
return 0;
}
bch2_btree_iter_unlock(&whiteout_iter);
bch2_btree_iter_unlock(&dst_iter);
bch2_btree_iter_unlock(&src_iter);
if (new_src != (void *) &delete)
kfree(new_src);
kfree(new_dst);
return ret;
int __bch2_dirent_delete(struct btree_trans *trans, u64 dir_inum,
const struct bch_hash_info *hash_info,
const struct qstr *name)
{
return bch2_hash_delete(trans, bch2_dirent_hash_desc, hash_info,
dir_inum, name);
}
int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum,
@ -356,28 +330,34 @@ int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum,
const struct qstr *name,
u64 *journal_seq)
{
return bch2_hash_delete(bch2_dirent_hash_desc, hash_info,
c, dir_inum, journal_seq, name);
return bch2_trans_do(c, journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
__bch2_dirent_delete(&trans, dir_inum, hash_info, name));
}
u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
const struct bch_hash_info *hash_info,
const struct qstr *name)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
u64 inum;
u64 inum = 0;
k = bch2_hash_lookup(bch2_dirent_hash_desc, hash_info, c,
dir_inum, &iter, name);
if (IS_ERR(k.k)) {
bch2_btree_iter_unlock(&iter);
return 0;
bch2_trans_init(&trans, c);
iter = bch2_hash_lookup(&trans, bch2_dirent_hash_desc,
hash_info, dir_inum, name, 0);
if (IS_ERR(iter)) {
BUG_ON(PTR_ERR(iter) == -EINTR);
goto out;
}
k = bch2_btree_iter_peek_slot(iter);
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
bch2_btree_iter_unlock(&iter);
out:
bch2_trans_exit(&trans);
return inum;
}

View File

@ -21,8 +21,16 @@ struct bch_hash_info;
struct bch_inode_info;
unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent);
int __bch2_dirent_create(struct btree_trans *, u64,
const struct bch_hash_info *, u8,
const struct qstr *, u64, int);
int bch2_dirent_create(struct bch_fs *c, u64, const struct bch_hash_info *,
u8, const struct qstr *, u64, u64 *, int);
int __bch2_dirent_delete(struct btree_trans *, u64,
const struct bch_hash_info *,
const struct qstr *);
int bch2_dirent_delete(struct bch_fs *, u64, const struct bch_hash_info *,
const struct qstr *, u64 *);
@ -32,10 +40,10 @@ enum bch_rename_mode {
BCH_RENAME_EXCHANGE,
};
int bch2_dirent_rename(struct bch_fs *,
int bch2_dirent_rename(struct btree_trans *,
struct bch_inode_info *, const struct qstr *,
struct bch_inode_info *, const struct qstr *,
u64 *, enum bch_rename_mode);
enum bch_rename_mode);
u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
const struct qstr *);

View File

@ -131,8 +131,9 @@ print:
mutex_unlock(&c->fsck_error_lock);
if (fix)
set_bit(BCH_FS_FSCK_FIXED_ERRORS, &c->flags);
set_bit(fix
? BCH_FS_FSCK_FIXED_ERRORS
: BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags);
return fix ? FSCK_ERR_FIX
: flags & FSCK_CAN_IGNORE ? FSCK_ERR_IGNORE

View File

@ -147,12 +147,18 @@ void bch2_flush_fsck_errs(struct bch_fs *);
#define need_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
#define need_fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
#define mustfix_fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
#define mustfix_fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
#define fsck_err(c, ...) \
__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
#define fsck_err_on(cond, c, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)

View File

@ -193,7 +193,7 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c,
struct bch_inode_info *inode,
loff_t new_size)
{
return __bch2_write_inode(c, inode, inode_set_size, &new_size);
return __bch2_write_inode(c, inode, inode_set_size, &new_size, 0);
}
static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
@ -259,7 +259,7 @@ static int i_sectors_dirty_finish(struct bch_fs *c, struct i_sectors_hook *h)
mutex_lock(&h->inode->ei_update_lock);
i_sectors_acct(c, h->inode, &h->quota_res, h->sectors);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h, 0);
if (!ret && h->new_i_size != U64_MAX)
i_size_write(&h->inode->v, h->new_i_size);
@ -289,7 +289,7 @@ static int i_sectors_dirty_start(struct bch_fs *c, struct i_sectors_hook *h)
int ret;
mutex_lock(&h->inode->ei_update_lock);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_start_fn, h);
ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_start_fn, h, 0);
mutex_unlock(&h->inode->ei_update_lock);
return ret;
@ -390,7 +390,8 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
struct bchfs_write_op *op = container_of(wop,
struct bchfs_write_op, op);
struct keylist *keys = &op->op.insert_keys;
struct btree_iter extent_iter, inode_iter;
struct btree_trans trans;
struct btree_iter *extent_iter, *inode_iter = NULL;
struct bchfs_extent_trans_hook hook;
struct bkey_i *k = bch2_keylist_front(keys);
s64 orig_sectors_added = op->sectors_added;
@ -398,12 +399,13 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
BUG_ON(k->k.p.inode != op->inode->v.i_ino);
bch2_btree_iter_init(&extent_iter, wop->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);
bch2_btree_iter_init(&inode_iter, wop->c, BTREE_ID_INODES,
POS(extent_iter.pos.inode, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_trans_init(&trans, wop->c);
extent_iter = bch2_trans_get_iter(&trans,
BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);
BUG_ON(IS_ERR(extent_iter));
hook.op = op;
hook.hook.fn = bchfs_extent_update_hook;
@ -416,19 +418,29 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
op->inode->ei_inode.bi_size)
hook.need_inode_update = true;
/* optimization for fewer transaction restarts: */
ret = bch2_btree_iter_traverse(extent_iter);
if (ret)
goto err;
if (hook.need_inode_update) {
struct bkey_s_c inode;
if (!btree_iter_linked(&inode_iter))
bch2_btree_iter_link(&extent_iter, &inode_iter);
if (!inode_iter) {
inode_iter = bch2_trans_get_iter(&trans,
BTREE_ID_INODES,
POS(extent_iter->pos.inode, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
BUG_ON(IS_ERR(inode_iter));
}
inode = bch2_btree_iter_peek_slot(&inode_iter);
inode = bch2_btree_iter_peek_slot(inode_iter);
if ((ret = btree_iter_err(inode)))
goto err;
if (WARN_ONCE(inode.k->type != BCH_INODE_FS,
"inode %llu not found when updating",
extent_iter.pos.inode)) {
extent_iter->pos.inode)) {
ret = -ENOENT;
break;
}
@ -436,7 +448,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
if (WARN_ONCE(bkey_bytes(inode.k) >
sizeof(hook.inode_p),
"inode %llu too big (%zu bytes, buf %zu)",
extent_iter.pos.inode,
extent_iter->pos.inode,
bkey_bytes(inode.k),
sizeof(hook.inode_p))) {
ret = -ENOENT;
@ -448,7 +460,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
&hook.inode_u);
if (WARN_ONCE(ret,
"error %i unpacking inode %llu",
ret, extent_iter.pos.inode)) {
ret, extent_iter->pos.inode)) {
ret = -ENOENT;
break;
}
@ -458,8 +470,8 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC|
BTREE_INSERT_USE_RESERVE,
BTREE_INSERT_ENTRY(&extent_iter, k),
BTREE_INSERT_ENTRY_EXTRA_RES(&inode_iter,
BTREE_INSERT_ENTRY(extent_iter, k),
BTREE_INSERT_ENTRY_EXTRA_RES(inode_iter,
&hook.inode_p.inode.k_i, 2));
} else {
ret = bch2_btree_insert_at(wop->c, &wop->res,
@ -467,10 +479,10 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC|
BTREE_INSERT_USE_RESERVE,
BTREE_INSERT_ENTRY(&extent_iter, k));
BTREE_INSERT_ENTRY(extent_iter, k));
}
BUG_ON(bkey_cmp(extent_iter.pos, bkey_start_pos(&k->k)));
BUG_ON(bkey_cmp(extent_iter->pos, bkey_start_pos(&k->k)));
if (WARN_ONCE(!ret != !k->k.size,
"ret %i k->size %u", ret, k->k.size))
@ -481,12 +493,11 @@ err:
if (ret)
break;
BUG_ON(bkey_cmp(extent_iter.pos, k->k.p) < 0);
BUG_ON(bkey_cmp(extent_iter->pos, k->k.p) < 0);
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
bch2_btree_iter_unlock(&extent_iter);
bch2_btree_iter_unlock(&inode_iter);
bch2_trans_exit(&trans);
if (op->is_dio) {
struct dio_write *dio = container_of(op, struct dio_write, iop);
@ -2338,8 +2349,8 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct btree_iter src;
struct btree_iter dst;
struct btree_trans trans;
struct btree_iter *src, *dst;
BKEY_PADDED(k) copy;
struct bkey_s_c k;
struct i_sectors_hook i_sectors_hook = i_sectors_hook_init(inode, 0);
@ -2349,13 +2360,17 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
bch2_btree_iter_init(&dst, c, BTREE_ID_EXTENTS,
bch2_trans_init(&trans, c);
dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS(inode->v.i_ino, offset >> 9),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
BUG_ON(IS_ERR(dst));
/* position will be set from dst iter's position: */
bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN,
src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS);
bch2_btree_iter_link(&src, &dst);
BUG_ON(IS_ERR(src));
/*
* We need i_mutex to keep the page cache consistent with the extents
@ -2384,24 +2399,24 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
if (ret)
goto err;
while (bkey_cmp(dst.pos,
while (bkey_cmp(dst->pos,
POS(inode->v.i_ino,
round_up(new_size, PAGE_SIZE) >> 9)) < 0) {
struct disk_reservation disk_res;
bch2_btree_iter_set_pos(&src,
POS(dst.pos.inode, dst.pos.offset + (len >> 9)));
bch2_btree_iter_set_pos(src,
POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
k = bch2_btree_iter_peek_slot(&src);
k = bch2_btree_iter_peek_slot(src);
if ((ret = btree_iter_err(k)))
goto btree_iter_err;
bkey_reassemble(&copy.k, k);
bch2_cut_front(src.pos, &copy.k);
bch2_cut_front(src->pos, &copy.k);
copy.k.k.p.offset -= len >> 9;
BUG_ON(bkey_cmp(dst.pos, bkey_start_pos(&copy.k.k)));
BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
@ -2412,14 +2427,13 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
&inode->ei_journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&dst, &copy.k));
BTREE_INSERT_ENTRY(dst, &copy.k));
bch2_disk_reservation_put(c, &disk_res);
btree_iter_err:
if (ret == -EINTR)
ret = 0;
if (ret) {
bch2_btree_iter_unlock(&src);
bch2_btree_iter_unlock(&dst);
bch2_trans_exit(&trans);
goto err_put_sectors_dirty;
}
/*
@ -2427,11 +2441,10 @@ btree_iter_err:
* pointers... which isn't a _super_ serious problem...
*/
bch2_btree_iter_cond_resched(&src);
bch2_btree_iter_cond_resched(src);
}
bch2_btree_iter_unlock(&src);
bch2_btree_iter_unlock(&dst);
bch2_trans_exit(&trans);
ret = bch2_inode_truncate(c, inode->v.i_ino,
round_up(new_size, block_bytes(c)) >> 9,

View File

@ -87,6 +87,8 @@ void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
struct flags_set {
unsigned mask;
unsigned flags;
unsigned projid;
};
static int bch2_inode_flags_set(struct bch_inode_info *inode,
@ -150,7 +152,7 @@ static int bch2_ioc_setflags(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &s);
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &s, 0);
if (!ret)
bch2_inode_flags_to_vfs(inode);
@ -185,9 +187,9 @@ static int bch2_set_projid(struct bch_fs *c,
qid.q[QTYP_PRJ] = projid;
ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
inode->v.i_blocks +
inode->ei_quota_reserved);
return bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
inode->v.i_blocks +
inode->ei_quota_reserved);
if (ret)
return ret;
@ -195,6 +197,17 @@ static int bch2_set_projid(struct bch_fs *c,
return 0;
}
static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct flags_set *s = p;
bi->bi_project = s->projid;
return bch2_inode_flags_set(inode, bi, p);
}
static int bch2_ioc_fssetxattr(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
@ -211,6 +224,8 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
if (fa.fsx_xflags)
return -EOPNOTSUPP;
s.projid = fa.fsx_projid;
ret = mnt_want_write_file(file);
if (ret)
return ret;
@ -226,7 +241,7 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
if (ret)
goto err_unlock;
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &s);
ret = __bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, 0);
if (!ret)
bch2_inode_flags_to_vfs(inode);
err_unlock:

File diff suppressed because it is too large

View File

@ -51,8 +51,16 @@ struct bch_inode_unpacked;
typedef int (*inode_set_fn)(struct bch_inode_info *,
struct bch_inode_unpacked *, void *);
void bch2_inode_update_after_write(struct bch_fs *,
struct bch_inode_info *,
struct bch_inode_unpacked *,
unsigned);
int __must_check bch2_write_inode_trans(struct btree_trans *,
struct bch_inode_info *,
struct bch_inode_unpacked *,
inode_set_fn, void *);
int __must_check __bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
inode_set_fn, void *);
inode_set_fn, void *, unsigned);
int __must_check bch2_write_inode(struct bch_fs *,
struct bch_inode_info *);

View File

@ -126,16 +126,22 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
struct hash_check {
struct bch_hash_info info;
struct btree_iter chain;
struct btree_iter iter;
struct btree_trans *trans;
/* start of current chain of hash collisions: */
struct btree_iter *chain;
/* next offset in current chain of hash collisions: */
u64 next;
};
static void hash_check_init(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c)
struct btree_trans *trans,
struct hash_check *h)
{
bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN, 0);
bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN, 0);
h->trans = trans;
h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
h->next = -1;
}
static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
@ -173,6 +179,75 @@ err:
return ret;
}
/* fsck hasn't been converted to new transactions yet: */
static int fsck_hash_delete_at(const struct bch_hash_desc desc,
struct bch_hash_info *info,
struct btree_iter *orig_iter)
{
struct btree_trans trans;
struct btree_iter *iter;
int ret;
bch2_btree_iter_unlock(orig_iter);
bch2_trans_init(&trans, orig_iter->c);
retry:
bch2_trans_begin(&trans);
iter = bch2_trans_copy_iter(&trans, orig_iter);
if (IS_ERR(iter)) {
ret = PTR_ERR(iter);
goto err;
}
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
bch2_trans_commit(&trans, NULL, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
err:
if (ret == -EINTR)
goto retry;
bch2_trans_exit(&trans);
return ret;
}
static int hash_check_duplicates(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c,
struct btree_iter *k_iter, struct bkey_s_c k)
{
struct btree_iter *iter;
struct bkey_s_c k2;
char buf[200];
int ret = 0;
if (!bkey_cmp(h->chain->pos, k_iter->pos))
return 0;
iter = bch2_trans_copy_iter(h->trans, h->chain);
BUG_ON(IS_ERR(iter));
for_each_btree_key_continue(iter, 0, k2) {
if (bkey_cmp(k2.k->p, k.k->p) >= 0)
break;
if (fsck_err_on(k2.k->type == desc.key_type &&
!desc.cmp_bkey(k, k2), c,
"duplicate hash table keys:\n%s",
(bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
buf, sizeof(buf), k), buf))) {
ret = fsck_hash_delete_at(desc, &h->info, k_iter);
if (ret)
return ret;
ret = 1;
break;
}
}
fsck_err:
bch2_trans_iter_free(h->trans, iter);
return ret;
}
static int hash_check_key(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c,
struct btree_iter *k_iter, struct bkey_s_c k)
@ -185,13 +260,8 @@ static int hash_check_key(const struct bch_hash_desc desc,
k.k->type != desc.key_type)
return 0;
if (k.k->p.offset != h->next) {
if (!btree_iter_linked(&h->chain)) {
bch2_btree_iter_link(k_iter, &h->chain);
bch2_btree_iter_link(k_iter, &h->iter);
}
bch2_btree_iter_copy(&h->chain, k_iter);
}
if (k.k->p.offset != h->next)
bch2_btree_iter_copy(h->chain, k_iter);
h->next = k.k->p.offset + 1;
if (k.k->type != desc.key_type)
@ -199,11 +269,11 @@ static int hash_check_key(const struct bch_hash_desc desc,
hashed = desc.hash_bkey(&h->info, k);
if (fsck_err_on(hashed < h->chain.pos.offset ||
if (fsck_err_on(hashed < h->chain->pos.offset ||
hashed > k.k->p.offset, c,
"hash table key at wrong offset: %llu, "
"hashed to %llu chain starts at %llu\n%s",
k.k->p.offset, hashed, h->chain.pos.offset,
k.k->p.offset, hashed, h->chain->pos.offset,
(bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
buf, sizeof(buf), k), buf))) {
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
@ -214,25 +284,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
return 1;
}
if (!bkey_cmp(h->chain.pos, k_iter->pos))
return 0;
bch2_btree_iter_copy(&h->iter, &h->chain);
while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) {
struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter);
if (fsck_err_on(k2.k->type == desc.key_type &&
!desc.cmp_bkey(k, k2), c,
"duplicate hash table keys:\n%s",
(bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
buf, sizeof(buf), k), buf))) {
ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL);
if (ret)
return ret;
return 1;
}
bch2_btree_iter_next(&h->iter);
}
ret = hash_check_duplicates(desc, h, c, k_iter, k);
fsck_err:
return ret;
}
@ -250,6 +302,8 @@ static int check_extents(struct bch_fs *c)
u64 i_sectors;
int ret = 0;
bch_verbose(c, "checking extents");
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
@ -332,16 +386,25 @@ static int check_dirents(struct bch_fs *c)
{
struct inode_walker w = inode_walker_init();
struct hash_check h;
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
unsigned name_len;
char buf[200];
int ret = 0;
hash_check_init(bch2_dirent_hash_desc, &h, c);
bch_verbose(c, "checking dirents");
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
bch2_trans_init(&trans, c);
BUG_ON(bch2_trans_preload_iters(&trans));
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(bch2_dirent_hash_desc, &trans, &h);
for_each_btree_key_continue(iter, 0, k) {
struct bkey_s_c_dirent d;
struct bch_inode_unpacked target;
bool have_target;
@ -360,7 +423,7 @@ static int check_dirents(struct bch_fs *c)
mode_to_type(w.inode.bi_mode),
(bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
buf, sizeof(buf), k), buf))) {
ret = bch2_btree_delete_at(&iter, 0);
ret = bch2_btree_delete_at(iter, 0);
if (ret)
goto err;
continue;
@ -369,7 +432,7 @@ static int check_dirents(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k);
ret = hash_check_key(bch2_dirent_hash_desc, &h, c, iter, k);
if (ret > 0) {
ret = 0;
continue;
@ -393,7 +456,7 @@ static int check_dirents(struct bch_fs *c)
fsck_err_on(name_len == 2 &&
!memcmp(d.v->d_name, "..", 2), c,
".. dirent")) {
ret = remove_dirent(c, &iter, d);
ret = remove_dirent(c, iter, d);
if (ret)
goto err;
continue;
@ -403,7 +466,7 @@ static int check_dirents(struct bch_fs *c)
"dirent points to own directory:\n%s",
(bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
buf, sizeof(buf), k), buf))) {
ret = remove_dirent(c, &iter, d);
ret = remove_dirent(c, iter, d);
if (ret)
goto err;
continue;
@ -420,7 +483,7 @@ static int check_dirents(struct bch_fs *c)
"dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
buf, sizeof(buf), k), buf))) {
ret = remove_dirent(c, &iter, d);
ret = remove_dirent(c, iter, d);
if (ret)
goto err;
continue;
@ -446,7 +509,7 @@ static int check_dirents(struct bch_fs *c)
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&iter, &n->k_i));
BTREE_INSERT_ENTRY(iter, &n->k_i));
kfree(n);
if (ret)
goto err;
@ -455,9 +518,7 @@ static int check_dirents(struct bch_fs *c)
}
err:
fsck_err:
bch2_btree_iter_unlock(&h.chain);
bch2_btree_iter_unlock(&h.iter);
return bch2_btree_iter_unlock(&iter) ?: ret;
return bch2_trans_exit(&trans) ?: ret;
}
/*
@ -468,14 +529,23 @@ static int check_xattrs(struct bch_fs *c)
{
struct inode_walker w = inode_walker_init();
struct hash_check h;
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret = 0;
hash_check_init(bch2_xattr_hash_desc, &h, c);
bch_verbose(c, "checking xattrs");
for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
bch2_trans_init(&trans, c);
BUG_ON(bch2_trans_preload_iters(&trans));
iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(bch2_xattr_hash_desc, &trans, &h);
for_each_btree_key_continue(iter, 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
if (ret)
break;
@ -483,7 +553,7 @@ static int check_xattrs(struct bch_fs *c)
if (fsck_err_on(!w.have_inode, c,
"xattr for missing inode %llu",
k.k->p.inode)) {
ret = bch2_btree_delete_at(&iter, 0);
ret = bch2_btree_delete_at(iter, 0);
if (ret)
goto err;
continue;
@ -492,15 +562,13 @@ static int check_xattrs(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k);
ret = hash_check_key(bch2_xattr_hash_desc, &h, c, iter, k);
if (ret)
goto fsck_err;
}
err:
fsck_err:
bch2_btree_iter_unlock(&h.chain);
bch2_btree_iter_unlock(&h.iter);
return bch2_btree_iter_unlock(&iter) ?: ret;
return bch2_trans_exit(&trans) ?: ret;
}
/* Get root directory, create if it doesn't exist: */
@ -509,6 +577,8 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
struct bkey_inode_buf packed;
int ret;
bch_verbose(c, "checking root directory");
ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
if (ret && ret != -ENOENT)
return ret;
@ -546,6 +616,8 @@ static int check_lostfound(struct bch_fs *c,
u64 inum;
int ret;
bch_verbose(c, "checking lost+found");
inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
&lostfound);
if (!inum) {
@ -672,6 +744,8 @@ static int check_directory_structure(struct bch_fs *c,
u64 d_inum;
int ret = 0;
bch_verbose(c, "checking directory structure");
/* DFS: */
restart_dfs:
had_unreachable = false;
@ -872,15 +946,116 @@ s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
return bch2_btree_iter_unlock(&iter) ?: sectors;
}
static int bch2_gc_do_inode(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
static int check_inode_nlink(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode,
struct bch_inode_unpacked *u,
struct nlink *link,
bool *do_update)
{
u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED
? 0
: u->bi_nlink + nlink_bias(u->bi_mode);
u32 real_i_nlink =
link->count * nlink_bias(u->bi_mode) +
link->dir_count;
int ret = 0;
/*
* These should have been caught/fixed by earlier passes, we don't
* repair them here:
*/
if (S_ISDIR(u->bi_mode) && link->count > 1) {
need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
u->bi_inum, link->count);
return 0;
}
if (S_ISDIR(u->bi_mode) && !link->count) {
need_fsck_err(c, "unreachable directory found (inum %llu)",
u->bi_inum);
return 0;
}
if (!S_ISDIR(u->bi_mode) && link->dir_count) {
need_fsck_err(c, "non directory with subdirectories",
u->bi_inum);
return 0;
}
if (!link->count &&
!(u->bi_flags & BCH_INODE_UNLINKED) &&
(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
u->bi_inum, mode_to_type(u->bi_mode)) ==
FSCK_ERR_IGNORE)
return 0;
ret = reattach_inode(c, lostfound_inode, u->bi_inum);
if (ret)
return ret;
link->count = 1;
real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
goto set_i_nlink;
}
if (i_nlink < link->count) {
if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
u->bi_inum, i_nlink, link->count,
mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
return 0;
goto set_i_nlink;
}
if (i_nlink != real_i_nlink &&
c->sb.clean) {
if (fsck_err(c, "filesystem marked clean, "
"but inode %llu has wrong i_nlink "
"(type %u i_nlink %u, should be %u)",
u->bi_inum, mode_to_type(u->bi_mode),
i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
return 0;
goto set_i_nlink;
}
if (i_nlink != real_i_nlink &&
(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) {
if (fsck_err(c, "inode %llu has wrong i_nlink "
"(type %u i_nlink %u, should be %u)",
u->bi_inum, mode_to_type(u->bi_mode),
i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
return 0;
goto set_i_nlink;
}
if (real_i_nlink && i_nlink != real_i_nlink)
bch_verbose(c, "setting inode %llu nlink from %u to %u",
u->bi_inum, i_nlink, real_i_nlink);
set_i_nlink:
if (i_nlink != real_i_nlink) {
if (real_i_nlink) {
u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode);
u->bi_flags &= ~BCH_INODE_UNLINKED;
} else {
u->bi_nlink = 0;
u->bi_flags |= BCH_INODE_UNLINKED;
}
*do_update = true;
}
fsck_err:
return ret;
}
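/*
 * Worked example for the arithmetic above, assuming nlink_bias() returns
 * 2 for directories (covering "." plus the parent's entry) and 1
 * otherwise: a directory with link->count == 1 (one dirent referring to
 * it) and link->dir_count == 2 (two subdirectories, each contributing a
 * "..") should have real_i_nlink = 1 * 2 + 2 = 4.
 */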
static int check_inode(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode,
struct nlink *link)
{
struct bch_inode_unpacked u;
int ret = 0;
u32 i_nlink, real_i_nlink;
bool do_update = false;
int ret = 0;
ret = bch2_inode_unpack(inode, &u);
if (bch2_fs_inconsistent_on(ret, c,
@ -888,48 +1063,17 @@ static int bch2_gc_do_inode(struct bch_fs *c,
inode.k->p.inode))
return ret;
i_nlink = u.bi_nlink + nlink_bias(u.bi_mode);
fsck_err_on(i_nlink < link.count, c,
"inode %llu i_link too small (%u < %u, type %i)",
inode.k->p.inode, i_nlink,
link.count, mode_to_type(u.bi_mode));
/* These should have been caught/fixed by earlier passes: */
if (S_ISDIR(u.bi_mode)) {
need_fsck_err_on(link.count > 1, c,
"directory %llu with multiple hardlinks: %u",
inode.k->p.inode, link.count);
real_i_nlink = link.count * 2 + link.dir_count;
} else {
need_fsck_err_on(link.dir_count, c,
"found dirents for non directory %llu",
inode.k->p.inode);
real_i_nlink = link.count + link.dir_count;
if (link) {
ret = check_inode_nlink(c, lostfound_inode, &u, link,
&do_update);
if (ret)
return ret;
}
if (!link.count) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but found orphaned inode %llu",
inode.k->p.inode);
if (u.bi_flags & BCH_INODE_UNLINKED) {
bch_verbose(c, "deleting inode %llu", u.bi_inum);
if (fsck_err_on(S_ISDIR(u.bi_mode) &&
bch2_empty_dir(c, inode.k->p.inode), c,
"non empty directory with link count 0, "
"inode nlink %u, dir links found %u",
i_nlink, link.dir_count)) {
ret = reattach_inode(c, lostfound_inode,
inode.k->p.inode);
if (ret)
return ret;
}
bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
ret = bch2_inode_rm(c, inode.k->p.inode);
ret = bch2_inode_rm(c, u.bi_inum);
if (ret)
bch_err(c, "error in fs gc: error %i "
"while deleting inode", ret);
@ -940,16 +1084,16 @@ static int bch2_gc_do_inode(struct bch_fs *c,
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_size dirty",
inode.k->p.inode);
u.bi_inum);
bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
bch_verbose(c, "truncating inode %llu", u.bi_inum);
/*
* XXX: need to truncate partial blocks too here - or ideally
* just switch units to bytes and that issue goes away
*/
ret = bch2_inode_truncate(c, inode.k->p.inode,
ret = bch2_inode_truncate(c, u.bi_inum,
round_up(u.bi_size, PAGE_SIZE) >> 9,
NULL, NULL);
if (ret) {
@ -974,12 +1118,12 @@ static int bch2_gc_do_inode(struct bch_fs *c,
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_sectors dirty",
inode.k->p.inode);
u.bi_inum);
bch_verbose(c, "recounting sectors for inode %llu",
inode.k->p.inode);
u.bi_inum);
sectors = bch2_count_inode_sectors(c, inode.k->p.inode);
sectors = bch2_count_inode_sectors(c, u.bi_inum);
if (sectors < 0) {
bch_err(c, "error in fs gc: error %i "
"recounting inode sectors",
@ -992,20 +1136,6 @@ static int bch2_gc_do_inode(struct bch_fs *c,
do_update = true;
}
if (i_nlink != real_i_nlink) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has wrong i_nlink "
"(type %u i_nlink %u, should be %u)",
inode.k->p.inode, mode_to_type(u.bi_mode),
i_nlink, real_i_nlink);
bch_verbose(c, "setting inode %llu nlinks from %u to %u",
inode.k->p.inode, i_nlink, real_i_nlink);
u.bi_nlink = real_i_nlink - nlink_bias(u.bi_mode);
do_update = true;
}
if (do_update) {
struct bkey_inode_buf p;
@ -1024,9 +1154,9 @@ fsck_err:
noinline_for_stack
static int bch2_gc_walk_inodes(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode,
nlink_table *links,
u64 range_start, u64 range_end)
struct bch_inode_unpacked *lostfound_inode,
nlink_table *links,
u64 range_start, u64 range_end)
{
struct btree_iter iter;
struct bkey_s_c k;
@ -1065,10 +1195,9 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
*/
bch2_btree_iter_unlock(&iter);
ret = bch2_gc_do_inode(c, lostfound_inode, &iter,
bkey_s_c_to_inode(k), *link);
if (ret == -EINTR)
continue;
ret = check_inode(c, lostfound_inode, &iter,
bkey_s_c_to_inode(k), link);
BUG_ON(ret == -EINTR);
if (ret)
break;
@ -1103,6 +1232,8 @@ static int check_inode_nlinks(struct bch_fs *c,
u64 this_iter_range_start, next_iter_range_start = 0;
int ret = 0;
bch_verbose(c, "checking inode nlinks");
genradix_init(&links);
do {
@ -1129,68 +1260,103 @@ static int check_inode_nlinks(struct bch_fs *c,
return ret;
}
noinline_for_stack
static int check_inodes_fast(struct bch_fs *c)
{
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
unsigned long nr_inodes = 0;
int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
if (k.k->type != BCH_INODE_FS)
continue;
inode = bkey_s_c_to_inode(k);
if (!(inode.v->bi_flags & BCH_INODE_UNLINKED))
nr_inodes++;
if (inode.v->bi_flags &
(BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean but found inode %llu with flags %x",
inode.k->p.inode, inode.v->bi_flags);
ret = check_inode(c, NULL, &iter, inode, NULL);
BUG_ON(ret == -EINTR);
if (ret)
break;
}
}
atomic_long_set(&c->nr_inodes, nr_inodes);
fsck_err:
return bch2_btree_iter_unlock(&iter) ?: ret;
}
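/*
 * Note: this fast path trusts the on-disk link counts (which is only safe
 * when BCH_FEATURE_ATOMIC_NLINK is set or the fs was clean - see
 * bch2_fsck() below) and only revisits inodes whose flags mark their
 * size, sector count or unlinked state as needing work.
 */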
/*
* Checks for inconsistencies that shouldn't happen, unless we have a bug.
* Doesn't fix them yet, mainly because they haven't yet been observed:
*/
int bch2_fsck(struct bch_fs *c, bool full_fsck)
static int bch2_fsck_full(struct bch_fs *c)
{
struct bch_inode_unpacked root_inode, lostfound_inode;
int ret;
if (full_fsck) {
bch_verbose(c, "checking extents");
ret = check_extents(c);
if (ret)
return ret;
bch_verbose(c, "checking dirents");
ret = check_dirents(c);
if (ret)
return ret;
bch_verbose(c, "checking xattrs");
ret = check_xattrs(c);
if (ret)
return ret;
bch_verbose(c, "checking root directory");
ret = check_root(c, &root_inode);
if (ret)
return ret;
bch_verbose(c, "checking lost+found");
ret = check_lostfound(c, &root_inode, &lostfound_inode);
if (ret)
return ret;
bch_verbose(c, "checking directory structure");
ret = check_directory_structure(c, &lostfound_inode);
if (ret)
return ret;
bch_verbose(c, "checking inode nlinks");
ret = check_inode_nlinks(c, &lostfound_inode);
if (ret)
return ret;
} else {
bch_verbose(c, "checking root directory");
ret = check_root(c, &root_inode);
if (ret)
return ret;
bch_verbose(c, "checking lost+found");
ret = check_lostfound(c, &root_inode, &lostfound_inode);
if (ret)
return ret;
bch_verbose(c, "checking inode nlinks");
ret = check_inode_nlinks(c, &lostfound_inode);
if (ret)
return ret;
}
bch_verbose(c, "starting fsck:");
ret = check_extents(c) ?:
check_dirents(c) ?:
check_xattrs(c) ?:
check_root(c, &root_inode) ?:
check_lostfound(c, &root_inode, &lostfound_inode) ?:
check_directory_structure(c, &lostfound_inode) ?:
check_inode_nlinks(c, &lostfound_inode);
bch2_flush_fsck_errs(c);
bch_verbose(c, "fsck done");
return 0;
return ret;
}
static int bch2_fsck_inode_nlink(struct bch_fs *c)
{
struct bch_inode_unpacked root_inode, lostfound_inode;
int ret;
bch_verbose(c, "checking inode link counts:");
ret = check_root(c, &root_inode) ?:
check_lostfound(c, &root_inode, &lostfound_inode) ?:
check_inode_nlinks(c, &lostfound_inode);
bch2_flush_fsck_errs(c);
bch_verbose(c, "done");
return ret;
}
static int bch2_fsck_walk_inodes_only(struct bch_fs *c)
{
int ret;
bch_verbose(c, "walking inodes:");
ret = check_inodes_fast(c);
bch2_flush_fsck_errs(c);
bch_verbose(c, "done");
return ret;
}
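/*
 * Entry point: full fsck unless disabled; with fsck disabled we still
 * repair link counts if the fs was unclean and predates
 * BCH_FEATURE_ATOMIC_NLINK, and otherwise just walk the inodes btree,
 * which also initializes c->nr_inodes.
 */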
int bch2_fsck(struct bch_fs *c)
{
if (!c->opts.nofsck)
return bch2_fsck_full(c);
if (!c->sb.clean &&
!(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)))
return bch2_fsck_inode_nlink(c);
return bch2_fsck_walk_inodes_only(c);
}
View File
@ -2,6 +2,6 @@
#define _BCACHEFS_FSCK_H
s64 bch2_count_inode_sectors(struct bch_fs *, u64);
int bch2_fsck(struct bch_fs *, bool);
int bch2_fsck(struct bch_fs *);
#endif /* _BCACHEFS_FSCK_H */
View File
@ -203,6 +203,10 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
return "invalid data checksum type";
if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
unpacked.bi_nlink != 0)
return "flagged as unlinked but bi_nlink != 0";
return NULL;
}
case BCH_INODE_BLOCKDEV:
@ -276,12 +280,27 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
}
}
int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
u64 min, u64 max, u64 *hint)
static inline u32 bkey_generation(struct bkey_s_c k)
{
struct bkey_inode_buf inode_p;
struct btree_iter iter;
bool searched_from_start = false;
switch (k.k->type) {
case BCH_INODE_BLOCKDEV:
case BCH_INODE_FS:
BUG();
case BCH_INODE_GENERATION:
return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
default:
return 0;
}
}
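/*
 * A BCH_INODE_GENERATION key marks a slot whose inode was deleted;
 * carrying its generation forward into the next inode created in that
 * slot is presumably what keeps stale handles to the old inode number
 * distinguishable from the new file.
 */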
int __bch2_inode_create(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u,
u64 min, u64 max, u64 *hint)
{
struct bch_fs *c = trans->c;
struct bkey_inode_buf *inode_p;
struct btree_iter *iter;
u64 start;
int ret;
if (!max)
@ -290,82 +309,66 @@ int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
if (c->opts.inodes_32bit)
max = min_t(u64, max, U32_MAX);
if (*hint >= max || *hint < min)
*hint = min;
start = READ_ONCE(*hint);
if (*hint == min)
searched_from_start = true;
if (start >= max || start < min)
start = min;
inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
if (IS_ERR(inode_p))
return PTR_ERR(inode_p);
iter = bch2_trans_get_iter(trans,
BTREE_ID_INODES, POS(start, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
again:
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(*hint, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
u32 bi_generation = 0;
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k);
if (ret) {
bch2_btree_iter_unlock(&iter);
if (ret)
return ret;
}
switch (k.k->type) {
case BCH_INODE_BLOCKDEV:
case BCH_INODE_FS:
/* slot used */
if (iter.pos.inode == max)
if (iter->pos.inode >= max)
goto out;
bch2_btree_iter_next_slot(&iter);
bch2_btree_iter_next_slot(iter);
break;
case BCH_INODE_GENERATION: {
struct bkey_s_c_inode_generation g =
bkey_s_c_to_inode_generation(k);
bi_generation = le32_to_cpu(g.v->bi_generation);
/* fallthrough: */
}
default:
inode_u->bi_generation = bi_generation;
bch2_inode_pack(&inode_p, inode_u);
inode_p.inode.k.p = k.k->p;
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
BTREE_INSERT_ATOMIC,
BTREE_INSERT_ENTRY(&iter,
&inode_p.inode.k_i));
if (ret != -EINTR) {
bch2_btree_iter_unlock(&iter);
if (!ret) {
inode_u->bi_inum =
inode_p.inode.k.p.inode;
*hint = inode_p.inode.k.p.inode + 1;
}
return ret;
}
if (ret == -EINTR)
continue;
*hint = k.k->p.inode;
inode_u->bi_inum = k.k->p.inode;
inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0;
}
}
out:
bch2_btree_iter_unlock(&iter);
if (!searched_from_start) {
if (start != min) {
/* Retry from start */
*hint = min;
searched_from_start = true;
start = min;
bch2_btree_iter_set_pos(iter, POS(start, 0));
goto again;
}
return -ENOSPC;
}
int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
u64 min, u64 max, u64 *hint)
{
return bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC,
__bch2_inode_create(&trans, inode_u, min, max, hint));
}
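/*
 * For reference: a sketch of the retry loop a bch2_trans_do() call like
 * the one above is assumed to expand to. inode_create_sketch() is a
 * hypothetical name, and the exact bch2_trans_commit() signature is an
 * assumption (mirroring bch2_btree_insert_at()'s NULL disk reservation,
 * hook and journal_seq arguments); the -EINTR handling follows the
 * transaction-restart convention used throughout this patch.
 */
static int inode_create_sketch(struct bch_fs *c,
			       struct bch_inode_unpacked *inode_u,
			       u64 min, u64 max, u64 *hint)
{
	struct btree_trans trans;
	int ret;

	bch2_trans_init(&trans, c);
	do {
		bch2_trans_begin(&trans);

		/* queue the update, then commit it atomically: */
		ret = __bch2_inode_create(&trans, inode_u, min, max, hint) ?:
		      bch2_trans_commit(&trans, NULL, NULL, NULL,
					BTREE_INSERT_ATOMIC);
	} while (ret == -EINTR);	/* restart: retry the whole transaction */

	return bch2_trans_exit(&trans) ?: ret;
}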
int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size,
struct extent_insert_hook *hook, u64 *journal_seq)
{
View File
@ -38,8 +38,13 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
uid_t, gid_t, umode_t, dev_t,
struct bch_inode_unpacked *);
int __bch2_inode_create(struct btree_trans *,
struct bch_inode_unpacked *,
u64, u64, u64 *);
int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *,
u64, u64, u64 *);
int bch2_inode_truncate(struct bch_fs *, u64, u64,
struct extent_insert_hook *, u64 *);
int bch2_inode_rm(struct bch_fs *, u64);
View File
@ -5,6 +5,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "dirent.h"
#include "error.h"
#include "fsck.h"
#include "journal_io.h"
@ -14,6 +15,8 @@
#include <linux/stat.h>
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
struct bkey_i *btree_root_find(struct bch_fs *c,
struct bch_sb_field_clean *clean,
struct jset *j,
@ -233,7 +236,8 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_fs_journal_start(&c->journal);
err = "error starting allocator";
if (bch2_fs_allocator_start(c))
ret = bch2_fs_allocator_start(c);
if (ret)
goto err;
bch_verbose(c, "starting journal replay:");
@ -246,12 +250,16 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.norecovery)
goto out;
bch_verbose(c, "starting fsck:");
err = "error in fsck";
ret = bch2_fsck(c, !c->opts.nofsck);
ret = bch2_fsck(c);
if (ret)
goto err;
bch_verbose(c, "fsck done");
if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
mutex_unlock(&c->sb_lock);
}
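/*
 * Once fsck leaves no unfixed errors, recording BCH_FEATURE_ATOMIC_NLINK
 * lets bch2_fsck() skip the full link count pass on future unclean
 * mounts (see bch2_fsck_inode_nlink()).
 */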
if (enabled_qtypes(c)) {
bch_verbose(c, "reading quotas:");
@ -273,8 +281,10 @@ fsck_err:
int bch2_fs_initialize(struct bch_fs *c)
{
struct bch_inode_unpacked inode;
struct bch_inode_unpacked root_inode, lostfound_inode;
struct bkey_inode_buf packed_inode;
struct bch_hash_info root_hash_info;
struct qstr lostfound = QSTR("lost+found");
const char *err = "cannot allocate memory";
struct bch_dev *ca;
LIST_HEAD(journal);
@ -307,21 +317,46 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_journal_set_replay_done(&c->journal);
err = "error starting allocator";
if (bch2_fs_allocator_start(c))
ret = bch2_fs_allocator_start(c);
if (ret)
goto err;
bch2_inode_init(c, &inode, 0, 0,
bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed_inode, &inode);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
root_inode.bi_nlink++; /* lost+found */
bch2_inode_pack(&packed_inode, &root_inode);
err = "error creating root directory";
if (bch2_btree_insert(c, BTREE_ID_INODES,
&packed_inode.inode.k_i,
NULL, NULL, NULL, 0))
ret = bch2_btree_insert(c, BTREE_ID_INODES,
&packed_inode.inode.k_i,
NULL, NULL, NULL, 0);
if (ret)
goto err;
bch2_inode_init(c, &lostfound_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0,
&root_inode);
lostfound_inode.bi_inum = BCACHEFS_ROOT_INO + 1;
bch2_inode_pack(&packed_inode, &lostfound_inode);
err = "error creating lost+found";
ret = bch2_btree_insert(c, BTREE_ID_INODES,
&packed_inode.inode.k_i,
NULL, NULL, NULL, 0);
if (ret)
goto err;
root_hash_info = bch2_hash_info_init(c, &root_inode);
ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR,
&lostfound, lostfound_inode.bi_inum, NULL,
BTREE_INSERT_NOFAIL);
if (ret)
goto err;
atomic_long_set(&c->nr_inodes, 2);
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
if (ret)
@ -329,12 +364,14 @@ int bch2_fs_initialize(struct bch_fs *c)
}
err = "error writing first journal entry";
if (bch2_journal_meta(&c->journal))
ret = bch2_journal_meta(&c->journal);
if (ret)
goto err;
mutex_lock(&c->sb_lock);
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
View File
@ -125,21 +125,29 @@ struct bch_hash_desc {
bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
};
static inline struct bkey_s_c
bch2_hash_lookup_at(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct btree_iter *iter, const void *search)
static inline struct btree_iter *
bch2_hash_lookup(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
u64 inode, const void *key,
unsigned flags)
{
u64 inode = iter->pos.inode;
struct btree_iter *iter;
struct bkey_s_c k;
iter = bch2_trans_get_iter(trans, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS|flags);
if (IS_ERR(iter))
return iter;
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (iter->pos.inode != inode)
break;
if (k.k->type == desc.key_type) {
if (!desc.cmp_key(k, search))
return k;
if (!desc.cmp_key(k, key))
return iter;
} else if (k.k->type == desc.whiteout_type) {
;
} else {
@ -147,97 +155,48 @@ bch2_hash_lookup_at(const struct bch_hash_desc desc,
break;
}
}
return btree_iter_err(k) ? k : bkey_s_c_err(-ENOENT);
return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOENT);
}
static inline struct bkey_s_c
bch2_hash_lookup_bkey_at(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct btree_iter *iter, struct bkey_s_c search)
static inline struct btree_iter *
bch2_hash_hole(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
u64 inode, const void *key)
{
u64 inode = iter->pos.inode;
struct btree_iter *iter;
struct bkey_s_c k;
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (iter->pos.inode != inode)
break;
if (k.k->type == desc.key_type) {
if (!desc.cmp_bkey(k, search))
return k;
} else if (k.k->type == desc.whiteout_type) {
;
} else {
/* hole, not found */
break;
}
}
return btree_iter_err(k) ? k : bkey_s_c_err(-ENOENT);
}
static inline struct bkey_s_c
bch2_hash_lookup(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
struct btree_iter *iter, const void *key)
{
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS);
return bch2_hash_lookup_at(desc, info, iter, key);
}
static inline struct bkey_s_c
bch2_hash_lookup_intent(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
struct btree_iter *iter, const void *key)
{
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
return bch2_hash_lookup_at(desc, info, iter, key);
}
static inline struct bkey_s_c
bch2_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter)
{
u64 inode = iter->pos.inode;
struct bkey_s_c k;
iter = bch2_trans_get_iter(trans, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(iter))
return iter;
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (iter->pos.inode != inode)
break;
if (k.k->type != desc.key_type)
return k;
return iter;
}
return btree_iter_err(k) ? k : bkey_s_c_err(-ENOENT);
return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOSPC);
}
static inline struct bkey_s_c bch2_hash_hole(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
struct btree_iter *iter,
const void *key)
{
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
return bch2_hash_hole_at(desc, iter);
}
static inline int bch2_hash_needs_whiteout(const struct bch_hash_desc desc,
static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct btree_iter *iter,
struct btree_iter *start)
{
struct btree_iter *iter;
struct bkey_s_c k;
bch2_btree_iter_copy(iter, start);
iter = bch2_trans_copy_iter(trans, start);
if (IS_ERR(iter))
return PTR_ERR(iter);
bch2_btree_iter_next_slot(iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
@ -252,142 +211,108 @@ static inline int bch2_hash_needs_whiteout(const struct bch_hash_desc desc,
return btree_iter_err(k);
}
static inline int __bch2_hash_set(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
u64 inode, struct bkey_i *insert, int flags)
{
struct btree_iter *iter, *slot = NULL;
struct bkey_s_c k;
iter = bch2_trans_get_iter(trans, desc.btree_id,
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (iter->pos.inode != inode)
break;
if (k.k->type == desc.key_type) {
if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
goto found;
/* hash collision: */
continue;
}
if (!slot &&
!(flags & BCH_HASH_SET_MUST_REPLACE)) {
slot = bch2_trans_copy_iter(trans, iter);
if (IS_ERR(slot))
return PTR_ERR(slot);
}
if (k.k->type != desc.whiteout_type)
goto not_found;
}
return btree_iter_err(k) ?: -ENOSPC;
not_found:
if (flags & BCH_HASH_SET_MUST_REPLACE)
return -ENOENT;
insert->k.p = slot->pos;
bch2_trans_update(trans, slot, insert, 0);
return 0;
found:
if (flags & BCH_HASH_SET_MUST_CREATE)
return -EEXIST;
insert->k.p = iter->pos;
bch2_trans_update(trans, iter, insert, 0);
return 0;
}
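/*
 * Sketch of how the probe above walks a chain starting at the key's hash
 * position:
 *
 *	[ A ][ whiteout ][ B ][ hole ]
 *
 * Inserting C (hashing to A's slot) steps past A and B as collisions,
 * but 'slot' remembers the whiteout - the first reusable slot seen - so
 * C lands there rather than past B; hitting the hole first means the key
 * isn't present (-ENOENT under BCH_HASH_SET_MUST_REPLACE, since then no
 * slot is reserved).
 */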
static inline int bch2_hash_set(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
u64 *journal_seq,
struct bkey_i *insert, int flags)
{
struct btree_iter iter, hashed_slot;
struct bkey_s_c k;
return bch2_trans_do(c, journal_seq, flags|BTREE_INSERT_ATOMIC,
__bch2_hash_set(&trans, desc, info,
inode, insert, flags));
}
static inline int bch2_hash_delete_at(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct btree_iter *iter)
{
struct bkey_i *delete;
int ret;
bch2_btree_iter_init(&hashed_slot, c, desc.btree_id,
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_init(&iter, c, desc.btree_id, hashed_slot.pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_link(&hashed_slot, &iter);
retry:
/*
* On hash collision, we have to keep the slot we hashed to locked while
* we do the insert - to avoid racing with another thread deleting
* whatever's in the slot we hashed to:
*/
ret = bch2_btree_iter_traverse(&hashed_slot);
if (ret)
goto err;
/*
* On -EINTR/retry, we dropped locks - always restart from the slot we
* hashed to:
*/
bch2_btree_iter_copy(&iter, &hashed_slot);
k = bch2_hash_lookup_bkey_at(desc, info, &iter, bkey_i_to_s_c(insert));
ret = btree_iter_err(k);
if (ret == -ENOENT) {
if (flags & BCH_HASH_SET_MUST_REPLACE) {
ret = -ENOENT;
goto err;
}
/*
* Not found, so we're now looking for any open
* slot - we might have skipped over a whiteout
* that we could have used, so restart from the
* slot we hashed to:
*/
bch2_btree_iter_copy(&iter, &hashed_slot);
k = bch2_hash_hole_at(desc, &iter);
if ((ret = btree_iter_err(k)))
goto err;
} else if (!ret) {
if (flags & BCH_HASH_SET_MUST_CREATE) {
ret = -EEXIST;
goto err;
}
} else {
goto err;
}
insert->k.p = iter.pos;
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
BTREE_INSERT_ATOMIC|flags,
BTREE_INSERT_ENTRY(&iter, insert));
err:
if (ret == -EINTR)
goto retry;
/*
* On successful insert, we don't want to clobber ret with error from
* iter:
*/
bch2_btree_iter_unlock(&iter);
bch2_btree_iter_unlock(&hashed_slot);
return ret;
}
static inline int bch2_hash_delete_at(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct btree_iter *iter,
u64 *journal_seq)
{
struct btree_iter whiteout_iter;
struct bkey_i delete;
int ret = -ENOENT;
bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id,
iter->pos, BTREE_ITER_SLOTS);
bch2_btree_iter_link(iter, &whiteout_iter);
ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter);
ret = bch2_hash_needs_whiteout(trans, desc, info, iter);
if (ret < 0)
goto err;
return ret;
bkey_init(&delete.k);
delete.k.p = iter->pos;
delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
delete = bch2_trans_kmalloc(trans, sizeof(*delete));
if (IS_ERR(delete))
return PTR_ERR(delete);
ret = bch2_btree_insert_at(iter->c, NULL, NULL, journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC,
BTREE_INSERT_ENTRY(iter, &delete));
err:
bch2_btree_iter_unlink(&whiteout_iter);
return ret;
bkey_init(&delete->k);
delete->k.p = iter->pos;
delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
bch2_trans_update(trans, iter, delete, 0);
return 0;
}
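/*
 * The key type chosen above is what keeps later chain members reachable:
 * bch2_hash_needs_whiteout() presumably returns nonzero when a following
 * slot still hashes through this position, where deleting straight to a
 * hole would terminate probes early and hide that key; the whiteout
 * keeps the chain intact while marking the slot reusable.
 */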
static inline int bch2_hash_delete(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
u64 *journal_seq, const void *key)
static inline int bch2_hash_delete(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
u64 inode, const void *key)
{
struct btree_iter iter, whiteout_iter;
struct bkey_s_c k;
int ret = -ENOENT;
struct btree_iter *iter;
bch2_btree_iter_init(&iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
bch2_btree_iter_init(&whiteout_iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_SLOTS);
bch2_btree_iter_link(&iter, &whiteout_iter);
retry:
k = bch2_hash_lookup_at(desc, info, &iter, key);
if ((ret = btree_iter_err(k)))
goto err;
iter = bch2_hash_lookup(trans, desc, info, inode, key,
BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
ret = bch2_hash_delete_at(desc, info, &iter, journal_seq);
err:
if (ret == -EINTR)
goto retry;
bch2_btree_iter_unlock(&whiteout_iter);
bch2_btree_iter_unlock(&iter);
return ret;
return bch2_hash_delete_at(trans, desc, info, iter);
}
#endif /* _BCACHEFS_STR_HASH_H */
View File
@ -52,7 +52,7 @@ static int __bch2_strtoh(const char *cp, u64 *res,
cp++;
} while (isdigit(*cp));
for (u = 1; u < ARRAY_SIZE(si_units); u++)
for (u = 1; u < strlen(si_units); u++)
if (*cp == si_units[u]) {
cp++;
goto got_unit;
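/*
 * Why strlen() rather than ARRAY_SIZE() above: assuming si_units is a
 * NUL-terminated string, ARRAY_SIZE() includes the terminating NUL, so
 * input ending right after the digits would match the terminator and be
 * parsed as if it carried a unit suffix.
 */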
View File
@ -74,7 +74,6 @@ const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
const struct xattr_handler *handler;
struct bkey_s_c_xattr xattr;
unsigned u64s;
switch (k.k->type) {
case BCH_XATTR:
@ -82,13 +81,15 @@ const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
return "value too small";
xattr = bkey_s_c_to_xattr(k);
u64s = xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len));
if (bkey_val_u64s(k.k) < u64s)
if (bkey_val_u64s(k.k) <
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len)))
return "value too small";
if (bkey_val_u64s(k.k) > u64s)
if (bkey_val_u64s(k.k) >
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len) + 4))
return "value too big";
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
@ -142,32 +143,28 @@ void bch2_xattr_to_text(struct bch_fs *c, char *buf,
}
}
struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *c,
struct btree_iter *iter,
struct bch_inode_info *inode,
const char *name, int type)
{
return bch2_hash_lookup(bch2_xattr_hash_desc,
&inode->ei_str_hash,
c, inode->v.i_ino, iter,
&X_SEARCH(type, name, strlen(name)));
}
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
const char *name, void *buffer, size_t size, int type)
const char *name, void *buffer, size_t size, int type)
{
struct btree_iter iter;
struct bkey_s_c k;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
int ret;
k = bch2_hash_lookup(bch2_xattr_hash_desc, &inode->ei_str_hash, c,
inode->v.i_ino, &iter,
&X_SEARCH(type, name, strlen(name)));
if (IS_ERR(k.k))
return bch2_btree_iter_unlock(&iter) ?: -ENODATA;
bch2_trans_init(&trans, c);
xattr = bkey_s_c_to_xattr(k);
iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
&inode->ei_str_hash, inode->v.i_ino,
&X_SEARCH(type, name, strlen(name)),
0);
if (IS_ERR(iter)) {
bch2_trans_exit(&trans);
BUG_ON(PTR_ERR(iter) == -EINTR);
return PTR_ERR(iter) == -ENOENT ? -ENODATA : PTR_ERR(iter);
}
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
ret = le16_to_cpu(xattr.v->x_val_len);
if (buffer) {
if (ret > size)
@ -176,47 +173,48 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
memcpy(buffer, xattr_val(xattr.v), ret);
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
return ret;
}
int bch2_xattr_set(struct bch_fs *c, u64 inum,
int bch2_xattr_set(struct btree_trans *trans, u64 inum,
const struct bch_hash_info *hash_info,
const char *name, const void *value, size_t size,
int flags, int type, u64 *journal_seq)
int type, int flags)
{
struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
int ret;
if (value) {
struct bkey_i_xattr *xattr;
unsigned namelen = strlen(name);
unsigned u64s = BKEY_U64s +
xattr_val_u64s(search.name.len, size);
xattr_val_u64s(namelen, size);
if (u64s > U8_MAX)
return -ERANGE;
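/*
 * (u64s must fit in struct bkey's u8 u64s field: at most 255 u64s for
 * the key header plus xattr name plus value)
 */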
xattr = kmalloc(u64s * sizeof(u64), GFP_NOFS);
if (!xattr)
return -ENOMEM;
xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(xattr))
return PTR_ERR(xattr);
bkey_xattr_init(&xattr->k_i);
xattr->k.u64s = u64s;
xattr->v.x_type = type;
xattr->v.x_name_len = search.name.len;
xattr->v.x_name_len = namelen;
xattr->v.x_val_len = cpu_to_le16(size);
memcpy(xattr->v.x_name, search.name.name, search.name.len);
memcpy(xattr->v.x_name, name, namelen);
memcpy(xattr_val(&xattr->v), value, size);
ret = bch2_hash_set(bch2_xattr_hash_desc, hash_info, c,
inum, journal_seq,
&xattr->k_i,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
kfree(xattr);
ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
inum, &xattr->k_i,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
} else {
ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
c, inum, journal_seq, &search);
struct xattr_search_key search =
X_SEARCH(type, name, strlen(name));
ret = bch2_hash_delete(trans, bch2_xattr_hash_desc,
hash_info, inum, &search);
}
if (ret == -ENOENT)
@ -308,9 +306,11 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
return bch2_xattr_set(c, inode->v.i_ino, &inode->ei_str_hash,
name, value, size, flags, handler->flags,
&inode->ei_journal_seq);
return bch2_trans_do(c, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC,
bch2_xattr_set(&trans, inode->v.i_ino,
&inode->ei_str_hash,
name, value, size,
handler->flags, flags));
}
static const struct xattr_handler bch_xattr_user_handler = {
@ -433,7 +433,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
}
mutex_lock(&inode->ei_update_lock);
ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s);
ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
mutex_unlock(&inode->ei_update_lock);
if (value &&
View File
@ -35,15 +35,12 @@ struct xattr_handler;
struct bch_hash_info;
struct bch_inode_info;
struct bkey_s_c bch2_xattr_get_iter(struct bch_fs *,
struct btree_iter *,
struct bch_inode_info *,
const char *, int);
int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *,
const char *, void *, size_t, int);
int bch2_xattr_set(struct bch_fs *, u64, const struct bch_hash_info *,
const char *, const void *, size_t, int, int, u64 *);
int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *,
const char *, const void *, size_t, int, int);
ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
extern const struct xattr_handler *bch2_xattr_handlers[];