Update bcachefs sources to f9c612bbf82d bcachefs: Fixes for building in userspace

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-09-23 18:42:30 -04:00
parent 731926b5e5
commit a053ebfb8c
114 changed files with 4092 additions and 3674 deletions

View File

@ -1 +1 @@
e7f62157681d96386dc500609149b9685358a2b0
f9c612bbf82da87d7d4a005310c5213db00e22de

View File

@ -32,13 +32,13 @@ try-run = $(shell set -e; \
# Usage: aflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
as-option = $(call try-run,\
$(CC) -Werror $(KBUILD_AFLAGS) $(1) -c -x assembler-with-cpp /dev/null -o "$$TMP",$(1),$(2))
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(1) -c -x assembler-with-cpp /dev/null -o "$$TMP",$(1),$(2))
# as-instr
# Usage: aflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) -Werror $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
@ -72,7 +72,3 @@ clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
# ld-ifversion
# Usage: $(call ld-ifversion, -ge, 22252, y)
ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))

View File

@ -61,13 +61,11 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
for (i = 0; i < BTREE_ID_NR; i++) {
const struct bch_extent_ptr *ptr;
struct bkey_ptrs_c ptrs;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
bch2_trans_init(&trans, c, 0, 0);
__for_each_btree_node(&trans, iter, i, POS_MIN, 0, 1, 0, b, ret) {
__for_each_btree_node(trans, iter, i, POS_MIN, 0, 1, 0, b, ret) {
struct btree_node_iter iter;
struct bkey u;
struct bkey_s_c k;
@ -97,8 +95,8 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
btree_bytes(c));
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
}
qcow2_write_image(ca->disk_sb.bdev->bd_buffered_fd, fd, &data,

View File

@ -64,7 +64,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
if (IS_ERR(c))
die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
int ret;
@ -74,9 +74,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
if (ret)
die("error %s from posix_memalign", bch2_err_str(ret));
bch2_trans_init(&trans, c, 0, 0);
__for_each_btree_node(&trans, iter, btree_id, POS_MIN, 0, level, 0, b, ret) {
__for_each_btree_node(trans, iter, btree_id, POS_MIN, 0, level, 0, b, ret) {
if (b->c.level != level)
continue;
@ -113,8 +111,8 @@ int cmd_kill_btree_node(int argc, char *argv[])
bch_err(c, "node at specified index not found");
ret = EXIT_FAILURE;
done:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
bch2_fs_stop(c);
return ret;

View File

@ -33,7 +33,7 @@
#include "libbcachefs/errcode.h"
#include "libbcachefs/fs-common.h"
#include "libbcachefs/inode.h"
#include "libbcachefs/io.h"
#include "libbcachefs/io_write.h"
#include "libbcachefs/replicas.h"
#include "libbcachefs/str_hash.h"
#include "libbcachefs/super.h"
@ -126,7 +126,7 @@ static void update_inode(struct bch_fs *c,
bch2_inode_pack(&packed, inode);
packed.inode.k.p.snapshot = U32_MAX;
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
NULL, NULL, 0);
NULL, 0);
if (ret)
die("error updating inode: %s", bch2_err_str(ret));
}
@ -140,7 +140,7 @@ static void create_link(struct bch_fs *c,
struct bch_inode_unpacked inode;
int ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_link_trans(&trans,
bch2_link_trans(trans,
(subvol_inum) { 1, parent->bi_inum }, &parent_u,
(subvol_inum) { 1, inum }, &inode, &qstr));
if (ret)
@ -159,7 +159,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c,
bch2_inode_init_early(c, &new_inode);
int ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_create_trans(&trans,
bch2_create_trans(trans,
(subvol_inum) { 1, parent->bi_inum }, parent,
&new_inode, &qstr,
uid, gid, mode, rdev, NULL, NULL,
@ -232,7 +232,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
struct bch_inode_unpacked inode_u;
int ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_xattr_set(&trans,
bch2_xattr_set(trans,
(subvol_inum) { 1, dst->bi_inum },
&inode_u, &hash_info, attr,
val, val_size, h->flags, 0));
@ -339,8 +339,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
die("error reserving space in new filesystem: %s",
bch2_err_str(ret));
ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
&res, NULL, 0);
ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
if (ret)
die("btree insert error %s", bch2_err_str(ret));

View File

@ -6,6 +6,8 @@
#include <linux/kobject.h>
#include <linux/types.h>
#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX)
#define BIO_MAX_VECS 256U
typedef unsigned fmode_t;
@ -21,30 +23,20 @@ struct user_namespace;
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
/* file is open for reading */
#define FMODE_READ ((__force fmode_t)0x1)
/* file is open for writing */
#define FMODE_WRITE ((__force fmode_t)0x2)
/* file is seekable */
#define FMODE_LSEEK ((__force fmode_t)0x4)
/* file can be accessed using pread */
#define FMODE_PREAD ((__force fmode_t)0x8)
/* file can be accessed using pwrite */
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
/* File is opened with O_NDELAY (only set for block devices) */
#define FMODE_NDELAY ((__force fmode_t)0x40)
/* File is opened with O_EXCL (only set for block devices) */
#define FMODE_EXCL ((__force fmode_t)0x80)
/* File is opened using open(.., 3, ..) and is writeable only for ioctls
(specialy hack for floppy.c) */
#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400)
#define FMODE_BUFFERED ((__force fmode_t)0x800)
typedef unsigned int __bitwise blk_mode_t;
/* open for reading */
#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers) */
#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (special hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
struct inode {
unsigned long i_ino;
@ -93,9 +85,14 @@ int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsig
unsigned bdev_logical_block_size(struct block_device *bdev);
sector_t get_capacity(struct gendisk *disk);
void blkdev_put(struct block_device *bdev, fmode_t mode);
/*
 * Callback table supplied by the holder of a block device; a pointer to it
 * is the last argument of blkdev_get_by_path().
 */
struct blk_holder_ops {
/* NOTE(review): presumably called when @bdev becomes unusable - confirm */
void (*mark_dead)(struct block_device *bdev);
};
void blkdev_put(struct block_device *bdev, void *holder);
void bdput(struct block_device *bdev);
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder);
struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hop);
int lookup_bdev(const char *path, dev_t *);
struct super_block {

View File

@ -65,6 +65,7 @@
#define unreachable() __builtin_unreachable()
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#define fallthrough __attribute__((__fallthrough__))
#define __noreturn __attribute__((__noreturn__))
#define ___PASTE(a,b) a##b
#define __PASTE(a,b) ___PASTE(a,b)

View File

@ -12,7 +12,7 @@
#define rcu_access_pointer(p) READ_ONCE(p)
#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
#define kvfree_rcu(ptr) kfree(ptr) /* XXX */
#define kvfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)

View File

@ -1,18 +1,71 @@
// SPDX-License-Identifier: GPL-2.0
#ifdef CONFIG_BCACHEFS_POSIX_ACL
#include "bcachefs.h"
#include <linux/fs.h>
#include "acl.h"
#include "xattr.h"
#include <linux/posix_acl.h>
/*
 * Printable names for ACL entry tags, indexed directly by the ACL_* tag
 * value. Slots that are not named by a designator are implicitly NULL, and
 * the trailing NULL fills the slot right after ACL_OTHER.
 */
static const char * const acl_types[] = {
[ACL_USER_OBJ] = "user_obj",
[ACL_USER] = "user",
[ACL_GROUP_OBJ] = "group_obj",
[ACL_GROUP] = "group",
[ACL_MASK] = "mask",
[ACL_OTHER] = "other",
NULL,
};
void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size)
{
const void *p, *end = value + size;
if (!value ||
size < sizeof(bch_acl_header) ||
((bch_acl_header *)value)->a_version != cpu_to_le32(BCH_ACL_VERSION))
return;
p = value + sizeof(bch_acl_header);
while (p < end) {
const bch_acl_entry *in = p;
unsigned tag = le16_to_cpu(in->e_tag);
prt_str(out, acl_types[tag]);
switch (tag) {
case ACL_USER_OBJ:
case ACL_GROUP_OBJ:
case ACL_MASK:
case ACL_OTHER:
p += sizeof(bch_acl_entry_short);
break;
case ACL_USER:
prt_printf(out, " uid %u", le32_to_cpu(in->e_id));
p += sizeof(bch_acl_entry);
break;
case ACL_GROUP:
prt_printf(out, " gid %u", le32_to_cpu(in->e_id));
p += sizeof(bch_acl_entry);
break;
}
prt_printf(out, " %o", le16_to_cpu(in->e_perm));
if (p != end)
prt_char(out, ' ');
}
}
#ifdef CONFIG_BCACHEFS_POSIX_ACL
#include "fs.h"
#include <linux/fs.h>
#include <linux/posix_acl_xattr.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "acl.h"
#include "fs.h"
#include "xattr.h"
static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
{
return sizeof(bch_acl_header) +
@ -226,18 +279,16 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc,
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
if (ret) {
if (!bch2_err_matches(ret, ENOENT))
@ -253,7 +304,7 @@ retry:
}
xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(&trans, xattr_val(xattr.v),
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
if (!IS_ERR(acl))
@ -262,8 +313,8 @@ out:
if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
return acl;
}
@ -303,7 +354,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct posix_acl *acl;
@ -311,12 +362,11 @@ int bch2_set_acl(struct mnt_idmap *idmap,
int ret;
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
acl = _acl;
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
@ -329,30 +379,30 @@ retry:
goto btree_err;
}
ret = bch2_set_acl_trans(&trans, inode_inum(inode), &inode_u, acl, type);
ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type);
if (ret)
goto btree_err;
inode_u.bi_ctime = bch2_current_time(c);
inode_u.bi_mode = mode;
ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL, 0);
ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
bch2_trans_iter_exit(trans, &inode_iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err;
bch2_inode_update_after_write(&trans, inode, &inode_u,
bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_CTIME|ATTR_MODE);
set_cached_acl(&inode->v, type, acl);
err:
bch2_trans_exit(&trans);
mutex_unlock(&inode->ei_update_lock);
bch2_trans_put(trans);
return ret;
}
@ -367,7 +417,7 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
struct btree_iter iter;
struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new;
struct posix_acl *acl;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
@ -377,9 +427,10 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
return bch2_err_matches(ret, ENOENT) ? 0 : ret;
k = bch2_btree_iter_peek_slot(&iter);
xattr = bkey_s_c_to_xattr(k);
ret = bkey_err(k);
if (ret)
goto err;
xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));

View File

@ -7,8 +7,6 @@ struct bch_hash_info;
struct bch_inode_info;
struct posix_acl;
#ifdef CONFIG_BCACHEFS_POSIX_ACL
#define BCH_ACL_VERSION 0x0001
typedef struct {
@ -26,6 +24,10 @@ typedef struct {
__le32 a_version;
} bch_acl_header;
void bch2_acl_to_text(struct printbuf *, const void *, size_t);
#ifdef CONFIG_BCACHEFS_POSIX_ACL
struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,

View File

@ -237,13 +237,12 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) {
prt_printf(err, "bad val size (%u > %lu)",
prt_printf(err, "bad val size (%u > %zu)",
alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
return -BCH_ERR_invalid_bkey;
}
@ -527,7 +526,7 @@ int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) {
prt_printf(err, "bad val size (%lu != %zu)",
prt_printf(err, "bad val size (%zu != %zu)",
bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
return -BCH_ERR_invalid_bkey;
}
@ -549,7 +548,7 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke
int bch2_bucket_gens_init(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_alloc_v4 a;
@ -560,9 +559,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen;
int ret;
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
/*
* Not a fsck error because this is checked/repaired by
@ -575,10 +572,10 @@ int bch2_bucket_gens_init(struct bch_fs *c)
pos = alloc_gens_pos(iter.pos, &offset);
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
ret = commit_do(&trans, NULL, NULL,
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
if (ret)
break;
have_bucket_gens_key = false;
@ -592,15 +589,15 @@ int bch2_bucket_gens_init(struct bch_fs *c)
g.v.gens[offset] = gen;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (have_bucket_gens_key && !ret)
ret = commit_do(&trans, NULL, NULL,
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@ -609,20 +606,19 @@ int bch2_bucket_gens_init(struct bch_fs *c)
int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
int ret;
down_read(&c->gc_lock);
bch2_trans_init(&trans, c, 0, 0);
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
const struct bch_bucket_gens *g;
u64 b;
for_each_btree_key(&trans, iter, BTREE_ID_bucket_gens, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
@ -646,11 +642,11 @@ int bch2_alloc_read(struct bch_fs *c)
b++)
*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
} else {
struct bch_alloc_v4 a;
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
/*
* Not a fsck error because this is checked/repaired by
@ -663,10 +659,10 @@ int bch2_alloc_read(struct bch_fs *c)
*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
up_read(&c->gc_lock);
if (ret)
@ -1201,15 +1197,15 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
}
if (need_update) {
struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(g));
struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
ret = PTR_ERR_OR_ZERO(k);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
memcpy(k, &g, sizeof(g));
memcpy(u, &g, sizeof(g));
ret = bch2_trans_update(trans, bucket_gens_iter, k, 0);
ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
if (ret)
goto err;
}
@ -1286,7 +1282,7 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
if (!btree_id_is_extents(iter->btree_id)) {
return __bch2_check_discard_freespace_key(trans, iter);
} else {
int ret;
int ret = 0;
while (!bkey_eq(iter->pos, end) &&
!(ret = btree_trans_too_many_iters(trans) ?:
@ -1355,15 +1351,14 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
}
if (need_update) {
struct bkey_i *k;
struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
k = bch2_trans_kmalloc(trans, sizeof(g));
ret = PTR_ERR_OR_ZERO(k);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto out;
memcpy(k, &g, sizeof(g));
ret = bch2_trans_update(trans, iter, k, 0);
memcpy(u, &g, sizeof(g));
ret = bch2_trans_update(trans, iter, u, 0);
}
out:
fsck_err:
@ -1373,27 +1368,25 @@ fsck_err:
int bch2_check_alloc_info(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
struct bkey hole;
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH);
bch2_trans_iter_init(&trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_PREFETCH);
bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
BTREE_ITER_PREFETCH);
bch2_trans_iter_init(&trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH);
while (1) {
struct bpos next;
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
ret = bkey_err(k);
@ -1406,7 +1399,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
if (k.k->type) {
next = bpos_nosnap_successor(k.k->p);
ret = bch2_check_alloc_key(&trans,
ret = bch2_check_alloc_key(trans,
k, &iter,
&discard_iter,
&freespace_iter,
@ -1416,11 +1409,11 @@ int bch2_check_alloc_info(struct bch_fs *c)
} else {
next = k.k->p;
ret = bch2_check_alloc_hole_freespace(&trans,
ret = bch2_check_alloc_hole_freespace(trans,
bkey_start_pos(k.k),
&next,
&freespace_iter) ?:
bch2_check_alloc_hole_bucket_gens(&trans,
bch2_check_alloc_hole_bucket_gens(trans,
bkey_start_pos(k.k),
&next,
&bucket_gens_iter);
@ -1428,7 +1421,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
goto bkey_err;
}
ret = bch2_trans_commit(&trans, NULL, NULL,
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret)
@ -1441,29 +1434,29 @@ bkey_err:
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &bucket_gens_iter);
bch2_trans_iter_exit(&trans, &freespace_iter);
bch2_trans_iter_exit(&trans, &discard_iter);
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &bucket_gens_iter);
bch2_trans_iter_exit(trans, &freespace_iter);
bch2_trans_iter_exit(trans, &discard_iter);
bch2_trans_iter_exit(trans, &iter);
if (ret < 0)
goto err;
ret = for_each_btree_key2(&trans, iter,
ret = for_each_btree_key2(trans, iter,
BTREE_ID_need_discard, POS_MIN,
BTREE_ITER_PREFETCH, k,
bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?:
for_each_btree_key2(&trans, iter,
bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
for_each_btree_key2(trans, iter,
BTREE_ID_freespace, POS_MIN,
BTREE_ITER_PREFETCH, k,
bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?:
for_each_btree_key_commit(&trans, iter,
bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
for_each_btree_key_commit(trans, iter,
BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
bch2_check_bucket_gens_key(&trans, &iter, k));
bch2_check_bucket_gens_key(trans, &iter, k));
err:
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@ -1549,10 +1542,10 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
bch2_check_alloc_to_lru_ref(&trans, &iter)));
bch2_check_alloc_to_lru_ref(trans, &iter)));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -1677,29 +1670,25 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
struct bpos discard_pos_done = POS_MAX;
int ret;
bch2_trans_init(&trans, c, 0, 0);
/*
* We're doing the commit in bch2_discard_one_bucket instead of using
* for_each_btree_key_commit() so that we can increment counters after
* successful commit:
*/
ret = for_each_btree_key2(&trans, iter,
BTREE_ID_need_discard, POS_MIN, 0, k,
bch2_discard_one_bucket(&trans, &iter, &discard_pos_done,
&seen,
&open,
&need_journal_commit,
&discarded));
bch2_trans_exit(&trans);
ret = bch2_trans_run(c,
for_each_btree_key2(trans, iter,
BTREE_ID_need_discard, POS_MIN, 0, k,
bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
&seen,
&open,
&need_journal_commit,
&discarded)));
if (need_journal_commit * 2 > seen)
bch2_journal_flush_async(&c->journal, NULL);
@ -1805,15 +1794,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
struct bch_dev *ca;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
ret = bch2_btree_write_buffer_flush(&trans);
ret = bch2_btree_write_buffer_flush(trans);
if (ret)
goto err;
@ -1821,11 +1808,11 @@ static void bch2_do_invalidates_work(struct work_struct *work)
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru,
ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0, 0),
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
BTREE_ITER_INTENT, k,
invalidate_one_bucket(&trans, &iter, k, &nr_to_invalidate));
invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
if (ret < 0) {
percpu_ref_put(&ca->ref);
@ -1833,7 +1820,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
}
}
err:
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@ -1847,7 +1834,7 @@ void bch2_do_invalidates(struct bch_fs *c)
static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
unsigned long *last_updated)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey hole;
@ -1855,9 +1842,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
struct bch_member *m;
int ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_PREFETCH);
/*
@ -1871,7 +1856,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
*last_updated = jiffies;
}
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
if (bkey_ge(iter.pos, end)) {
ret = 0;
@ -1891,8 +1876,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
ret = bch2_bucket_do_index(&trans, k, a, true) ?:
bch2_trans_commit(&trans, NULL, NULL,
ret = bch2_bucket_do_index(trans, k, a, true) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL);
if (ret)
@ -1902,7 +1887,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
} else {
struct bkey_i *freespace;
freespace = bch2_trans_kmalloc(&trans, sizeof(*freespace));
freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
ret = PTR_ERR_OR_ZERO(freespace);
if (ret)
goto bkey_err;
@ -1912,8 +1897,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
freespace->k.p = k.k->p;
freespace->k.size = k.k->size;
ret = __bch2_btree_insert(&trans, BTREE_ID_freespace, freespace, 0) ?:
bch2_trans_commit(&trans, NULL, NULL,
ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL);
if (ret)
@ -1928,11 +1913,11 @@ bkey_err:
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
if (ret < 0) {
bch_err(ca, "error initializing free space: %s", bch2_err_str(ret));
bch_err_msg(ca, ret, "initializing free space");
return ret;
}

View File

@ -25,7 +25,7 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io.h"
#include "io_write.h"
#include "journal.h"
#include "movinggc.h"
#include "nocow_locking.h"
@ -502,9 +502,14 @@ again:
}
/**
* bch_bucket_alloc - allocate a single bucket from a specific device
* bch2_bucket_alloc_trans - allocate a single bucket from a specific device
* @trans: transaction object
* @ca: device to allocate from
* @watermark: how important is this allocation?
* @cl: if not NULL, closure to be used to wait if buckets not available
* @usage: for secondarily also returning the current device usage
*
* Returns index of bucket on success, 0 on failure
* Returns: an open_bucket on success, or an ERR_PTR() on failure.
*/
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
@ -597,7 +602,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, watermark,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
cl, &usage)));
return ob;
}
@ -775,7 +780,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct dev_alloc_list devs_sorted;
struct ec_stripe_head *h;
struct open_bucket *ob;
struct bch_dev *ca;
unsigned i, ec_idx;
int ret = 0;
@ -805,8 +809,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
}
goto out_put_head;
got_bucket:
ca = bch_dev_bkey_exists(c, ob->dev);
ob->ec_idx = ec_idx;
ob->ec = h->s;
ec_stripe_new_get(h->s, STRIPE_REF_io);
@ -1032,10 +1034,13 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
/**
* should_drop_bucket - check if this is open_bucket should go away
* @ob: open_bucket to predicate on
* @c: filesystem handle
* @ca: if set, we're killing buckets for a particular device
* @ec: if true, we're shutting down erasure coding and killing all ec
* open_buckets
* otherwise, return true
* Returns: true if we should kill this open_bucket
*
* We're killing open_buckets because we're shutting down a device, erasure
* coding, or the entire filesystem - check if this open_bucket matches:

View File

@ -351,20 +351,17 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = { NULL };
struct bch_dev *ca;
struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
"backpointer for mising device:\n%s",
"backpointer for missing device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
goto out;
}
ca = bch_dev_bkey_exists(c, k.k->p.inode);
alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
bp_pos_to_bucket(c, k.k->p), 0);
ret = bkey_err(alloc_k);
@ -393,10 +390,10 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
for_each_btree_key_commit(trans, iter,
BTREE_ID_backpointers, POS_MIN, 0, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
bch2_check_btree_backpointer(&trans, &iter, k)));
bch2_check_btree_backpointer(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -629,7 +626,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
enum btree_id btree_id;
struct bpos_level last_flushed = { UINT_MAX };
struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
int ret = 0;
for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
@ -706,7 +703,7 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
--btree_nodes;
if (!btree_nodes) {
*end = alloc_k.k->p;
*end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
break;
}
@ -726,13 +723,12 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct bpos start = POS_MIN, end;
int ret;
bch2_trans_init(&trans, c, 0, 0);
while (1) {
ret = bch2_get_alloc_in_memory_pos(&trans, start, &end);
ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
if (ret)
break;
@ -752,13 +748,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
printbuf_exit(&buf);
}
ret = bch2_check_extents_to_backpointers_pass(&trans, start, end);
ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
if (ret || bpos_eq(end, SPOS_MAX))
break;
start = bpos_successor(end);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@ -827,13 +823,12 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
int ret;
bch2_trans_init(&trans, c, 0, 0);
while (1) {
ret = bch2_get_btree_in_memory_pos(&trans,
ret = bch2_get_btree_in_memory_pos(trans,
(1U << BTREE_ID_extents)|
(1U << BTREE_ID_reflink),
~0,
@ -859,13 +854,13 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
printbuf_exit(&buf);
}
ret = bch2_check_backpointers_to_extents_pass(&trans, start, end);
ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
if (ret || !bbpos_cmp(end, BBPOS_MAX))
break;
start = bbpos_successor(end);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);

View File

@ -454,6 +454,7 @@ enum gc_phase {
GC_PHASE_BTREE_bucket_gens,
GC_PHASE_BTREE_snapshot_trees,
GC_PHASE_BTREE_deleted_inodes,
GC_PHASE_BTREE_logged_ops,
GC_PHASE_PENDING_DELETE,
};
@ -626,8 +627,8 @@ struct journal_keys {
size_t size;
};
struct btree_path_buf {
struct btree_path *path;
struct btree_trans_buf {
struct btree_trans *trans;
};
#define REPLICAS_DELTA_LIST_MAX (1U << 16)
@ -786,9 +787,9 @@ struct bch_fs {
/* btree_iter.c: */
struct seqmutex btree_trans_lock;
struct list_head btree_trans_list;
mempool_t btree_paths_pool;
mempool_t btree_trans_pool;
mempool_t btree_trans_mem_pool;
struct btree_path_buf __percpu *btree_paths_bufs;
struct btree_trans_buf __percpu *btree_trans_bufs;
struct srcu_struct btree_trans_barrier;
bool btree_trans_barrier_initialized;

View File

@ -83,8 +83,8 @@ typedef uuid_t __uuid_t;
#endif
#define BITMASK(name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
static const __maybe_unused unsigned name##_OFFSET = offset; \
static const __maybe_unused unsigned name##_BITS = (end - offset); \
\
static inline __u64 name(const type *k) \
{ \
@ -98,9 +98,9 @@ static inline void SET_##name(type *k, __u64 v) \
}
#define LE_BITMASK(_bits, name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \
static const __maybe_unused unsigned name##_OFFSET = offset; \
static const __maybe_unused unsigned name##_BITS = (end - offset); \
static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\
\
static inline __u64 name(const type *k) \
{ \
@ -370,7 +370,9 @@ static inline void bkey_init(struct bkey *k)
x(backpointer, 28) \
x(inode_v3, 29) \
x(bucket_gens, 30) \
x(snapshot_tree, 31)
x(snapshot_tree, 31) \
x(logged_op_truncate, 32) \
x(logged_op_finsert, 33)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@ -723,7 +725,7 @@ struct bch_inode {
__le64 bi_hash_seed;
__le32 bi_flags;
__le16 bi_mode;
__u8 fields[0];
__u8 fields[];
} __packed __aligned(8);
struct bch_inode_v2 {
@ -733,7 +735,7 @@ struct bch_inode_v2 {
__le64 bi_hash_seed;
__le64 bi_flags;
__le16 bi_mode;
__u8 fields[0];
__u8 fields[];
} __packed __aligned(8);
struct bch_inode_v3 {
@ -745,7 +747,7 @@ struct bch_inode_v3 {
__le64 bi_sectors;
__le64 bi_size;
__le64 bi_version;
__u8 fields[0];
__u8 fields[];
} __packed __aligned(8);
#define INODEv3_FIELDS_START_INITIAL 6
@ -847,8 +849,8 @@ enum {
__BCH_INODE_NODUMP = 3,
__BCH_INODE_NOATIME = 4,
__BCH_INODE_I_SIZE_DIRTY = 5,
__BCH_INODE_I_SECTORS_DIRTY = 6,
__BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */
__BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */
__BCH_INODE_UNLINKED = 7,
__BCH_INODE_BACKPTR_UNTRUSTED = 8,
@ -1097,20 +1099,20 @@ struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
__u64 _data[0];
__u64 _data[];
} __packed __aligned(8);
struct bch_indirect_inline_data {
struct bch_val v;
__le64 refcount;
u8 data[0];
u8 data[];
};
/* Inline data */
struct bch_inline_data {
struct bch_val v;
u8 data[0];
u8 data[];
};
/* Subvolumes: */
@ -1183,6 +1185,33 @@ struct bch_lru {
#define LRU_ID_STRIPES (1U << 16)
/* Logged operations btree: */
/*
 * Value type for a logged truncate operation. All multi-byte fields use
 * explicit little-endian types (__le32/__le64).
 *
 * NOTE(review): going by the field names, this identifies the inode being
 * truncated (subvol + inum) and the size it is being truncated to
 * (new_i_size) - confirm against the code that creates and replays these keys.
 */
struct bch_logged_op_truncate {
struct bch_val v;
__le32 subvol;
/* explicit padding between the 32-bit subvol and the 64-bit inum */
__le32 pad;
__le64 inum;
__le64 new_i_size;
};
/*
 * States of a logged finsert operation. No explicit values are given, so the
 * enumerators are numbered 0, 1, 2 in declaration order.
 *
 * NOTE(review): presumably the value stored in bch_logged_op_finsert.state;
 * if so, these numbers are part of the on-disk format and must not be
 * reordered - confirm.
 */
enum logged_op_finsert_state {
LOGGED_OP_FINSERT_start,
LOGGED_OP_FINSERT_shift_extents,
LOGGED_OP_FINSERT_finish,
};
/*
 * Value type for a logged finsert operation. All multi-byte fields use
 * explicit little-endian types (__le32/__le64).
 *
 * NOTE(review): field meanings below are inferred from their names only -
 * confirm against the code that creates and replays these keys.
 */
struct bch_logged_op_finsert {
struct bch_val v;
/* single byte; presumably one of enum logged_op_finsert_state - confirm */
__u8 state;
/* explicit padding after the one-byte state, ahead of the 32-bit subvol */
__u8 pad[3];
__le32 subvol;
__le64 inum;
__le64 dst_offset;
__le64 src_offset;
__le64 pos;
};
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@ -1223,7 +1252,7 @@ enum bch_sb_field_type {
struct bch_sb_field_journal {
struct bch_sb_field field;
__le64 buckets[0];
__le64 buckets[];
};
struct bch_sb_field_journal_v2 {
@ -1232,7 +1261,7 @@ struct bch_sb_field_journal_v2 {
struct bch_sb_field_journal_v2_entry {
__le64 start;
__le64 nr;
} d[0];
} d[];
};
/* BCH_SB_FIELD_members: */
@ -1279,7 +1308,7 @@ enum bch_member_state {
struct bch_sb_field_members {
struct bch_sb_field field;
struct bch_member members[0];
struct bch_member members[];
};
/* BCH_SB_FIELD_crypt: */
@ -1377,19 +1406,19 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
__u8 devs[0];
__u8 devs[];
} __packed;
struct bch_sb_field_replicas_v0 {
struct bch_sb_field field;
struct bch_replicas_entry_v0 entries[0];
struct bch_replicas_entry_v0 entries[];
} __packed __aligned(8);
struct bch_replicas_entry {
__u8 data_type;
__u8 nr_devs;
__u8 nr_required;
__u8 devs[0];
__u8 devs[];
} __packed;
#define replicas_entry_bytes(_i) \
@ -1397,7 +1426,7 @@ struct bch_replicas_entry {
struct bch_sb_field_replicas {
struct bch_sb_field field;
struct bch_replicas_entry entries[0];
struct bch_replicas_entry entries[];
} __packed __aligned(8);
/* BCH_SB_FIELD_quota: */
@ -1432,7 +1461,7 @@ LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
struct bch_sb_field_disk_groups {
struct bch_sb_field field;
struct bch_disk_group entries[0];
struct bch_disk_group entries[];
} __packed __aligned(8);
/* BCH_SB_FIELD_counters */
@ -1525,7 +1554,7 @@ enum bch_persistent_counters {
struct bch_sb_field_counters {
struct bch_sb_field field;
__le64 d[0];
__le64 d[];
};
/*
@ -1539,10 +1568,8 @@ struct jset_entry {
__u8 type; /* designates what this jset holds */
__u8 pad[3];
union {
struct bkey_i start[0];
__u64 _data[0];
};
struct bkey_i start[0];
__u64 _data[];
};
struct bch_sb_field_clean {
@ -1553,10 +1580,8 @@ struct bch_sb_field_clean {
__le16 _write_clock;
__le64 journal_seq;
union {
struct jset_entry start[0];
__u64 _data[0];
};
struct jset_entry start[0];
__u64 _data[];
};
struct journal_seq_blacklist_entry {
@ -1567,10 +1592,8 @@ struct journal_seq_blacklist_entry {
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
union {
struct journal_seq_blacklist_entry start[0];
__u64 _data[0];
};
struct journal_seq_blacklist_entry start[0];
__u64 _data[];
};
/* Superblock: */
@ -1645,7 +1668,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_max
};
static const unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
static const __maybe_unused
unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@ -1706,10 +1730,8 @@ struct bch_sb {
struct bch_sb_layout layout;
union {
struct bch_sb_field start[0];
__le64 _data[0];
};
struct bch_sb_field start[0];
__le64 _data[];
} __packed __aligned(8);
/*
@ -1954,7 +1976,7 @@ enum bch_csum_type {
BCH_CSUM_NR
};
static const unsigned bch_crc_bytes[] = {
static const __maybe_unused unsigned bch_crc_bytes[] = {
[BCH_CSUM_none] = 0,
[BCH_CSUM_crc32c_nonzero] = 4,
[BCH_CSUM_crc32c] = 4,
@ -2186,10 +2208,8 @@ struct jset {
__le64 last_seq;
union {
struct jset_entry start[0];
__u64 _data[0];
};
struct jset_entry start[0];
__u64 _data[];
} __packed __aligned(8);
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
@ -2259,7 +2279,10 @@ enum btree_id_flags {
x(snapshot_trees, 15, 0, \
BIT_ULL(KEY_TYPE_snapshot_tree)) \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \
BIT_ULL(KEY_TYPE_set))
BIT_ULL(KEY_TYPE_set)) \
x(logged_ops, 17, 0, \
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert))
enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
@ -2294,10 +2317,8 @@ struct bset {
__le16 version;
__le16 u64s; /* count of d[] in u64s */
union {
struct bkey_packed start[0];
__u64 _data[0];
};
struct bkey_packed start[0];
__u64 _data[];
} __packed __aligned(8);
LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);

View File

@ -127,7 +127,7 @@ static void pack_state_finish(struct pack_state *state,
struct bkey_packed *k)
{
EBUG_ON(state->p < k->_data);
EBUG_ON(state->p >= k->_data + state->format->key_u64s);
EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s);
*state->p = state->w;
}
@ -308,9 +308,14 @@ struct bpos __bkey_unpack_pos(const struct bkey_format *format,
/**
* bch2_bkey_pack_key -- pack just the key, not the value
* @out: packed result
* @in: key to pack
* @format: format of packed result
*
* Returns: true on success, false on failure
*/
bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
const struct bkey_format *format)
const struct bkey_format *format)
{
struct pack_state state = pack_state_init(format, out);
u64 *w = out->_data;
@ -336,9 +341,12 @@ bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
/**
* bch2_bkey_unpack -- unpack the key and the value
* @b: btree node of @src key (for packed format)
* @dst: unpacked result
* @src: packed input
*/
void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
const struct bkey_packed *src)
const struct bkey_packed *src)
{
__bkey_unpack_key(b, &dst->k, src);
@ -349,19 +357,24 @@ void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
/**
* bch2_bkey_pack -- pack the key and the value
* @dst: packed result
* @src: unpacked input
* @format: format of packed result
*
* Returns: true on success, false on failure
*/
bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in,
const struct bkey_format *format)
bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src,
const struct bkey_format *format)
{
struct bkey_packed tmp;
if (!bch2_bkey_pack_key(&tmp, &in->k, format))
if (!bch2_bkey_pack_key(&tmp, &src->k, format))
return false;
memmove_u64s((u64 *) out + format->key_u64s,
&in->v,
bkey_val_u64s(&in->k));
memcpy_u64s_small(out, &tmp, format->key_u64s);
memmove_u64s((u64 *) dst + format->key_u64s,
&src->v,
bkey_val_u64s(&src->k));
memcpy_u64s_small(dst, &tmp, format->key_u64s);
return true;
}

View File

@ -52,7 +52,7 @@ struct bkey_s {
static inline struct bkey_i *bkey_next(struct bkey_i *k)
{
return (struct bkey_i *) (k->_data + k->k.u64s);
return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
}
#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
@ -397,7 +397,7 @@ static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
}
#define bkeyp_val(_format, _k) \
((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k)))
((struct bch_val *) ((u64 *) (_k)->_data + bkeyp_key_u64s(_format, _k)))
extern const struct bkey_format bch2_bkey_format_current;
@ -732,7 +732,7 @@ static inline unsigned high_word_offset(const struct bkey_format *f)
#error edit for your odd byteorder.
#endif
#define high_word(f, k) ((k)->_data + high_word_offset(f))
#define high_word(f, k) ((u64 *) (k)->_data + high_word_offset(f))
#define next_word(p) nth_word(p, 1)
#define prev_word(p) nth_word(p, -1)

View File

@ -10,6 +10,7 @@
#include "error.h"
#include "extents.h"
#include "inode.h"
#include "io_misc.h"
#include "lru.h"
#include "quota.h"
#include "reflink.h"
@ -25,7 +26,7 @@ const char * const bch2_bkey_types[] = {
};
static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@ -39,7 +40,7 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != 0)",
@ -55,7 +56,7 @@ static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@ -70,7 +71,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k,
})
static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
return 0;
}
@ -91,7 +92,7 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
})
static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_val_bytes(k.k)) {
prt_printf(err, "incorrect value size (%zu != %zu)",
@ -368,7 +369,6 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
{
const struct bkey_ops *ops;
struct bkey uk;
struct bkey_s u;
unsigned nr_compat = 5;
int i;
@ -433,7 +433,9 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
}
break;
case 4:
case 4: {
struct bkey_s u;
if (!bkey_packed(k)) {
u = bkey_i_to_s(packed_to_bkey(k));
} else {
@ -450,6 +452,7 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
if (ops->compat)
ops->compat(btree_id, version, big_endian, write, u);
break;
}
default:
BUG();
}

View File

@ -9,14 +9,24 @@ struct sort_iter {
struct sort_iter_set {
struct bkey_packed *k, *end;
} data[MAX_BSETS + 1];
} data[];
};
static inline void sort_iter_init(struct sort_iter *iter, struct btree *b)
static inline void sort_iter_init(struct sort_iter *iter, struct btree *b, unsigned size)
{
iter->b = b;
iter->used = 0;
iter->size = ARRAY_SIZE(iter->data);
iter->size = size;
}
/*
 * A sort_iter bundled with a fixed-size array of MAX_BSETS + 1 sets, so the
 * whole thing can be declared as a single (e.g. on-stack) object.
 *
 * struct sort_iter ends in a flexible array member (data[]), so it carries no
 * storage of its own; sets[] is declared immediately after it.
 * NOTE(review): this appears to rely on sets[] being laid out directly behind
 * iter so that iter.data[] aliases it (embedding a struct with a flexible
 * array member is a GNU C extension) - confirm.
 */
struct sort_iter_stack {
struct sort_iter iter;
struct sort_iter_set sets[MAX_BSETS + 1];
};
/*
 * Initialize the sort_iter embedded in @iter for btree node @b, passing the
 * element count of @iter->sets (MAX_BSETS + 1) as the iterator's size.
 */
static inline void sort_iter_stack_init(struct sort_iter_stack *iter, struct btree *b)
{
sort_iter_init(&iter->iter, b, ARRAY_SIZE(iter->sets));
}
static inline void sort_iter_add(struct sort_iter *iter,

View File

@ -172,10 +172,10 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
printk(KERN_ERR "iter was:");
btree_node_iter_for_each(_iter, set) {
struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
struct bset_tree *t = bch2_bkey_to_bset(b, k);
struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
struct bset_tree *t = bch2_bkey_to_bset(b, k2);
printk(" [%zi %zi]", t - b->set,
k->_data - bset(b, t)->_data);
k2->_data - bset(b, t)->_data);
}
panic("\n");
}
@ -232,7 +232,7 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
{
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s);
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
#if 0
@ -300,7 +300,8 @@ static unsigned bkey_float_byte_offset(unsigned idx)
}
struct ro_aux_tree {
struct bkey_float f[0];
u8 nothing[0];
struct bkey_float f[];
};
struct rw_aux_tree {
@ -476,7 +477,7 @@ static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
{
unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s);
return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s);
}
static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
@ -1010,8 +1011,8 @@ void bch2_bset_insert(struct btree *b,
btree_keys_account_key_add(&b->nr, t - b->set, src);
if (src->u64s != clobber_u64s) {
u64 *src_p = where->_data + clobber_u64s;
u64 *dst_p = where->_data + src->u64s;
u64 *src_p = (u64 *) where->_data + clobber_u64s;
u64 *dst_p = (u64 *) where->_data + src->u64s;
EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) <
(int) clobber_u64s - src->u64s);
@ -1037,7 +1038,7 @@ void bch2_bset_delete(struct btree *b,
unsigned clobber_u64s)
{
struct bset_tree *t = bset_tree_last(b);
u64 *src_p = where->_data + clobber_u64s;
u64 *src_p = (u64 *) where->_data + clobber_u64s;
u64 *dst_p = where->_data;
bch2_bset_verify_rw_aux_tree(b, t);
@ -1188,7 +1189,7 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
case BSET_RO_AUX_TREE:
return bset_search_tree(b, t, search, lossy_packed_search);
default:
unreachable();
BUG();
}
}
@ -1268,9 +1269,13 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
}
/**
* bch_btree_node_iter_init - initialize a btree node iterator, starting from a
* bch2_btree_node_iter_init - initialize a btree node iterator, starting from a
* given position
*
* @iter: iterator to initialize
* @b: btree node to search
* @search: search key
*
* Main entry point to the lookup code for individual btree nodes:
*
* NOTE:

View File

@ -795,7 +795,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
six_unlock_intent(&b->c.lock);
/* Unlock before doing IO: */
if (trans && sync)
if (path && sync)
bch2_trans_unlock_noassert(trans);
bch2_btree_node_read(c, b, sync);
@ -934,7 +934,7 @@ retry:
}
if (unlikely(need_relock)) {
int ret = bch2_trans_relock(trans) ?:
ret = bch2_trans_relock(trans) ?:
bch2_btree_path_relock_intent(trans, path);
if (ret) {
six_unlock_type(&b->c.lock, lock_type);
@ -965,11 +965,20 @@ retry:
}
/**
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* bch2_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
* @trans: btree transaction object
* @path: btree_path being traversed
* @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
* @level: level of btree node being looked up (0 == leaf node)
* @lock_type: SIX_LOCK_read or SIX_LOCK_intent
* @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
*
* The btree node will be returned with either a read or an intent lock held,
* depending on @lock_type.
*
* Returns: btree node or ERR_PTR()
*/
struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
const struct bkey_i *k, unsigned level,
@ -1016,28 +1025,8 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
}
if (unlikely(btree_node_read_in_flight(b))) {
u32 seq = six_lock_seq(&b->c.lock);
six_unlock_type(&b->c.lock, lock_type);
bch2_trans_unlock(trans);
bch2_btree_node_wait_on_read(b);
/*
* should_be_locked is not set on this path yet, so we need to
* relock it specifically:
*/
if (trans) {
int ret = bch2_trans_relock(trans) ?:
bch2_btree_path_relock_intent(trans, path);
if (ret) {
BUG_ON(!trans->restarted);
return ERR_PTR(ret);
}
}
if (!six_relock_type(&b->c.lock, lock_type, seq))
return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
}
prefetch(b->aux_data);

View File

@ -529,13 +529,11 @@ fsck_err:
int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree *b;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@ -546,8 +544,8 @@ int bch2_check_topology(struct bch_fs *c)
if (btree_node_fake(b))
continue;
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
ret = bch2_btree_repair_topology_recurse(&trans, b);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
ret = bch2_btree_repair_topology_recurse(trans, b);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
@ -556,7 +554,7 @@ int bch2_check_topology(struct bch_fs *c)
}
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -566,8 +564,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
struct bkey_s_c *k)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
const union bch_extent_entry *entry;
struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k);
const union bch_extent_entry *entry_c;
struct extent_ptr_decoded p = { 0 };
bool do_update = false;
struct printbuf buf = PRINTBUF;
@ -577,10 +575,10 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
* XXX
* use check_bucket_ref here
*/
bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
if (!g->gen_valid &&
(c->opts.reconstruct_alloc ||
@ -1068,15 +1066,13 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id ids[BTREE_ID_NR];
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
if (initial)
trans.is_initial_gc = true;
trans->is_initial_gc = true;
for (i = 0; i < BTREE_ID_NR; i++)
ids[i] = i;
@ -1084,22 +1080,22 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
for (i = 0; i < BTREE_ID_NR && !ret; i++)
ret = initial
? bch2_gc_btree_init(&trans, ids[i], metadata_only)
: bch2_gc_btree(&trans, ids[i], initial, metadata_only);
? bch2_gc_btree_init(trans, ids[i], metadata_only)
: bch2_gc_btree(trans, ids[i], initial, metadata_only);
for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
if (!bch2_btree_id_root(c, i)->alive)
continue;
ret = initial
? bch2_gc_btree_init(&trans, i, metadata_only)
: bch2_gc_btree(&trans, i, initial, metadata_only);
? bch2_gc_btree_init(trans, i, metadata_only)
: bch2_gc_btree(trans, i, initial, metadata_only);
}
if (ret < 0)
bch_err_fn(c, ret);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -1220,14 +1216,6 @@ static int bch2_gc_done(struct bch_fs *c,
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f))) \
dst->_f = src->_f
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f && \
(!verify || \
fsck_err(c, "stripe %zu has wrong "_msg \
": got %u, should be %u", \
iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f))) \
dst->_f = src->_f
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
#define copy_fs_field(_f, _msg, ...) \
@ -1249,7 +1237,7 @@ static int bch2_gc_done(struct bch_fs *c,
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
}
};
}
{
unsigned nr = fs_usage_u64s(c);
@ -1469,37 +1457,35 @@ fsck_err:
static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
for_each_member_device(ca, c, i) {
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW,
bch2_alloc_write_key(&trans, &iter, k, metadata_only));
bch2_alloc_write_key(trans, &iter, k, metadata_only));
if (ret < 0) {
bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
percpu_ref_put(&ca->ref);
break;
}
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
{
struct bch_dev *ca;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bucket *g;
@ -1515,17 +1501,16 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
if (!buckets) {
percpu_ref_put(&ca->ref);
bch_err(c, "error allocating ca->buckets[gc]");
return -BCH_ERR_ENOMEM_gc_alloc_start;
ret = -BCH_ERR_ENOMEM_gc_alloc_start;
goto err;
}
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
rcu_assign_pointer(ca->buckets_gc, buckets);
};
}
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = gc_bucket(ca, k.k->p.offset);
@ -1546,13 +1531,11 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
g->stripe_redundancy = a->stripe_redundancy;
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
if (ret)
bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
return ret;
}
@ -1575,7 +1558,7 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
g->dirty_sectors = 0;
g->cached_sectors = 0;
}
};
}
}
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
@ -1627,7 +1610,7 @@ fsck_err:
static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
size_t idx = 0;
@ -1636,23 +1619,23 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
if (metadata_only)
return 0;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
ret = for_each_btree_key_commit(&trans, iter,
ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_gc_write_reflink_key(&trans, &iter, k, &idx));
bch2_gc_write_reflink_key(trans, &iter, k, &idx));
c->reflink_gc_nr = 0;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
static int bch2_gc_reflink_start(struct bch_fs *c,
bool metadata_only)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
@ -1661,10 +1644,10 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
if (metadata_only)
return 0;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
c->reflink_gc_nr = 0;
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
@ -1682,9 +1665,9 @@ static int bch2_gc_reflink_start(struct bch_fs *c,
r->size = k.k->size;
r->refcount = 0;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -1751,7 +1734,7 @@ fsck_err:
static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
@ -1759,15 +1742,15 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
if (metadata_only)
return 0;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
ret = for_each_btree_key_commit(&trans, iter,
ret = for_each_btree_key_commit(trans, iter,
BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_gc_write_stripes_key(&trans, &iter, k));
bch2_gc_write_stripes_key(trans, &iter, k));
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -1779,6 +1762,12 @@ static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
* @c: filesystem object
* @initial: are we in recovery?
* @metadata_only: are we just checking metadata references, or everything?
*
* Returns: 0 on success, or standard errcode on failure
*
* Order matters here:
* - Concurrent GC relies on the fact that we have a total ordering for
* everything that GC walks - see gc_will_visit_node(),
@ -1947,7 +1936,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
int bch2_gc_gens(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
@ -1965,7 +1954,7 @@ int bch2_gc_gens(struct bch_fs *c)
trace_and_count(c, gc_gens_start, c);
down_read(&c->gc_lock);
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
for_each_member_device(ca, c, i) {
struct bucket_gens *gens;
@ -1988,33 +1977,31 @@ int bch2_gc_gens(struct bch_fs *c)
for (i = 0; i < BTREE_ID_NR; i++)
if (btree_type_has_ptrs(i)) {
struct btree_iter iter;
struct bkey_s_c k;
c->gc_gens_btree = i;
c->gc_gens_pos = POS_MIN;
ret = for_each_btree_key_commit(&trans, iter, i,
ret = for_each_btree_key_commit(trans, iter, i,
POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
gc_btree_gens_key(&trans, &iter, k));
gc_btree_gens_key(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
if (ret)
goto err;
}
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN,
BTREE_ITER_PREFETCH,
k,
NULL, NULL,
BTREE_INSERT_NOFAIL,
bch2_alloc_write_oldest_gen(&trans, &iter, k));
bch2_alloc_write_oldest_gen(trans, &iter, k));
if (ret && !bch2_err_matches(ret, EROFS))
bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
if (ret)
goto err;
@ -2031,7 +2018,7 @@ err:
ca->oldest_gen = NULL;
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
up_read(&c->gc_lock);
mutex_unlock(&c->gc_gens_lock);
return ret;
@ -2086,7 +2073,7 @@ static int bch2_gc_thread(void *arg)
ret = bch2_gc_gens(c);
#endif
if (ret < 0)
bch_err(c, "btree gc failed: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
debug_check_no_locks_held();
}
@ -2116,7 +2103,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p)) {
bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
bch_err_fn(c, PTR_ERR(p));
return PTR_ERR(p);
}

View File

@ -14,7 +14,7 @@
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "io.h"
#include "io_write.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "recovery.h"
@ -106,8 +106,8 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
vpfree(p, size);
}
static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
bool *used_mempool)
static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
bool *used_mempool)
{
unsigned flags = memalloc_nofs_save();
void *p;
@ -115,7 +115,7 @@ static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
BUG_ON(size > btree_bytes(c));
*used_mempool = false;
p = vpmalloc_noprof(size, __GFP_NOWARN|GFP_NOWAIT);
p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
if (!p) {
*used_mempool = true;
p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
@ -123,8 +123,6 @@ static void *btree_bounce_alloc_noprof(struct bch_fs *c, size_t size,
memalloc_nofs_restore(flags);
return p;
}
#define btree_bounce_alloc(_c, _size, _used_mempool) \
alloc_hooks(btree_bounce_alloc_noprof(_c, _size, _used_mempool))
static void sort_bkey_ptrs(const struct btree *bt,
struct bkey_packed **ptrs, unsigned nr)
@ -294,7 +292,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
bool filter_whiteouts)
{
struct btree_node *out;
struct sort_iter sort_iter;
struct sort_iter_stack sort_iter;
struct bset_tree *t;
struct bset *start_bset = bset(b, &b->set[start_idx]);
bool used_mempool = false;
@ -303,13 +301,13 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
bool sorting_entire_node = start_idx == 0 &&
end_idx == b->nsets;
sort_iter_init(&sort_iter, b);
sort_iter_stack_init(&sort_iter, b);
for (t = b->set + start_idx;
t < b->set + end_idx;
t++) {
u64s += le16_to_cpu(bset(b, t)->u64s);
sort_iter_add(&sort_iter,
sort_iter_add(&sort_iter.iter,
btree_bkey_first(b, t),
btree_bkey_last(b, t));
}
@ -322,7 +320,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
start_time = local_clock();
u64s = bch2_sort_keys(out->keys.start, &sort_iter, filter_whiteouts);
u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts);
out->keys.u64s = cpu_to_le16(u64s);
@ -338,7 +336,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
start_bset->journal_seq = cpu_to_le64(seq);
if (sorting_entire_node) {
unsigned u64s = le16_to_cpu(out->keys.u64s);
u64s = le16_to_cpu(out->keys.u64s);
BUG_ON(bytes != btree_bytes(c));
@ -412,8 +410,6 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_verify_btree_nr_keys(dst);
}
#define SORT_CRIT (4096 / sizeof(u64))
/*
* We're about to add another bset to the btree node, so if there's currently
* too many bsets - sort some of them together:
@ -544,6 +540,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_str(out, ": ");
}
__printf(8, 9)
static int __btree_err(int ret,
struct bch_fs *c,
struct bch_dev *ca,
@ -624,9 +621,6 @@ __cold
void bch2_btree_node_drop_keys_outside_node(struct btree *b)
{
struct bset_tree *t;
struct bkey_s_c k;
struct bkey unpacked;
struct btree_node_iter iter;
for_each_bset(b, t) {
struct bset *i = bset(b, t);
@ -662,6 +656,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
struct bkey_s_c k;
struct bkey unpacked;
struct btree_node_iter iter;
for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
BUG_ON(bpos_gt(k.k->p, b->data->max_key));
@ -910,7 +907,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
unsigned blacklisted_written, nonblacklisted_written = 0;
unsigned ptr_written = btree_ptr_sectors_written(&b->key);
struct printbuf buf = PRINTBUF;
int ret = 0, retry_read = 0, write = READ;
@ -920,8 +916,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
b->written = 0;
iter = mempool_alloc(&c->fill_iter, GFP_NOFS);
sort_iter_init(iter, b);
iter->size = (btree_blocks(c) + 1) * 2;
sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2);
if (bch2_meta_read_fault("btree"))
btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL,
@ -1045,8 +1040,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sort_iter_add(iter,
vstruct_idx(i, 0),
vstruct_last(i));
nonblacklisted_written = b->written;
}
if (ptr_written) {
@ -1064,18 +1057,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
true),
-BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL,
"found bset signature after last bset");
/*
* Blacklisted bsets are those that were written after the most recent
* (flush) journal write. Since there wasn't a flush, they may not have
* made it to all devices - which means we shouldn't write new bsets
* after them, as that could leave a gap and then reads from that device
* wouldn't find all the bsets in that btree node - which means it's
* important that we start writing new bsets after the most recent _non_
* blacklisted bset:
*/
blacklisted_written = b->written;
b->written = nonblacklisted_written;
}
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
@ -1143,9 +1124,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev);
if (ca->mi.state != BCH_MEMBER_STATE_rw)
if (ca2->mi.state != BCH_MEMBER_STATE_rw)
set_btree_node_need_rewrite(b);
}
@ -1227,19 +1208,17 @@ start:
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
printbuf_exit(&buf);
if (saw_error && !btree_node_read_error(b)) {
struct printbuf buf = PRINTBUF;
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
__func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf);
printbuf_exit(&buf);
bch2_btree_node_rewrite_async(c, b);
}
printbuf_exit(&buf);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
}
@ -1649,8 +1628,7 @@ err:
int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
const struct bkey_i *k, unsigned level)
{
return bch2_trans_run(c, __bch2_btree_root_read(&trans, id, k, level));
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
}
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
@ -1712,15 +1690,13 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
bch2_trans_init(&trans, c, 0, 0);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
__btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
}
static void btree_node_write_work(struct work_struct *work)
@ -1749,7 +1725,7 @@ static void btree_node_write_work(struct work_struct *work)
}
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(&trans, b, &wbio->key,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_reclaim|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_NOFAIL|
@ -1854,7 +1830,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
struct sort_iter sort_iter;
struct sort_iter_stack sort_iter;
struct nonce nonce;
unsigned bytes_to_write, sectors_to_write, bytes, u64s;
u64 seq = 0;
@ -1927,7 +1903,7 @@ do_write:
bch2_sort_whiteouts(c, b);
sort_iter_init(&sort_iter, b);
sort_iter_stack_init(&sort_iter, b);
bytes = !b->written
? sizeof(struct btree_node)
@ -1942,7 +1918,7 @@ do_write:
continue;
bytes += le16_to_cpu(i->u64s) * sizeof(u64);
sort_iter_add(&sort_iter,
sort_iter_add(&sort_iter.iter,
btree_bkey_first(b, t),
btree_bkey_last(b, t));
seq = max(seq, le64_to_cpu(i->journal_seq));
@ -1971,14 +1947,14 @@ do_write:
i->journal_seq = cpu_to_le64(seq);
i->u64s = 0;
sort_iter_add(&sort_iter,
sort_iter_add(&sort_iter.iter,
unwritten_whiteouts_start(c, b),
unwritten_whiteouts_end(c, b));
SET_BSET_SEPARATE_WHITEOUTS(i, false);
b->whiteout_u64s = 0;
u64s = bch2_sort_keys(i->start, &sort_iter, false);
u64s = bch2_sort_keys(i->start, &sort_iter.iter, false);
le16_add_cpu(&i->u64s, u64s);
BUG_ON(!b->written && i->u64s != b->data->keys.u64s);

View File

@ -7,7 +7,7 @@
#include "btree_locking.h"
#include "checksum.h"
#include "extents.h"
#include "io_types.h"
#include "io_write_types.h"
struct bch_fs;
struct btree_write;

View File

@ -488,7 +488,6 @@ fixup_done:
if (!bch2_btree_node_iter_end(node_iter) &&
iter_current_key_modified &&
b->c.level) {
struct bset_tree *t;
struct bkey_packed *k, *k2, *p;
k = bch2_btree_node_iter_peek_all(node_iter, b);
@ -689,7 +688,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
if (t != BTREE_NODE_UNLOCKED) {
btree_node_unlock(trans, path, b->c.level);
six_lock_increment(&b->c.lock, (enum six_lock_type) t);
mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
mark_btree_node_locked(trans, path, b->c.level, t);
}
bch2_btree_path_level_init(trans, path, b);
@ -764,7 +763,8 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
mark_btree_node_locked(trans, path, path->level, lock_type);
mark_btree_node_locked(trans, path, path->level,
(enum btree_node_locked_type) lock_type);
bch2_btree_path_level_init(trans, path, b);
return 0;
}
@ -936,7 +936,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(trans, path, level + 1);
mark_btree_node_locked(trans, path, level, lock_type);
mark_btree_node_locked(trans, path, level,
(enum btree_node_locked_type) lock_type);
path->level = level;
bch2_btree_path_level_init(trans, path, b);
@ -1341,14 +1342,14 @@ static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *p
__bch2_path_free(trans, path);
}
void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
{
panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
trans->restart_count, restart_count,
(void *) trans->last_begin_ip);
}
void bch2_trans_in_restart_error(struct btree_trans *trans)
void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
panic("in transaction restart: %s, last restarted by %pS\n",
bch2_err_str(trans->restarted),
@ -1493,7 +1494,7 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
static noinline void btree_path_overflow(struct btree_trans *trans)
{
bch2_dump_trans_paths_updates(trans);
panic("trans path oveflow\n");
panic("trans path overflow\n");
}
static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
@ -2046,8 +2047,12 @@ out:
}
/**
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
* current position
* bch2_btree_iter_peek_upto() - returns first key greater than or equal to
* iterator's current position
* @iter: iterator to peek from
* @end: search limit: returns keys less than or equal to @end
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
{
@ -2184,10 +2189,13 @@ end:
}
/**
* bch2_btree_iter_peek_all_levels: returns the first key greater than or equal
* to iterator's current position, returning keys from every level of the btree.
* For keys at different levels of the btree that compare equal, the key from
* the lower level (leaf) is returned first.
* bch2_btree_iter_peek_all_levels() - returns the first key greater than or
* equal to iterator's current position, returning keys from every level of the
* btree. For keys at different levels of the btree that compare equal, the key
* from the lower level (leaf) is returned first.
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
{
@ -2278,8 +2286,11 @@ out_no_locked:
}
/**
* bch2_btree_iter_next: returns first key greater than iterator's current
* bch2_btree_iter_next() - returns first key greater than iterator's current
* position
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
@ -2290,8 +2301,11 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
}
/**
* bch2_btree_iter_peek_prev: returns first key less than or equal to
* bch2_btree_iter_peek_prev() - returns first key less than or equal to
* iterator's current position
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
@ -2414,8 +2428,11 @@ out_no_locked:
}
/**
* bch2_btree_iter_prev: returns first key less than iterator's current
* bch2_btree_iter_prev() - returns first key less than iterator's current
* position
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
@ -2722,7 +2739,7 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
void bch2_trans_iter_init_outlined(struct btree_trans *trans,
struct btree_iter *iter,
unsigned btree_id, struct bpos pos,
enum btree_id btree_id, struct bpos pos,
unsigned flags)
{
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
@ -2830,6 +2847,8 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
* bch2_trans_begin() - reset a transaction after an interrupted attempt
* @trans: transaction to reset
*
* Returns: current restart counter, to be used with trans_was_restarted()
*
* While iterating over nodes or updating nodes, an attempt to lock a btree node
* may return BCH_ERR_transaction_restart when the trylock fails. When this
* occurs bch2_trans_begin() should be called and the transaction retried.
@ -2887,28 +2906,23 @@ u32 bch2_trans_begin(struct btree_trans *trans)
return trans->restart_count;
}
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
static struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
{
size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX;
size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
void *p = NULL;
struct btree_trans *trans;
BUG_ON(trans->used_mempool);
#ifdef __KERNEL__
p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
#endif
if (!p) {
p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
/*
* paths need to be zeroed, bch2_check_for_deadlock looks at
* paths in other threads
*/
memset(p, 0, paths_bytes);
if (IS_ENABLED(__KERNEL__)) {
trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
if (trans)
return trans;
}
trans->paths = p; p += paths_bytes;
trans->updates = p; p += updates_bytes;
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
/*
* paths need to be zeroed, bch2_check_for_deadlock looks at
* paths in other threads
*/
memset(&trans->paths, 0, sizeof(trans->paths));
return trans;
}
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
@ -2928,13 +2942,16 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
return i;
}
void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
__acquires(&c->btree_trans_barrier)
{
struct btree_trans *trans;
struct btree_transaction_stats *s;
bch2_assert_btree_nodes_not_locked();
trans = bch2_trans_alloc(c);
memset(trans, 0, sizeof(*trans));
trans->c = c;
trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
@ -2946,8 +2963,6 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
closure_init_stack(&trans->ref);
bch2_trans_alloc_paths(trans, c);
s = btree_trans_stats(trans);
if (s && s->max_mem) {
unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
@ -2993,6 +3008,8 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
list_add_done:
seqmutex_unlock(&c->btree_trans_lock);
}
return trans;
}
static void check_btree_paths_leaked(struct btree_trans *trans)
@ -3017,7 +3034,7 @@ leaked:
#endif
}
void bch2_trans_exit(struct btree_trans *trans)
void bch2_trans_put(struct btree_trans *trans)
__releases(&c->btree_trans_barrier)
{
struct btree_insert_entry *i;
@ -3063,18 +3080,11 @@ void bch2_trans_exit(struct btree_trans *trans)
else
kfree(trans->mem);
#ifdef __KERNEL__
/*
* Userspace doesn't have a real percpu implementation:
*/
trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
#endif
if (trans->paths)
mempool_free(trans->paths, &c->btree_paths_pool);
trans->mem = (void *) 0x1;
trans->paths = (void *) 0x1;
/* Userspace doesn't have a real percpu implementation: */
if (IS_ENABLED(__KERNEL__))
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
if (trans)
mempool_free(trans, &c->btree_trans_pool);
}
static void __maybe_unused
@ -3152,6 +3162,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
struct btree_transaction_stats *s;
struct btree_trans *trans;
int cpu;
trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
if (trans)
panic("%s leaked btree_trans\n", trans->fn);
if (c->btree_trans_bufs)
for_each_possible_cpu(cpu)
kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans);
free_percpu(c->btree_trans_bufs);
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
@ -3163,13 +3184,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
if (c->btree_trans_barrier_initialized)
cleanup_srcu_struct(&c->btree_trans_barrier);
mempool_exit(&c->btree_trans_mem_pool);
mempool_exit(&c->btree_paths_pool);
mempool_exit(&c->btree_trans_pool);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
struct btree_transaction_stats *s;
unsigned nr = BTREE_ITER_MAX;
int ret;
for (s = c->btree_transaction_stats;
@ -3182,9 +3202,12 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
INIT_LIST_HEAD(&c->btree_trans_list);
seqmutex_init(&c->btree_trans_lock);
ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
sizeof(struct btree_path) * nr +
sizeof(struct btree_insert_entry) * nr) ?:
c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
if (!c->btree_trans_bufs)
return -ENOMEM;
ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
sizeof(struct btree_trans)) ?:
mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
BTREE_TRANS_MEM_MAX) ?:
init_srcu_struct(&c->btree_trans_barrier);

View File

@ -276,12 +276,14 @@ int bch2_trans_relock_notrace(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *);
static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count)
static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
{
return restart_count != trans->restart_count;
return restart_count != trans->restart_count
? -BCH_ERR_transaction_restart_nested
: 0;
}
void bch2_trans_restart_error(struct btree_trans *, u32);
void __noreturn bch2_trans_restart_error(struct btree_trans *, u32);
static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
u32 restart_count)
@ -290,7 +292,7 @@ static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
bch2_trans_restart_error(trans, restart_count);
}
void bch2_trans_in_restart_error(struct btree_trans *);
void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
{
@ -463,7 +465,7 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
}
void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *,
unsigned, struct bpos, unsigned);
enum btree_id, struct bpos, unsigned);
static inline void bch2_trans_iter_init(struct btree_trans *trans,
struct btree_iter *iter,
@ -672,17 +674,17 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
#define lockrestart_do(_trans, _do) \
({ \
u32 _restart_count; \
int _ret; \
int _ret2; \
\
do { \
_restart_count = bch2_trans_begin(_trans); \
_ret = (_do); \
} while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \
_ret2 = (_do); \
} while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \
\
if (!_ret) \
if (!_ret2) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
_ret; \
_ret2; \
})
/*
@ -697,26 +699,23 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
/*
 * nested_lockrestart_do - run @_do inside an already-running transaction,
 * calling bch2_trans_begin() and retrying for as long as @_do fails with an
 * error matching BCH_ERR_transaction_restart.
 *
 * Evaluates to the error from @_do if it failed with anything other than a
 * restart; otherwise to trans_was_restarted() checked against the restart
 * count sampled on entry, so the caller is told when the transaction was
 * restarted underneath it.
 *
 * The final check must use _orig_restart_count: _restart_count is refreshed
 * by every bch2_trans_begin() and has just been verified to match the
 * transaction's current count, so comparing against it can never report a
 * restart.
 */
#define nested_lockrestart_do(_trans, _do)				\
({									\
	u32 _restart_count, _orig_restart_count;			\
	int _ret2;							\
									\
	_restart_count = _orig_restart_count = (_trans)->restart_count;	\
									\
	while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\
		_restart_count = bch2_trans_begin(_trans);		\
									\
	if (!_ret2)							\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
									\
	_ret2 ?: trans_was_restarted(_trans, _orig_restart_count);	\
})
#define for_each_btree_key2(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
int _ret = 0; \
int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@ -724,15 +723,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
while (1) { \
u32 _restart_count = bch2_trans_begin(_trans); \
\
_ret = 0; \
_ret3 = 0; \
(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
if (!(_k).k) \
break; \
\
_ret = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
_ret3 = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
if (_ret) \
if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
@ -740,13 +739,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret; \
_ret3; \
})
#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _do) \
({ \
int _ret = 0; \
int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@ -754,15 +753,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
while (1) { \
u32 _restart_count = bch2_trans_begin(_trans); \
\
_ret = 0; \
_ret3 = 0; \
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\
if (!(_k).k) \
break; \
\
_ret = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
_ret3 = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
if (_ret) \
if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
@ -770,13 +769,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret; \
_ret3; \
})
#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
int _ret = 0; \
int _ret3 = 0; \
\
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
@ -785,14 +784,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
if (!(_k).k) { \
_ret = 0; \
_ret3 = 0; \
break; \
} \
\
_ret = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
_ret3 = bkey_err(_k) ?: (_do); \
if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
continue; \
if (_ret) \
if (_ret3) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_rewind(&(_iter))) \
@ -800,7 +799,7 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
} \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret; \
_ret3; \
})
#define for_each_btree_key_commit(_trans, _iter, _btree_id, \
@ -916,21 +915,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
void bch2_trans_exit(struct btree_trans *);
struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);
extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
unsigned bch2_trans_get_fn_idx(const char *);
#define bch2_trans_init(_trans, _c, _nr_iters, _mem) \
do { \
#define bch2_trans_get(_c) \
({ \
static unsigned trans_fn_idx; \
\
if (unlikely(!trans_fn_idx)) \
trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
\
__bch2_trans_init(_trans, _c, trans_fn_idx); \
} while (0)
__bch2_trans_get(_c, trans_fn_idx); \
})
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);

View File

@ -243,8 +243,6 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
}
if (ck) {
int ret;
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
if (unlikely(ret)) {
bkey_cached_move_to_freelist(bc, ck);
@ -253,7 +251,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
path->l[0].b = (void *) ck;
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
ret = bch2_btree_node_lock_write(trans, path, &ck->c);
if (unlikely(ret)) {
@ -331,7 +329,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
}
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
}
ck->c.level = 0;
@ -479,7 +477,7 @@ retry:
if (!ck)
goto retry;
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
@ -497,7 +495,8 @@ retry:
goto retry;
}
mark_btree_node_locked(trans, path, 0, lock_want);
mark_btree_node_locked(trans, path, 0,
(enum btree_node_locked_type) lock_want);
}
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
@ -579,7 +578,8 @@ retry:
goto retry;
}
mark_btree_node_locked(trans, path, 0, lock_want);
mark_btree_node_locked(trans, path, 0,
(enum btree_node_locked_type) lock_want);
}
path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
@ -705,13 +705,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read);
key = ck->key;
if (ck->journal.seq != seq ||
@ -728,13 +726,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
}
six_unlock_read(&ck->c.lock);
ret = commit_do(&trans, NULL, NULL, 0,
btree_key_cache_flush_pos(&trans, key, seq,
ret = commit_do(trans, NULL, NULL, 0,
btree_key_cache_flush_pos(trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -1065,7 +1063,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{
prt_printf(out, "nr_freed:\t%zu", atomic_long_read(&c->nr_freed));
prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed));
prt_newline(out);
prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
prt_newline(out);

View File

@ -91,7 +91,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
static inline void mark_btree_node_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level,
enum six_lock_type type)
enum btree_node_locked_type type)
{
mark_btree_node_locked_noreset(path, level, (enum btree_node_locked_type) type);
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS

View File

@ -163,13 +163,11 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
unsigned long old, new, v;
unsigned idx = w - b->writes;
bch2_trans_init(&trans, c, 0, 0);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
v = READ_ONCE(b->flags);
do {
@ -188,7 +186,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return 0;
}
@ -214,7 +212,11 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
}
/**
* btree_insert_key - insert a key one key into a leaf node
* bch2_btree_insert_key_leaf() - insert a key into a leaf node
* @trans: btree transaction object
* @path: path pointing to @insert's pos
* @insert: key to insert
* @journal_seq: sequence number of journal reservation
*/
inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
struct btree_path *path,
@ -555,7 +557,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
struct btree_write_buffered_key *wb;
struct btree_trans_commit_hook *h;
unsigned u64s = 0;
bool marking = false;
int ret;
if (race_fault()) {
@ -584,9 +585,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
*stopped_at = i;
return ret;
}
if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
}
if (trans->nr_wb_updates &&
@ -778,7 +776,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans
bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p);
}
#ifdef CONFIG_BCACHEFS_DEBUG
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags,
struct btree_insert_entry *i,
struct printbuf *err)
@ -804,7 +801,6 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un
return -EINVAL;
}
#endif
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
@ -1029,7 +1025,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (ret)
goto out_reset;
#ifdef CONFIG_BCACHEFS_DEBUG
trans_for_each_update(trans, i) {
struct printbuf buf = PRINTBUF;
enum bkey_invalid_flags invalid_flags = 0;
@ -1046,7 +1041,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (ret)
return ret;
}
#endif
if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
ret = do_bch2_trans_commit_to_journal_replay(trans);

View File

@ -194,34 +194,34 @@ struct btree_node_iter {
/*
* Iterate over all possible positions, synthesizing deleted keys for holes:
*/
static const u16 BTREE_ITER_SLOTS = 1 << 0;
static const u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
/*
* Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates:
*/
static const u16 BTREE_ITER_INTENT = 1 << 2;
static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2;
/*
* Causes the btree iterator code to prefetch additional btree nodes from disk:
*/
static const u16 BTREE_ITER_PREFETCH = 1 << 3;
static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3;
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
*/
static const u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
static const u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
static const u16 BTREE_ITER_CACHED = 1 << 6;
static const u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
static const u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
static const u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
static const u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
static const u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
static const u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
static const u16 BTREE_ITER_NOPRESERVE = 1 << 13;
static const u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
static const u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
#define __BTREE_ITER_FLAGS_END 16
static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6;
static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13;
static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
#define __BTREE_ITER_FLAGS_END 16
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -459,8 +459,8 @@ struct btree_trans {
void *mem;
u8 sorted[BTREE_ITER_MAX + 8];
struct btree_path *paths;
struct btree_insert_entry *updates;
struct btree_path paths[BTREE_ITER_MAX];
struct btree_insert_entry updates[BTREE_ITER_MAX];
struct btree_write_buffered_key *wb_updates;
/* update path: */

View File

@ -124,7 +124,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
struct bkey_s_c old_k, new_k;
snapshot_id_list s;
struct bkey_i *update;
int ret;
int ret = 0;
if (!bch2_snapshot_has_children(c, old_pos.snapshot))
return 0;
@ -466,11 +466,49 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
return 0;
}
/*
 * Make sure @iter has a key cache path that an update can be done through.
 *
 * Nothing is done when iter->key_cache_path already exists, is marked
 * should_be_locked and sits at iter->pos. Otherwise the path is allocated if
 * it does not exist yet, moved to path->pos and traversed.
 *
 * Returns 0 on success, the error from bch2_btree_path_traverse(), or the
 * result of btree_trans_restart() with
 * BCH_ERR_transaction_restart_key_cache_raced when the cached key is found
 * with BKEY_CACHED_DIRTY set.
 */
static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
struct btree_iter *iter,
struct btree_path *path)
{
if (!iter->key_cache_path ||
!iter->key_cache_path->should_be_locked ||
!bpos_eq(iter->key_cache_path->pos, iter->pos)) {
struct bkey_cached *ck;
int ret;
/* First use: allocate a cached, intent-locked path for this btree */
if (!iter->key_cache_path)
iter->key_cache_path =
bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT|
BTREE_ITER_CACHED, _THIS_IP_);
/* (Re)position the key cache path at the position of the btree path */
iter->key_cache_path =
bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
iter->flags & BTREE_ITER_INTENT,
_THIS_IP_);
ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
BTREE_ITER_CACHED);
if (unlikely(ret))
return ret;
/* The level 0 node pointer of the key cache path is used as the bkey_cached entry */
ck = (void *) iter->key_cache_path->l[0].b;
/* A dirty cached key at this point is counted as a race and restarts the transaction */
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
}
btree_path_set_should_be_locked(iter->key_cache_path);
}
return 0;
}
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags)
{
struct btree_path *path = iter->update_path ?: iter->path;
struct bkey_cached *ck;
int ret;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
@ -494,34 +532,9 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
!path->cached &&
!path->level &&
btree_id_cached(trans->c, path->btree_id)) {
if (!iter->key_cache_path ||
!iter->key_cache_path->should_be_locked ||
!bpos_eq(iter->key_cache_path->pos, k->k.p)) {
if (!iter->key_cache_path)
iter->key_cache_path =
bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT|
BTREE_ITER_CACHED, _THIS_IP_);
iter->key_cache_path =
bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
iter->flags & BTREE_ITER_INTENT,
_THIS_IP_);
ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
BTREE_ITER_CACHED);
if (unlikely(ret))
return ret;
ck = (void *) iter->key_cache_path->l[0].b;
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
}
btree_path_set_should_be_locked(iter->key_cache_path);
}
ret = bch2_trans_update_get_key_cache(trans, iter, path);
if (ret)
return ret;
path = iter->key_cache_path;
}
@ -640,6 +653,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
int ret;
bch2_trans_iter_init(trans, &iter, btree, k->k.p,
BTREE_ITER_CACHED|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter) ?:
@ -648,8 +662,8 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
return ret;
}
int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
struct bkey_i *k, enum btree_update_flags flags)
int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
struct bkey_i *k, enum btree_update_flags flags)
{
struct btree_iter iter;
int ret;
@ -667,16 +681,18 @@ int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
* bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs
* @id: btree to insert into
* @insert_keys: list of keys to insert
* @hook: insert callback
* @k: key to insert
* @disk_res: must be non-NULL whenever inserting or potentially
* splitting data extents
* @flags: transaction commit flags
*
* Returns: 0 on success, error code on failure
*/
int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
struct bkey_i *k,
struct disk_reservation *disk_res,
u64 *journal_seq, int flags)
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
struct disk_reservation *disk_res, int flags)
{
return bch2_trans_do(c, disk_res, journal_seq, flags,
__bch2_btree_insert(&trans, id, k, 0));
return bch2_trans_do(c, disk_res, NULL, flags,
bch2_btree_insert_trans(trans, id, k, 0));
}
int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
@ -714,6 +730,23 @@ int bch2_btree_delete_at_buffered(struct btree_trans *trans,
return bch2_trans_update_buffered(trans, btree, k);
}
/*
 * Delete the key at @pos in @btree.
 *
 * Sets up a cached, intent-locked iterator at @pos, traverses it and, if the
 * traverse succeeded, calls bch2_btree_delete_at() with @update_flags. The
 * iterator is always released before returning.
 *
 * Returns 0 on success, or the first error from the traverse or the delete.
 */
int bch2_btree_delete(struct btree_trans *trans,
		      enum btree_id btree, struct bpos pos,
		      unsigned update_flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);

	ret = bch2_btree_iter_traverse(&iter);
	/* Only attempt the delete when the traverse did not fail */
	if (!ret)
		ret = bch2_btree_delete_at(trans, &iter, update_flags);

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
unsigned update_flags,
@ -777,9 +810,7 @@ err:
}
bch2_trans_iter_exit(trans, &iter);
if (!ret && trans_was_restarted(trans, restart_count))
ret = -BCH_ERR_transaction_restart_nested;
return ret;
return ret ?: trans_was_restarted(trans, restart_count);
}
/*
@ -793,7 +824,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
u64 *journal_seq)
{
int ret = bch2_trans_run(c,
bch2_btree_delete_range_trans(&trans, id, start, end,
bch2_btree_delete_range_trans(trans, id, start, end,
update_flags, journal_seq));
if (ret == -BCH_ERR_transaction_restart_nested)
ret = 0;
@ -818,6 +849,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
return bch2_trans_update_buffered(trans, btree, k);
}
__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
struct printbuf buf = PRINTBUF;
@ -854,6 +886,7 @@ err:
return ret;
}
__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
va_list args)
@ -865,12 +898,13 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
} else {
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_LAZY_RW|commit_flags,
__bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
__bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
}
return ret;
}
__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;
@ -886,6 +920,7 @@ int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
* Use for logging messages during recovery to enable reserved space and avoid
* blocking.
*/
__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
va_list args;

View File

@ -4,7 +4,6 @@
#include "btree_iter.h"
#include "journal.h"
#include "journal.h"
struct bch_fs;
struct btree;
@ -58,14 +57,15 @@ int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
unsigned, unsigned);
int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
struct bkey_i *, enum btree_update_flags);
int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *,
int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
enum btree_update_flags);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
struct disk_reservation *, u64 *, int flags);
struct disk_reservation *, int flags);
int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
struct bpos, struct bpos, unsigned, u64 *);
@ -114,8 +114,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);
int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
/**
* bch2_trans_commit - insert keys at given iterator positions
@ -145,29 +145,16 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))
#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
#define bch2_trans_run(_c, _do) \
({ \
struct btree_trans trans; \
int _ret; \
\
bch2_trans_init(&trans, (_c), 0, 0); \
_ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
bch2_trans_exit(&trans); \
\
struct btree_trans *trans = bch2_trans_get(_c); \
int _ret = (_do); \
bch2_trans_put(trans); \
_ret; \
})
#define bch2_trans_run(_c, _do) \
({ \
struct btree_trans trans; \
int _ret; \
\
bch2_trans_init(&trans, (_c), 0, 0); \
_ret = (_do); \
bch2_trans_exit(&trans); \
\
_ret; \
})
#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
#define trans_for_each_update(_trans, _i) \
for ((_i) = (_trans)->updates; \

View File

@ -143,10 +143,15 @@ static size_t btree_node_u64s_with_format(struct btree *b,
}
/**
* btree_node_format_fits - check if we could rewrite node with a new format
* bch2_btree_node_format_fits - check if we could rewrite node with a new format
*
* This assumes all keys can pack with the new format -- it just checks if
* the re-packed keys would fit inside the node itself.
* @c: filesystem handle
* @b: btree node to rewrite
* @new_f: bkey format to translate keys to
*
* Returns: true if all re-packed keys will be able to fit in a new node.
*
* Assumes all keys will successfully pack with the new format.
*/
bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
struct bkey_format *new_f)
@ -244,7 +249,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct write_point *wp;
struct btree *b;
BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct open_buckets ob = { .nr = 0 };
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
@ -257,7 +262,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
ob = a->ob;
obs = a->ob;
bkey_copy(&tmp.k, &a->k);
mutex_unlock(&c->btree_reserve_cache_lock);
goto mem_alloc;
@ -292,7 +297,7 @@ retry:
bkey_btree_ptr_v2_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
bch2_open_bucket_get(c, wp, &ob);
bch2_open_bucket_get(c, wp, &obs);
bch2_alloc_sectors_done(c, wp);
mem_alloc:
b = bch2_btree_node_mem_alloc(trans, interior_node);
@ -304,7 +309,7 @@ mem_alloc:
BUG_ON(b->ob.nr);
bkey_copy(&b->key, &tmp.k);
b->ob = ob;
b->ob = obs;
return b;
}
@ -592,12 +597,11 @@ static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
struct btree *b;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
u64 journal_seq = 0;
unsigned i;
int ret;
bch2_trans_init(&trans, c, 0, 512);
/*
* If we're already in an error state, it might be because a btree node
* was never written, and we might be trying to free that same btree
@ -618,7 +622,7 @@ static void btree_update_nodes_written(struct btree_update *as)
b = as->old_nodes[i];
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
seq = b->data ? b->data->keys.seq : 0;
six_unlock_read(&b->c.lock);
@ -640,13 +644,13 @@ static void btree_update_nodes_written(struct btree_update *as)
* journal reclaim does btree updates when flushing bkey_cached entries,
* which may require allocations as well.
*/
ret = commit_do(&trans, &as->disk_res, &journal_seq,
ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_reclaim|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM,
btree_update_nodes_written_trans(&trans, as));
bch2_trans_unlock(&trans);
btree_update_nodes_written_trans(trans, as));
bch2_trans_unlock(trans);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s(): error %s", __func__, bch2_err_str(ret));
@ -655,7 +659,7 @@ err:
struct btree_path *path;
b = as->b;
path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p);
path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p);
/*
* @b is the node we did the final insert into:
*
@ -678,13 +682,13 @@ err:
* we may rarely end up with a locked path besides the one we
* have here:
*/
bch2_trans_unlock(&trans);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
bch2_trans_unlock(trans);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED);
path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
path->l[b->c.level].b = b;
bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
mutex_lock(&c->btree_interior_update_lock);
@ -697,15 +701,15 @@ err:
* btree_interior_update_lock:
*/
if (as->b == b) {
struct bset *i = btree_bset_last(b);
BUG_ON(!b->c.level);
BUG_ON(!btree_node_dirty(b));
if (!ret) {
i->journal_seq = cpu_to_le64(
struct bset *last = btree_bset_last(b);
last->journal_seq = cpu_to_le64(
max(journal_seq,
le64_to_cpu(i->journal_seq)));
le64_to_cpu(last->journal_seq)));
bch2_btree_add_journal_pin(c, b, journal_seq);
} else {
@ -724,8 +728,8 @@ err:
six_unlock_write(&b->c.lock);
btree_node_write_if_need(c, b, SIX_LOCK_intent);
btree_node_unlock(&trans, path, b->c.level);
bch2_path_put(&trans, path, true);
btree_node_unlock(trans, path, b->c.level);
bch2_path_put(trans, path, true);
}
bch2_journal_pin_drop(&c->journal, &as->journal);
@ -745,7 +749,7 @@ err:
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@ -753,8 +757,8 @@ err:
for (i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
bch2_btree_update_free(as, &trans);
bch2_trans_exit(&trans);
bch2_btree_update_free(as, trans);
bch2_trans_put(trans);
}
static void btree_interior_update_work(struct work_struct *work)
@ -1216,18 +1220,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
bch2_recalc_btree_reserve(c);
}
/**
* bch_btree_set_root - update the root in memory and on disk
*
* To ensure forward progress, the current task must not be holding any
* btree node write locks. However, you must hold an intent lock on the
* old root.
*
* Note: This allocates a journal entry but doesn't add any keys to
* it. All the btree roots are part of every journal write, so there
* is nothing new to be done. This just guarantees that there is a
* journal write.
*/
static void bch2_btree_set_root(struct btree_update *as,
struct btree_trans *trans,
struct btree_path *path,
@ -1341,12 +1333,12 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
;
while (!bch2_keylist_empty(keys)) {
struct bkey_i *k = bch2_keylist_front(keys);
insert = bch2_keylist_front(keys);
if (bpos_gt(k->k.p, b->key.k.p))
if (bpos_gt(insert->k.p, b->key.k.p))
break;
bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, k);
bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
bch2_keylist_pop_front(keys);
}
}
@ -1513,12 +1505,12 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path1, n1);
path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n2->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path2, n2);
/*
@ -1539,7 +1531,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path2->locks_want++;
BUG_ON(btree_node_locked(path2, n3->c.level));
six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n3->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path2, n3);
n3->sib_u64s[0] = U16_MAX;
@ -1563,7 +1555,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, path1, n1);
if (parent)
@ -1661,12 +1653,16 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
}
/**
* bch_btree_insert_node - insert bkeys into a given btree node
* bch2_btree_insert_node - insert bkeys into a given btree node
*
* @iter: btree iterator
* @as: btree_update object
* @trans: btree_trans object
* @path: path that points to current node
* @b: node to insert keys into
* @keys: list of keys to insert
* @hook: insert callback
* @persistent: if not null, @persistent will wait on journal write
* @flags: transaction commit flags
*
* Returns: 0 on success, typically transaction restart error on failure
*
* Inserts as many keys as it can into a given btree node, splitting it if full.
* If a split occurred, this function will return early. This can only happen
@ -1890,7 +1886,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, new_path, n);
bkey_init(&delete.k);
@ -1934,9 +1930,6 @@ err_free_update:
goto out;
}
/**
* bch_btree_node_rewrite - Rewrite/move a btree node
*/
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
@ -1967,7 +1960,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
bch2_btree_path_level_init(trans, new_path, n);
trace_and_count(c, btree_node_rewrite, c, b);
@ -2055,9 +2048,9 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
int ret;
ret = bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(&trans, a));
async_btree_node_rewrite_trans(trans, a));
if (ret)
bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
bch_err_fn(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@ -2096,8 +2089,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
ret = bch2_fs_read_write_early(c);
if (ret) {
bch_err(c, "%s: error going read-write: %s",
__func__, bch2_err_str(ret));
bch_err_msg(c, ret, "going read-write");
kfree(a);
return;
}
@ -2372,7 +2364,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
{
bch2_trans_run(c, __bch2_btree_root_alloc(&trans, id));
bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)

View File

@ -296,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
mutex_lock(&wb->flush_lock);
return bch2_trans_run(c,
__bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
__bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true));
}
static inline u64 btree_write_buffer_ref(int idx)

View File

@ -680,7 +680,7 @@ static int check_bucket_ref(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
u16 bucket_sectors = !ptr->cached
u32 bucket_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
struct printbuf buf = PRINTBUF;
@ -752,9 +752,9 @@ static int check_bucket_ref(struct btree_trans *trans,
goto err;
}
if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
if ((u64) bucket_sectors + sectors > U32_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
"while marking %s",
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
@ -1201,7 +1201,7 @@ not_found:
new->k.p = bkey_start_pos(p.k);
new->k.p.offset += *idx - start;
bch2_key_resize(&new->k, next_idx - *idx);
ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new->k_i,
ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
BTREE_TRIGGER_NORUN);
}
@ -1300,7 +1300,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
static int warned_disk_usage = 0;
bool warn = false;
unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
struct replicas_delta *d = deltas->d, *d2;
struct replicas_delta *d, *d2;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
s64 added = 0, should_not_have_added;
@ -1923,7 +1923,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
{
int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca));
if (ret)
bch_err_fn(c, ret);

View File

@ -40,15 +40,42 @@ static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, secto
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
/*
* Ugly hack alert:
*
* We need to cram a spinlock in a single byte, because that's what we have left
* in struct bucket, and we care about the size of these - during fsck, we need
* in memory state for every single bucket on every device.
*
* We used to do
* while (xchg(&b->lock, 1)) cpu_relax();
* but, it turns out not all architectures support xchg on a single byte.
*
* So now we use bit_spin_lock(), with fun games since we can't burn a whole
* ulong for this - we just need to make sure the lock bit always ends up in the
* first byte.
*/
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define BUCKET_LOCK_BITNR 0
#else
#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
#endif
/*
 * Overlays a ulong with a single byte. bucket_unlock() uses it in a
 * BUILD_BUG_ON() to check that bit BUCKET_LOCK_BITNR of a ulong is visible
 * through the byte member.
 */
union ulong_byte_assert {
ulong ulong;
u8 byte;
};
static inline void bucket_unlock(struct bucket *b)
{
smp_store_release(&b->lock, 0);
BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
bit_spin_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock);
}
static inline void bucket_lock(struct bucket *b)
{
while (xchg(&b->lock, 1))
cpu_relax();
bit_spin_lock(BUCKET_LOCK_BITNR, (void *) &b->lock);
}
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
@ -180,7 +207,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
switch (watermark) {
case BCH_WATERMARK_NR:
unreachable();
BUG();
case BCH_WATERMARK_stripe:
reserved += ca->mi.nbuckets >> 6;
fallthrough;

View File

@ -133,7 +133,7 @@ retry_rehash:
b->t = n;
kvfree(t);
pr_debug("took %zu rehashes, table at %zu/%zu elements",
pr_debug("took %zu rehashes, table at %zu/%lu elements",
nr_rehashes, nr_elements, 1UL << b->t->bits);
out:
mutex_unlock(&b->lock);

View File

@ -86,10 +86,9 @@ static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
devs[i] = strndup_user((const char __user *)(unsigned long)
user_devs[i],
PATH_MAX);
if (!devs[i]) {
ret = -ENOMEM;
ret= PTR_ERR_OR_ZERO(devs[i]);
if (ret)
goto err;
}
}
c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
@ -117,8 +116,9 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
if (!path)
return -ENOMEM;
ret = PTR_ERR_OR_ZERO(path);
if (ret)
return ret;
err = bch2_fs_open_incremental(path);
kfree(path);
@ -149,9 +149,10 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
static long bch2_ioctl_query_uuid(struct bch_fs *c,
struct bch_ioctl_query_uuid __user *user_arg)
{
return copy_to_user(&user_arg->uuid,
&c->sb.user_uuid,
sizeof(c->sb.user_uuid));
if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
sizeof(c->sb.user_uuid)))
return -EFAULT;
return 0;
}
#if 0
@ -188,8 +189,9 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
if (!path)
return -ENOMEM;
ret = PTR_ERR_OR_ZERO(path);
if (ret)
return ret;
ret = bch2_dev_add(c, path);
kfree(path);
@ -230,8 +232,9 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
return -EINVAL;
path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
if (!path)
return -ENOMEM;
ret = PTR_ERR_OR_ZERO(path);
if (ret)
return ret;
ret = bch2_dev_online(c, path);
kfree(path);
@ -338,7 +341,10 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
if (len < sizeof(e))
return -EINVAL;
return copy_to_user(buf, &e, sizeof(e)) ?: sizeof(e);
if (copy_to_user(buf, &e, sizeof(e)))
return -EFAULT;
return sizeof(e);
}
static const struct file_operations bcachefs_data_ops = {
@ -417,7 +423,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
return -EFAULT;
arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL);
arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
if (!arg)
return -ENOMEM;
@ -466,9 +472,11 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
percpu_up_read(&c->mark_lock);
kfree(src);
if (!ret)
ret = copy_to_user(user_arg, arg,
sizeof(*arg) + arg->replica_entries_bytes);
if (ret)
goto err;
if (copy_to_user(user_arg, arg,
sizeof(*arg) + arg->replica_entries_bytes))
ret = -EFAULT;
err:
kfree(arg);
return ret;
@ -513,7 +521,10 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
percpu_ref_put(&ca->ref);
return copy_to_user(user_arg, &arg, sizeof(arg));
if (copy_to_user(user_arg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
}
static long bch2_ioctl_read_super(struct bch_fs *c,
@ -550,8 +561,9 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
goto err;
}
ret = copy_to_user((void __user *)(unsigned long)arg.sb,
sb, vstruct_bytes(sb));
if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
vstruct_bytes(sb)))
ret = -EFAULT;
err:
if (!IS_ERR_OR_NULL(ca))
percpu_ref_put(&ca->ref);
@ -617,6 +629,9 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
arg.pad)
return -EINVAL;
if (arg.nbuckets > U32_MAX)
return -EINVAL;
ca = bch2_device_lookup(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);

View File

@ -139,7 +139,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
for (i = 0; i < pages; i++) {
unsigned offset = offset_in_page(buf);
unsigned pg_len = min(len, PAGE_SIZE - offset);
unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset);
sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
buf += pg_len;
@ -159,15 +159,16 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
crypto_alloc_sync_skcipher("chacha20", 0, 0);
int ret;
if (!chacha20) {
pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20));
return PTR_ERR(chacha20);
ret = PTR_ERR_OR_ZERO(chacha20);
if (ret) {
pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret));
return ret;
}
ret = crypto_skcipher_setkey(&chacha20->base,
(void *) key, sizeof(*key));
if (ret) {
pr_err("crypto_skcipher_setkey() error: %i", ret);
pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret));
goto err;
}
@ -366,11 +367,11 @@ struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
BUG_ON(!bch2_checksum_mergeable(type));
while (b_len) {
unsigned b = min_t(unsigned, b_len, PAGE_SIZE);
unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE);
bch2_checksum_update(&state,
page_address(ZERO_PAGE(0)), b);
b_len -= b;
page_address(ZERO_PAGE(0)), page_len);
b_len -= page_len;
}
a.lo = (__le64 __force) bch2_checksum_final(&state);
a.lo ^= b.lo;
@ -395,9 +396,9 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
unsigned csum_type;
struct bch_csum csum;
} splits[3] = {
{ crc_a, len_a, new_csum_type },
{ crc_b, len_b, new_csum_type },
{ NULL, bio_sectors(bio) - len_a - len_b, new_csum_type },
{ crc_a, len_a, new_csum_type, { 0 }},
{ crc_b, len_b, new_csum_type, { 0 } },
{ NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } },
}, *i;
bool mergeable = crc_old.csum_type == new_csum_type &&
bch2_checksum_mergeable(new_csum_type);
@ -558,6 +559,7 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
return ret;
}
#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *sb)
{
key_serial_t key_id;
@ -575,6 +577,7 @@ int bch2_revoke_key(struct bch_sb *sb)
return 0;
}
#endif
int bch2_decrypt_sb_key(struct bch_fs *c,
struct bch_sb_field_crypt *crypt,
@ -596,7 +599,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
/* decrypt real key: */
ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
&sb_key, sizeof(sb_key));
&sb_key, sizeof(sb_key));
if (ret)
goto err;

View File

@ -40,15 +40,16 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
*/
#define csum_vstruct(_c, _type, _nonce, _i) \
({ \
const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \
const void *end = vstruct_end(_i); \
const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\
\
bch2_checksum(_c, _type, _nonce, start, end - start); \
bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
})
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
int bch2_request_key(struct bch_sb *, struct bch_key *);
#ifndef __KERNEL__
int bch2_revoke_key(struct bch_sb *);
#endif
int bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
void *data, size_t);

View File

@ -3,7 +3,6 @@
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "io.h"
#include "super-io.h"
#include <linux/lz4.h>
@ -571,7 +570,6 @@ void bch2_fs_compress_exit(struct bch_fs *c)
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
struct {
@ -581,7 +579,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
size_t decompress_workspace;
} compression_types[] = {
{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
0 },
{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
@ -620,9 +619,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
if (!(features & (1 << i->feature)))
continue;
if (i->decompress_workspace)
decompress_workspace_needed = true;
if (mempool_initialized(&c->compress_workspace[i->type]))
continue;

View File

@ -43,7 +43,7 @@ static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
prt_tab(out);
prt_printf(out, "%llu", le64_to_cpu(ctrs->d[i]));
prt_newline(out);
};
}
};
int bch2_sb_counters_to_cpu(struct bch_fs *c)

View File

@ -9,7 +9,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
#include "io.h"
#include "io_write.h"
#include "keylist.h"
#include "move.h"
#include "nocow_locking.h"
@ -49,10 +49,6 @@ static void trace_move_extent_fail2(struct data_update *m,
if (insert) {
i = 0;
bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
struct bkey_s new_s;
new_s.k = (void *) new.k;
new_s.v = (void *) new.v;
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached)
@ -307,7 +303,7 @@ out:
/*
 * Index update entry point for a data update write op.
 *
 * Runs __bch2_data_update_index_update() for @op under bch2_trans_run() on
 * op->c and returns its result.
 *
 * NOTE(review): `trans` is not a local of this function; it is presumably the
 * transaction pointer that bch2_trans_run() brings into scope - confirm
 * against the macro definition. The earlier `&trans` form is dropped: it was
 * a leftover of the by-value transaction API and made this call unreachable.
 */
int bch2_data_update_index_update(struct bch_write_op *op)
{
	return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
}
void bch2_data_update_read_done(struct data_update *m,

View File

@ -4,7 +4,7 @@
#define _BCACHEFS_DATA_UPDATE_H
#include "bkey_buf.h"
#include "io_types.h"
#include "io_write_types.h"
struct moving_context;

View File

@ -19,7 +19,6 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
#include "io.h"
#include "super.h"
#include <linux/console.h>
@ -154,10 +153,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
BUG_ON(b->nsets != 1);
for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
if (k->type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k);
v->mem_ptr = 0;
}
if (k->type == KEY_TYPE_btree_ptr_v2)
((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;
v = c->verify_data;
bkey_copy(&v->key, &b->key);
@ -322,16 +319,16 @@ static ssize_t flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
int err = copy_to_user(i->ubuf, i->buf.buf, bytes);
int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);
if (err)
return err;
i->ret += copied;
i->ubuf += copied;
i->size -= copied;
i->buf.pos -= copied;
memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);
i->ret += bytes;
i->ubuf += bytes;
i->size -= bytes;
i->buf.pos -= bytes;
memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
if (copied != bytes)
return -EFAULT;
}
return i->size ? 0 : i->ret;
@ -369,7 +366,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
ssize_t ret;
@ -382,17 +379,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
if (ret)
return ret;
bch2_trans_init(&trans, i->c, 0, 0);
ret = for_each_btree_key2(&trans, iter, i->id, i->from,
trans = bch2_trans_get(i->c);
ret = for_each_btree_key2(trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
bch2_bkey_val_to_text(&i->buf, i->c, k);
prt_newline(&i->buf);
drop_locks_do(&trans, flush_buf(i));
drop_locks_do(trans, flush_buf(i));
}));
i->from = iter.pos;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);
@ -411,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct btree *b;
ssize_t ret;
@ -427,26 +424,26 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (bpos_eq(SPOS_MAX, i->from))
return i->ret;
bch2_trans_init(&trans, i->c, 0, 0);
trans = bch2_trans_get(i->c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) {
bch2_btree_node_to_text(&i->buf, i->c, b);
i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
ret = drop_locks_do(&trans, flush_buf(i));
ret = drop_locks_do(trans, flush_buf(i));
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);
@ -465,7 +462,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
ssize_t ret;
@ -478,9 +475,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
if (ret)
return ret;
bch2_trans_init(&trans, i->c, 0, 0);
trans = bch2_trans_get(i->c);
ret = for_each_btree_key2(&trans, iter, i->id, i->from,
ret = for_each_btree_key2(trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
@ -493,11 +490,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
}
bch2_bfloat_to_text(&i->buf, l->b, _k);
drop_locks_do(&trans, flush_buf(i));
drop_locks_do(trans, flush_buf(i));
}));
i->from = iter.pos;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (!ret)
ret = flush_buf(i);

View File

@ -479,21 +479,19 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
const struct bch_hash_info *hash_info,
const struct qstr *name, subvol_inum *inum)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
int ret;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info,
name, inum, 0);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret)
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
return ret;
}
@ -522,7 +520,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_dirent dirent;
@ -533,15 +531,14 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
int ret;
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot),
POS(inum.inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
@ -549,7 +546,7 @@ retry:
dirent = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inum, dirent, &target);
ret = bch2_dirent_read_target(trans, inum, dirent, &target);
if (ret < 0)
break;
if (ret)
@ -558,7 +555,7 @@ retry:
/* dir_emit() can fault and block: */
bch2_bkey_buf_reassemble(&sk, c, k);
dirent = bkey_i_to_s_c_dirent(sk.k);
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
name = bch2_dirent_get_name(dirent);
@ -574,16 +571,16 @@ retry:
* read_target looks up subvolumes, we can overflow paths if the
* directory has many subvolumes in it
*/
ret = btree_trans_too_many_iters(&trans);
ret = btree_trans_too_many_iters(trans);
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;

View File

@ -32,21 +32,21 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb,
for (i = 0; i < sb->nr_devices; i++) {
struct bch_member *m = mi->members + i;
unsigned g;
unsigned group_id;
if (!BCH_MEMBER_GROUP(m))
continue;
g = BCH_MEMBER_GROUP(m) - 1;
group_id = BCH_MEMBER_GROUP(m) - 1;
if (g >= nr_groups) {
if (group_id >= nr_groups) {
prt_printf(err, "disk %u has invalid label %u (have %u)",
i, g, nr_groups);
i, group_id, nr_groups);
return -BCH_ERR_invalid_sb_disk_groups;
}
if (BCH_GROUP_DELETED(&groups->entries[g])) {
prt_printf(err, "disk %u has deleted label %u", i, g);
if (BCH_GROUP_DELETED(&groups->entries[group_id])) {
prt_printf(err, "disk %u has deleted label %u", i, group_id);
return -BCH_ERR_invalid_sb_disk_groups;
}
}
@ -183,8 +183,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
struct bch_member *m = mi->members + i;
struct bch_disk_group_cpu *dst =
&cpu_g->entries[BCH_MEMBER_GROUP(m)];
struct bch_disk_group_cpu *dst;
if (!bch2_member_exists(m))
continue;

View File

@ -11,10 +11,11 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io.h"
#include "io_read.h"
#include "keylist.h"
#include "recovery.h"
#include "replicas.h"
@ -475,7 +476,7 @@ err:
/*
 * Look up the stripe key at index @idx into @stripe.
 *
 * Thin wrapper: runs get_stripe_key_trans() under bch2_trans_run() on @c and
 * returns its result.
 *
 * NOTE(review): `trans` is presumably supplied by bch2_trans_run() as a
 * pointer - confirm against the macro. The stale `&trans` call that preceded
 * this one (and made it unreachable) is removed.
 */
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
	return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe));
}
/* recovery read path: */
@ -787,12 +788,10 @@ static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
container_of(work, struct bch_fs, ec_stripe_delete_work);
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
int ret;
u64 idx;
bch2_trans_init(&trans, c, 0, 0);
while (1) {
mutex_lock(&c->ec_stripes_heap_lock);
idx = stripe_idx_to_delete(c);
@ -801,15 +800,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
if (!idx)
break;
ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
ec_stripe_delete(&trans, idx));
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
ec_stripe_delete(trans, idx));
if (ret) {
bch_err_fn(c, ret);
break;
}
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
}
@ -998,24 +997,22 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
/*
 * Update the extents for every data block of stripe buffer @s.
 *
 * Flushes the btree write buffer first, then calls ec_stripe_update_bucket()
 * once per data block (nr_blocks - nr_redundant), stopping at the first
 * error. The transaction obtained from bch2_trans_get() is released with
 * bch2_trans_put() on every path.
 *
 * Only the pointer-based transaction calls are kept; the duplicated by-value
 * `struct btree_trans trans` declaration and its bch2_trans_init()/
 * bch2_trans_exit() calls conflicted with them and are removed.
 *
 * Returns 0 on success, otherwise the first error encountered.
 */
static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
	unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
	int ret = 0;

	ret = bch2_btree_write_buffer_flush(trans);
	if (ret)
		goto err;

	for (i = 0; i < nr_data; i++) {
		ret = ec_stripe_update_bucket(trans, s, i);
		if (ret)
			break;
	}
err:
	bch2_trans_put(trans);

	return ret;
}
@ -1123,7 +1120,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
ret = bch2_trans_do(c, &s->res, NULL,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL,
ec_stripe_key_update(&trans,
ec_stripe_key_update(trans,
bkey_i_to_stripe(&s->new_stripe.key),
!s->have_existing_stripe));
if (ret) {
@ -1133,8 +1130,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
ret = ec_stripe_update_extents(c, &s->new_stripe);
if (ret) {
bch_err(c, "error creating stripe: error updating pointers: %s",
bch2_err_str(ret));
bch_err_msg(c, ret, "creating stripe: error updating pointers");
goto err;
}
err:
@ -1822,7 +1818,7 @@ void bch2_fs_ec_flush(struct bch_fs *c)
int bch2_stripes_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
const struct bch_stripe *s;
@ -1830,9 +1826,7 @@ int bch2_stripes_read(struct bch_fs *c)
unsigned i;
int ret;
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
if (k.k->type != KEY_TYPE_stripe)
continue;
@ -1855,9 +1849,9 @@ int bch2_stripes_read(struct bch_fs *c)
bch2_stripes_heap_insert(c, m, k.k->p.offset);
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);

View File

@ -240,7 +240,7 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
bch2_ec_do_stripe_creates(c);
break;
default:
unreachable();
BUG();
}
}

View File

@ -12,8 +12,6 @@ static const char * const bch2_errcode_strs[] = {
NULL
};
#define BCH_ERR_0 0
static unsigned bch2_errcode_parents[] = {
#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
BCH_ERRCODES()
@ -61,3 +59,10 @@ int __bch2_err_class(int err)
return -err;
}
/*
 * Describe a block status as a string.
 *
 * BLK_STS_REMOVED is reported as "device removed"; every other status is
 * passed straight through to blk_status_to_str().
 */
const char *bch2_blk_status_to_str(blk_status_t status)
{
	return status == BLK_STS_REMOVED
		? "device removed"
		: blk_status_to_str(status);
}

View File

@ -99,6 +99,7 @@
x(ENOENT, ENOENT_str_hash_set_must_replace) \
x(ENOENT, ENOENT_inode) \
x(ENOENT, ENOENT_not_subvol) \
x(ENOENT, ENOENT_not_directory) \
x(ENOENT, ENOENT_directory_dead) \
x(ENOENT, ENOENT_subvolume) \
x(ENOENT, ENOENT_snapshot_tree) \
@ -218,7 +219,14 @@
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_bad_node) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible)
x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible) \
x(0, nopromote) \
x(BCH_ERR_nopromote, nopromote_may_not) \
x(BCH_ERR_nopromote, nopromote_already_promoted) \
x(BCH_ERR_nopromote, nopromote_unwritten) \
x(BCH_ERR_nopromote, nopromote_congested) \
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_enomem)
enum bch_errcode {
BCH_ERR_START = 2048,
@ -249,4 +257,8 @@ static inline long bch2_err_class(long err)
return err < 0 ? __bch2_err_class(err) : err;
}
/*
 * Block status value defined in this header (numeric value 128, cast to
 * blk_status_t). bch2_blk_status_to_str() reports it as "device removed".
 */
#define BLK_STS_REMOVED ((__force blk_status_t)128)
/*
 * String for a block status: handles BLK_STS_REMOVED itself and defers to
 * blk_status_to_str() for everything else.
 */
const char *bch2_blk_status_to_str(blk_status_t);
#endif /* _BCACHEFS_ERRCODE_H */

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
#include "io.h"
#include "super.h"
#define FSCK_ERR_RATELIMIT_NR 10

View File

@ -8,7 +8,8 @@
#include "fs-io-buffered.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
#include "io.h"
#include "io_read.h"
#include "io_write.h"
#include <linux/backing-dev.h>
#include <linux/pagemap.h>
@ -269,7 +270,7 @@ void bch2_readahead(struct readahead_control *ractl)
struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct folio *folio;
struct readpages_iter readpages_iter;
int ret;
@ -279,8 +280,6 @@ void bch2_readahead(struct readahead_control *ractl)
ret = readpages_iter_init(&readpages_iter, ractl);
BUG_ON(ret);
bch2_trans_init(&trans, c, 0, 0);
bch2_pagecache_add_get(inode);
while ((folio = readpage_iter_peek(&readpages_iter))) {
@ -299,31 +298,27 @@ void bch2_readahead(struct readahead_control *ractl)
rbio->bio.bi_end_io = bch2_readpages_end_io;
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
bchfs_read(&trans, rbio, inode_inum(inode),
bchfs_read(trans, rbio, inode_inum(inode),
&readpages_iter);
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
}
bch2_pagecache_add_put(inode);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
darray_exit(&readpages_iter.folios);
}
/*
 * Set up @rbio to read the single folio @folio and issue the read.
 *
 * Attaches bcachefs folio state (allocation uses __GFP_NOFAIL), marks the bio
 * as a synchronous read starting at the folio's sector, adds the whole folio
 * to the bio, then calls bchfs_read() once under bch2_trans_run().
 *
 * The older bch2_trans_init()/bchfs_read()/bch2_trans_exit() sequence is
 * dropped: keeping it next to bch2_trans_run() would issue the read twice.
 *
 * NOTE(review): `trans` is presumably provided by bch2_trans_run(); the
 * `(..., 0)` comma expression hands the macro a 0 value to evaluate to -
 * confirm against the macro definition.
 */
static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
			      subvol_inum inum, struct folio *folio)
{
	bch2_folio_create(folio, __GFP_NOFAIL);

	rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
	rbio->bio.bi_iter.bi_sector = folio_sector(folio);
	BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));

	bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
}
static void bch2_read_single_folio_end_io(struct bio *bio)
@ -694,12 +689,12 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR_OR_NULL(folio))
goto err_unlock;
if (folio_test_uptodate(folio))
goto out;
offset = pos - folio_pos(folio);
len = min_t(size_t, len, folio_end_pos(folio) - pos);
if (folio_test_uptodate(folio))
goto out;
/* If we're writing entire folio, don't need to read it in first: */
if (!offset && len == folio_size(folio))
goto out;
@ -800,10 +795,10 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
return copied;
}
static noinline void folios_trunc(folios *folios, struct folio **fi)
static noinline void folios_trunc(folios *fs, struct folio **fi)
{
while (folios->data + folios->nr > fi) {
struct folio *f = darray_pop(folios);
while (fs->data + fs->nr > fi) {
struct folio *f = darray_pop(fs);
folio_unlock(f);
folio_put(f);
@ -817,35 +812,35 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
folios folios;
folios fs;
struct folio **fi, *f;
unsigned copied = 0, f_offset;
u64 end = pos + len, f_pos;
unsigned copied = 0, f_offset, f_copied;
u64 end = pos + len, f_pos, f_len;
loff_t last_folio_pos = inode->v.i_size;
int ret = 0;
BUG_ON(!len);
bch2_folio_reservation_init(c, inode, &res);
darray_init(&folios);
darray_init(&fs);
ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
mapping_gfp_mask(mapping),
&folios);
&fs);
if (ret)
goto out;
BUG_ON(!folios.nr);
BUG_ON(!fs.nr);
f = darray_first(folios);
f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
if (ret)
goto out;
}
f = darray_last(folios);
f = darray_last(fs);
end = min(end, folio_end_pos(f));
last_folio_pos = folio_pos(f);
if (end != folio_end_pos(f) && !folio_test_uptodate(f)) {
@ -858,15 +853,15 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
}
ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr);
ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr);
if (ret)
goto out;
f_pos = pos;
f_offset = pos - folio_pos(darray_first(folios));
darray_for_each(folios, fi) {
struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos;
f_offset = pos - folio_pos(darray_first(fs));
darray_for_each(fs, fi) {
f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos;
/*
* XXX: per POSIX and fstests generic/275, on -ENOSPC we're
@ -878,11 +873,11 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
*/
ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
if (unlikely(ret)) {
folios_trunc(&folios, fi);
if (!folios.nr)
folios_trunc(&fs, fi);
if (!fs.nr)
goto out;
end = min(end, folio_end_pos(darray_last(folios)));
end = min(end, folio_end_pos(darray_last(fs)));
break;
}
@ -891,18 +886,17 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
if (mapping_writably_mapped(mapping))
darray_for_each(folios, fi)
darray_for_each(fs, fi)
flush_dcache_folio(*fi);
f_pos = pos;
f_offset = pos - folio_pos(darray_first(folios));
darray_for_each(folios, fi) {
struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos;
unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
f_offset = pos - folio_pos(darray_first(fs));
darray_for_each(fs, fi) {
f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos;
f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) {
folios_trunc(&folios, fi);
folios_trunc(&fs, fi);
break;
}
@ -911,7 +905,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
pos + copied + f_copied < inode->v.i_size) {
iov_iter_revert(iter, f_copied);
folio_zero_range(f, 0, folio_size(f));
folios_trunc(&folios, fi);
folios_trunc(&fs, fi);
break;
}
@ -919,7 +913,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
copied += f_copied;
if (f_copied != f_len) {
folios_trunc(&folios, fi + 1);
folios_trunc(&fs, fi + 1);
break;
}
@ -938,10 +932,10 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
spin_unlock(&inode->v.i_lock);
f_pos = pos;
f_offset = pos - folio_pos(darray_first(folios));
darray_for_each(folios, fi) {
struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos;
f_offset = pos - folio_pos(darray_first(fs));
darray_for_each(fs, fi) {
f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos;
if (!folio_test_uptodate(f))
folio_mark_uptodate(f);
@ -954,7 +948,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
inode->ei_last_dirtied = (unsigned long) current;
out:
darray_for_each(folios, fi) {
darray_for_each(fs, fi) {
folio_unlock(*fi);
folio_put(*fi);
}
@ -967,7 +961,7 @@ out:
if (last_folio_pos >= inode->v.i_size)
truncate_pagecache(&inode->v, inode->v.i_size);
darray_exit(&folios);
darray_exit(&fs);
bch2_folio_reservation_put(c, inode, &res);
return copied ?: ret;
@ -1055,8 +1049,6 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(&inode->v);
inode_lock(&inode->v);
ret = generic_write_checks(iocb, from);
@ -1076,7 +1068,6 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos += ret;
unlock:
inode_unlock(&inode->v);
current->backing_dev_info = NULL;
if (ret > 0)
ret = generic_write_sync(iocb, ret);

View File

@ -7,10 +7,12 @@
#include "fs-io.h"
#include "fs-io-direct.h"
#include "fs-io-pagecache.h"
#include "io.h"
#include "io_read.h"
#include "io_write.h"
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/prefetch.h>
#include <linux/task_io_accounting_ops.h>
/* O_DIRECT reads */
@ -232,23 +234,21 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
u64 offset, u64 size,
unsigned nr_replicas, bool compressed)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
u64 end = offset + size;
u32 snapshot;
bool ret = true;
int err;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (err)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_SLOTS, k, err) {
if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end)))
@ -263,11 +263,11 @@ retry:
}
offset = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(err, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return err ? false : ret;
}

View File

@ -14,7 +14,7 @@
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
loff_t start, u64 end,
int fgp_flags, gfp_t gfp,
folios *folios)
folios *fs)
{
struct folio *f;
u64 pos = start;
@ -24,7 +24,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
if ((u64) pos >= (u64) start + (1ULL << 20))
fgp_flags &= ~FGP_CREAT;
ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL);
ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
if (ret)
break;
@ -32,16 +32,16 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
if (IS_ERR_OR_NULL(f))
break;
BUG_ON(folios->nr && folio_pos(f) != pos);
BUG_ON(fs->nr && folio_pos(f) != pos);
pos = folio_end_pos(f);
darray_push(folios, f);
darray_push(fs, f);
}
if (!folios->nr && !ret && (fgp_flags & FGP_CREAT))
if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
ret = -ENOMEM;
return folios->nr ? 0 : ret;
return fs->nr ? 0 : ret;
}
/* pagecache_block must be held */
@ -73,12 +73,15 @@ int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
return ret;
}
#if 0
/*
 * Useful for debug tracing: string names for the folio sector states.
 *
 * x(n) stringifies its argument, so the table gets one string for each x()
 * invocation made by BCH_FOLIO_SECTOR_STATE(), followed by a NULL terminator.
 * The whole table is compiled out by the surrounding #if 0.
 */
static const char * const bch2_folio_sector_states[] = {
#define x(n) #n,
BCH_FOLIO_SECTOR_STATE()
#undef x
NULL
};
#endif
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
@ -177,20 +180,20 @@ static void __bch2_folio_set(struct folio *folio,
* extents btree:
*/
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
struct folio **folios, unsigned nr_folios)
struct folio **fs, unsigned nr_folios)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_folio *s;
u64 offset = folio_sector(folios[0]);
u64 offset = folio_sector(fs[0]);
unsigned folio_idx;
u32 snapshot;
bool need_set = false;
int ret;
for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
s = bch2_folio_create(folios[folio_idx], GFP_KERNEL);
s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
if (!s)
return -ENOMEM;
@ -201,22 +204,22 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
return 0;
folio_idx = 0;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_SLOTS, k, ret) {
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
unsigned state = bkey_to_sector_state(k);
while (folio_idx < nr_folios) {
struct folio *folio = folios[folio_idx];
struct folio *folio = fs[folio_idx];
u64 folio_start = folio_sector(folio);
u64 folio_end = folio_end_sector(folio);
unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
@ -240,11 +243,11 @@ retry:
}
offset = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}

View File

@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@ -16,7 +17,7 @@
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "io.h"
#include "io_misc.h"
#include "keylist.h"
#include "quota.h"
#include "reflink.h"
@ -164,7 +165,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
#endif
}
/* fsync: */
/*
@ -207,31 +207,29 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol,
struct bpos start,
struct bpos end)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot);
ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
ret = 1;
break;
}
start = iter.pos;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -241,8 +239,8 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bch_folio *s;
unsigned start_offset = start & (PAGE_SIZE - 1);
unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
unsigned start_offset;
unsigned end_offset;
unsigned i;
struct folio *folio;
s64 i_sectors_delta = 0;
@ -391,33 +389,12 @@ static int bch2_extend(struct mnt_idmap *idmap,
return bch2_setattr_nonsize(idmap, inode, iattr);
}
static int bch2_truncate_finish_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
return 0;
}
static int bch2_truncate_start_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi, void *p)
{
u64 *new_i_size = p;
bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY;
bi->bi_size = *new_i_size;
return 0;
}
int bch2_truncate(struct mnt_idmap *idmap,
int bchfs_truncate(struct mnt_idmap *idmap,
struct bch_inode_info *inode, struct iattr *iattr)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bch_inode_unpacked inode_u;
u64 new_i_size = iattr->ia_size;
s64 i_sectors_delta = 0;
int ret = 0;
@ -466,6 +443,8 @@ int bch2_truncate(struct mnt_idmap *idmap,
if (unlikely(ret < 0))
goto err;
truncate_setsize(&inode->v, iattr->ia_size);
/*
* When extending, we're going to write the new i_size to disk
* immediately so we need to flush anything above the current on disk
@ -487,32 +466,22 @@ int bch2_truncate(struct mnt_idmap *idmap,
if (ret)
goto err;
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, bch2_truncate_start_fn,
&new_i_size, 0);
mutex_unlock(&inode->ei_update_lock);
if (unlikely(ret))
goto err;
truncate_setsize(&inode->v, iattr->ia_size);
ret = bch2_fpunch(c, inode_inum(inode),
round_up(iattr->ia_size, block_bytes(c)) >> 9,
U64_MAX, &i_sectors_delta);
ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (unlikely(ret)) {
/*
* If we error here, VFS caches are now inconsistent with btree
*/
set_bit(EI_INODE_ERROR, &inode->ei_flags);
goto err;
}
bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal), c,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
if (unlikely(ret))
goto err;
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0);
mutex_unlock(&inode->ei_update_lock);
ret = bch2_setattr_nonsize(idmap, inode, iattr);
err:
@ -577,175 +546,33 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bkey_buf copy;
struct btree_trans trans;
struct btree_iter src, dst, del;
loff_t shift, new_size;
u64 src_start;
s64 i_sectors_delta = 0;
int ret = 0;
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
if (insert) {
if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
return -EFBIG;
if (offset >= inode->v.i_size)
return -EINVAL;
src_start = U64_MAX;
shift = len;
} else {
if (offset + len >= inode->v.i_size)
return -EINVAL;
src_start = offset + len;
shift = -len;
}
new_size = inode->v.i_size + shift;
ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret)
return ret;
if (insert) {
i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
} else {
s64 i_sectors_delta = 0;
if (insert)
i_size_write(&inode->v, inode->v.i_size + len);
ret = bch2_fpunch(c, inode_inum(inode),
offset >> 9, (offset + len) >> 9,
&i_sectors_delta);
bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
insert, &i_sectors_delta);
if (!ret && !insert)
i_size_write(&inode->v, inode->v.i_size - len);
bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (ret)
return ret;
}
bch2_bkey_buf_init(&copy);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
POS(inode->v.i_ino, src_start >> 9),
BTREE_ITER_INTENT);
bch2_trans_copy_iter(&dst, &src);
bch2_trans_copy_iter(&del, &src);
while (ret == 0 ||
bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
struct bkey_s_c k;
struct bpos next_pos;
struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
struct bpos atomic_end;
unsigned trigger_flags = 0;
u32 snapshot;
bch2_trans_begin(&trans);
ret = bch2_subvolume_get_snapshot(&trans,
inode->ei_subvol, &snapshot);
if (ret)
continue;
bch2_btree_iter_set_snapshot(&src, snapshot);
bch2_btree_iter_set_snapshot(&dst, snapshot);
bch2_btree_iter_set_snapshot(&del, snapshot);
bch2_trans_begin(&trans);
k = insert
? bch2_btree_iter_peek_prev(&src)
: bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX));
if ((ret = bkey_err(k)))
continue;
if (!k.k || k.k->p.inode != inode->v.i_ino)
break;
if (insert &&
bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9)))
break;
reassemble:
bch2_bkey_buf_reassemble(&copy, c, k);
if (insert &&
bkey_lt(bkey_start_pos(k.k), move_pos))
bch2_cut_front(move_pos, copy.k);
copy.k->k.p.offset += shift >> 9;
bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));
ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
if (ret)
continue;
if (!bkey_eq(atomic_end, copy.k->k.p)) {
if (insert) {
move_pos = atomic_end;
move_pos.offset -= shift >> 9;
goto reassemble;
} else {
bch2_cut_back(atomic_end, copy.k);
}
}
bkey_init(&delete.k);
delete.k.p = copy.k->k.p;
delete.k.size = copy.k->k.size;
delete.k.p.offset -= shift >> 9;
bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));
next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
if (copy.k->k.size != k.k->size) {
/* We might end up splitting compressed extents: */
unsigned nr_ptrs =
bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
ret = bch2_disk_reservation_get(c, &disk_res,
copy.k->k.size, nr_ptrs,
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
}
ret = bch2_btree_iter_traverse(&del) ?:
bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_NOFAIL);
bch2_disk_reservation_put(c, &disk_res);
if (!ret)
bch2_btree_iter_set_pos(&src, next_pos);
}
bch2_trans_iter_exit(&trans, &del);
bch2_trans_iter_exit(&trans, &dst);
bch2_trans_iter_exit(&trans, &src);
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&copy, c);
if (ret)
return ret;
mutex_lock(&inode->ei_update_lock);
if (!insert) {
i_size_write(&inode->v, new_size);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
} else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME);
}
mutex_unlock(&inode->ei_update_lock);
return ret;
}
@ -753,16 +580,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
u64 start_sector, u64 end_sector)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bpos end_pos = POS(inode->v.i_ino, end_sector);
struct bch_io_opts opts;
int ret = 0;
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(inode->v.i_ino, start_sector),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@ -775,9 +601,9 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
u64 hole_start, hole_end;
u32 snapshot;
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans,
ret = bch2_subvolume_get_snapshot(trans,
inode->ei_subvol, &snapshot);
if (ret)
goto bkey_err;
@ -814,7 +640,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
&hole_start,
&hole_end,
opts.data_replicas, true))
ret = drop_locks_do(&trans,
ret = drop_locks_do(trans,
(bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
@ -837,7 +663,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err;
}
ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter,
ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter,
sectors, opts, &i_sectors_delta,
writepoint_hashed((unsigned long) current));
if (ret)
@ -845,7 +671,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
drop_locks_do(&trans,
drop_locks_do(trans,
(bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
@ -857,14 +683,14 @@ bkey_err:
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
bch2_fpunch_at(&trans, &iter, inode_inum(inode),
bch2_fpunch_at(trans, &iter, inode_inum(inode),
end_sector, &i_sectors_delta);
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
bch2_quota_reservation_put(c, inode, &quota_res);
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
return ret;
}
@ -970,26 +796,24 @@ static int quota_reserve_range(struct bch_inode_info *inode,
u64 start, u64 end)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
u32 snapshot;
u64 sectors = end - start;
u64 pos = start;
int ret;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, pos, snapshot), 0);
while (!(ret = btree_trans_too_many_iters(&trans)) &&
while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
!(ret = bkey_err(k))) {
if (bkey_extent_is_allocation(k.k)) {
@ -1001,17 +825,14 @@ retry:
bch2_btree_iter_advance(&iter);
}
pos = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
return ret;
return bch2_quota_reservation_add(c, inode, res, sectors, true);
return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
}
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
@ -1104,7 +925,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
@ -1116,15 +937,15 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents,
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
POS(inode->v.i_ino, U64_MAX),
0, k, ret) {
@ -1134,12 +955,12 @@ retry:
} else if (k.k->p.offset >> 9 > isize)
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
return ret;
@ -1157,7 +978,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
@ -1169,15 +990,15 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
@ -1195,12 +1016,12 @@ retry:
offset = max(offset, bkey_start_offset(k.k) << 9);
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
return ret;

View File

@ -6,7 +6,7 @@
#include "buckets.h"
#include "fs.h"
#include "io_types.h"
#include "io_write_types.h"
#include "quota.h"
#include <linux/uio.h>
@ -165,7 +165,7 @@ int __must_check bch2_write_inode_size(struct bch_fs *,
int bch2_fsync(struct file *, loff_t, loff_t, int);
int bch2_truncate(struct mnt_idmap *,
int bchfs_truncate(struct mnt_idmap *,
struct bch_inode_info *, struct iattr *);
long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);

View File

@ -122,7 +122,10 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
return copy_to_user(arg, &fa, sizeof(fa));
if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;
return 0;
}
static int fssetxattr_inode_update_fn(struct btree_trans *trans,

View File

@ -5,7 +5,7 @@
/* Inode flags: */
/* bcachefs inode flags -> vfs inode flags: */
static const unsigned bch_flags_to_vfs[] = {
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_SYNC] = S_SYNC,
[__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
[__BCH_INODE_APPEND] = S_APPEND,
@ -13,7 +13,7 @@ static const unsigned bch_flags_to_vfs[] = {
};
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const unsigned bch_flags_to_uflags[] = {
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
[__BCH_INODE_SYNC] = FS_SYNC_FL,
[__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
[__BCH_INODE_APPEND] = FS_APPEND_FL,
@ -22,7 +22,7 @@ static const unsigned bch_flags_to_uflags[] = {
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
static const unsigned bch_flags_to_xflags[] = {
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
[__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
[__BCH_INODE_APPEND] = FS_XFLAG_APPEND,

View File

@ -19,7 +19,7 @@
#include "fs-io-pagecache.h"
#include "fsck.h"
#include "inode.h"
#include "io.h"
#include "io_read.h"
#include "journal.h"
#include "keylist.h"
#include "quota.h"
@ -82,29 +82,27 @@ int __must_check bch2_write_inode(struct bch_fs *c,
inode_set_fn set,
void *p, unsigned fields)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bch_inode_unpacked inode_u;
int ret;
bch2_trans_init(&trans, c, 0, 512);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT) ?:
(set ? set(&trans, inode, &inode_u, p) : 0) ?:
bch2_inode_write(&trans, &iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
(set ? set(trans, inode, &inode_u, p) : 0) ?:
bch2_inode_write(trans, &iter, &inode_u) ?:
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
/*
* the btree node lock protects inode->ei_inode, not ei_update_lock;
* this is important for inode updates via bchfs_write_index_update
*/
if (!ret)
bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
bch2_inode_update_after_write(trans, inode, &inode_u, fields);
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
@ -114,7 +112,7 @@ retry:
inode_inum(inode).subvol,
inode_inum(inode).inum);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret < 0 ? ret : 0;
}
@ -182,7 +180,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
struct bch_inode_unpacked inode_u;
struct bch_inode_info *inode;
struct btree_trans trans;
struct btree_trans *trans;
struct bch_subvolume subvol;
int ret;
@ -196,14 +194,14 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
if (!(inode->v.i_state & I_NEW))
return &inode->v;
bch2_trans_init(&trans, c, 8, 0);
ret = lockrestart_do(&trans,
bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
trans = bch2_trans_get(c);
ret = lockrestart_do(trans,
bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
if (!ret)
bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
bch2_trans_exit(&trans);
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
bch2_trans_put(trans);
if (ret) {
iget_failed(&inode->v);
@ -226,7 +224,7 @@ __bch2_create(struct mnt_idmap *idmap,
unsigned flags)
{
struct bch_fs *c = dir->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans;
struct bch_inode_unpacked dir_u;
struct bch_inode_info *inode, *old;
struct bch_inode_unpacked inode_u;
@ -256,13 +254,11 @@ __bch2_create(struct mnt_idmap *idmap,
if (!(flags & BCH_CREATE_TMPFILE))
mutex_lock(&dir->ei_update_lock);
bch2_trans_init(&trans, c, 8,
2048 + (!(flags & BCH_CREATE_TMPFILE)
? dentry->d_name.len : 0));
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_create_trans(&trans,
ret = bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
@ -278,9 +274,9 @@ retry:
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
inum.inum = inode_u.bi_inum;
ret = bch2_subvolume_get(&trans, inum.subvol, true,
ret = bch2_subvolume_get(trans, inum.subvol, true,
BTREE_ITER_WITH_UPDATES, &subvol) ?:
bch2_trans_commit(&trans, NULL, &journal_seq, 0);
bch2_trans_commit(trans, NULL, &journal_seq, 0);
if (unlikely(ret)) {
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
@ -291,13 +287,13 @@ err_before_quota:
}
if (!(flags & BCH_CREATE_TMPFILE)) {
bch2_inode_update_after_write(&trans, dir, &dir_u,
bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&dir->ei_update_lock);
}
bch2_iget5_set(&inode->v, &inum);
bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol);
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@ -337,7 +333,7 @@ err_before_quota:
unlock_new_inode(&inode->v);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
posix_acl_release(acl);
@ -346,7 +342,7 @@ err_trans:
if (!(flags & BCH_CREATE_TMPFILE))
mutex_unlock(&dir->ei_update_lock);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
make_bad_inode(&inode->v);
iput(&inode->v);
inode = ERR_PTR(ret);
@ -401,26 +397,25 @@ static int __bch2_link(struct bch_fs *c,
struct bch_inode_info *dir,
struct dentry *dentry)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct bch_inode_unpacked dir_u, inode_u;
int ret;
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024);
ret = commit_do(&trans, NULL, NULL, 0,
bch2_link_trans(&trans,
ret = commit_do(trans, NULL, NULL, 0,
bch2_link_trans(trans,
inode_inum(dir), &dir_u,
inode_inum(inode), &inode_u,
&dentry->d_name));
if (likely(!ret)) {
bch2_inode_update_after_write(&trans, dir, &dir_u,
bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
mutex_unlock(&inode->ei_update_lock);
return ret;
}
@ -451,24 +446,23 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
struct bch_inode_info *dir = to_bch_ei(vdir);
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_inode_unpacked dir_u, inode_u;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
int ret;
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_init(&trans, c, 4, 1024);
ret = commit_do(&trans, NULL, NULL,
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL,
bch2_unlink_trans(&trans,
bch2_unlink_trans(trans,
inode_inum(dir), &dir_u,
&inode_u, &dentry->d_name,
deleting_snapshot));
if (unlikely(ret))
goto err;
bch2_inode_update_after_write(&trans, dir, &dir_u,
bch2_inode_update_after_write(trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(&trans, inode, &inode_u,
bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_MTIME);
if (inode_u.bi_subvol) {
@ -479,8 +473,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
set_nlink(&inode->v, 0);
}
err:
bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_put(trans);
return ret;
}
@ -543,7 +537,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
struct bch_inode_unpacked dst_dir_u, src_dir_u;
struct bch_inode_unpacked src_inode_u, dst_inode_u;
struct btree_trans trans;
struct btree_trans *trans;
enum bch_rename_mode mode = flags & RENAME_EXCHANGE
? BCH_RENAME_EXCHANGE
: dst_dentry->d_inode
@ -560,7 +554,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
return ret;
}
bch2_trans_init(&trans, c, 8, 2048);
trans = bch2_trans_get(c);
bch2_lock_inodes(INODE_UPDATE_LOCK,
src_dir,
@ -587,8 +581,8 @@ static int bch2_rename2(struct mnt_idmap *idmap,
goto err;
}
ret = commit_do(&trans, NULL, NULL, 0,
bch2_rename_trans(&trans,
ret = commit_do(trans, NULL, NULL, 0,
bch2_rename_trans(trans,
inode_inum(src_dir), &src_dir_u,
inode_inum(dst_dir), &dst_dir_u,
&src_inode_u,
@ -603,21 +597,21 @@ static int bch2_rename2(struct mnt_idmap *idmap,
BUG_ON(dst_inode &&
dst_inode->v.i_ino != dst_inode_u.bi_inum);
bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
ATTR_MTIME|ATTR_CTIME);
if (src_dir != dst_dir)
bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
ATTR_CTIME);
if (dst_inode)
bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
ATTR_CTIME);
err:
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_fs_quota_transfer(c, src_inode,
bch_qid(&src_inode->ei_inode),
@ -680,7 +674,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_qid qid;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct posix_acl *acl = NULL;
@ -701,13 +695,13 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
if (ret)
goto err;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
kfree(acl);
acl = NULL;
ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode),
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
@ -715,29 +709,29 @@ retry:
bch2_setattr_copy(idmap, inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u,
ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
inode_u.bi_mode, &acl);
if (ret)
goto btree_err;
}
ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL,
ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
bch2_trans_iter_exit(trans, &inode_iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err_trans;
bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
if (acl)
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
err_trans:
bch2_trans_exit(&trans);
bch2_trans_put(trans);
err:
mutex_unlock(&inode->ei_update_lock);
@ -798,7 +792,7 @@ static int bch2_setattr(struct mnt_idmap *idmap,
return ret;
return iattr->ia_valid & ATTR_SIZE
? bch2_truncate(idmap, inode, iattr)
? bchfs_truncate(idmap, inode, iattr)
: bch2_setattr_nonsize(idmap, inode, iattr);
}
@ -879,7 +873,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
{
struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *ei = to_bch_ei(vinode);
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_buf cur, prev;
@ -900,18 +894,18 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_bkey_buf_init(&cur);
bch2_bkey_buf_init(&prev);
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(ei->v.i_ino, start, snapshot), 0);
while (!(ret = btree_trans_too_many_iters(&trans)) &&
while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, end)).k &&
!(ret = bkey_err(k))) {
enum btree_id data_btree = BTREE_ID_extents;
@ -928,7 +922,7 @@ retry:
bch2_bkey_buf_reassemble(&cur, c, k);
ret = bch2_read_indirect_extent(&trans, &data_btree,
ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &cur);
if (ret)
break;
@ -947,7 +941,7 @@ retry:
cur.k->k.p.offset += cur.k->k.size;
if (have_extent) {
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
ret = bch2_fill_extent(c, info,
bkey_i_to_s_c(prev.k), 0);
if (ret)
@ -961,18 +955,18 @@ retry:
POS(iter.pos.inode, iter.pos.offset + sectors));
}
start = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret && have_extent) {
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
FIEMAP_EXTENT_LAST);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&cur, c);
bch2_bkey_buf_exit(&prev, c);
return ret < 0 ? ret : 0;
@ -1230,7 +1224,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
struct bch_inode_info *inode = to_bch_ei(child->d_inode);
struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter1;
struct btree_iter iter2;
struct bkey_s_c k;
@ -1245,23 +1239,23 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
if (!S_ISDIR(dir->v.i_mode))
return -EINVAL;
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter1, snapshot);
bch2_btree_iter_set_snapshot(&iter2, snapshot);
ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
if (ret)
goto err;
@ -1279,7 +1273,7 @@ retry:
}
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
if (ret > 0)
ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
if (ret)
@ -1301,7 +1295,7 @@ retry:
continue;
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
if (ret < 0)
break;
if (ret)
@ -1325,9 +1319,9 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter1);
bch2_trans_iter_exit(&trans, &iter2);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter1);
bch2_trans_iter_exit(trans, &iter2);
bch2_trans_put(trans);
return ret;
}
@ -1661,7 +1655,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
up_write(&c->state_lock);
}
if (opts.errors >= 0)
if (opt_defined(opts, errors))
c->opts.errors = opts.errors;
err:
return bch2_err_class(ret);
@ -1722,6 +1716,35 @@ static void bch2_put_super(struct super_block *sb)
__bch2_fs_stop(c);
}
/*
 * Freeze support.
 *
 * bcachefs is not currently integrated with intwrite freeze protection; its
 * internal write references serve the same purpose. Quiescing for a freeze is
 * therefore done by reusing the read-only transition code.
 *
 * Caveat: there is currently no way to block tasks that want a write reference
 * while the superblock is frozen. That is fine for now, but we should either
 * add blocking support or find a way to integrate sb_start_intwrite() and
 * friends.
 */
static int bch2_freeze(struct super_block *sb)
{
	struct bch_fs *fs = sb->s_fs_info;

	/* state_lock is held for write across the read-only transition */
	down_write(&fs->state_lock);
	bch2_fs_read_only(fs);
	up_write(&fs->state_lock);

	/* this path always reports success */
	return 0;
}
/*
 * Counterpart to bch2_freeze(): with state_lock held for write, attempt the
 * read-write transition and hand its result back to the caller unchanged.
 */
static int bch2_unfreeze(struct super_block *sb)
{
	struct bch_fs *fs = sb->s_fs_info;
	int err;

	down_write(&fs->state_lock);
	err = bch2_fs_read_write(fs);
	up_write(&fs->state_lock);

	return err;
}
static const struct super_operations bch_super_operations = {
.alloc_inode = bch2_alloc_inode,
.destroy_inode = bch2_destroy_inode,
@ -1733,10 +1756,8 @@ static const struct super_operations bch_super_operations = {
.show_options = bch2_show_options,
.remount_fs = bch2_remount,
.put_super = bch2_put_super,
#if 0
.freeze_fs = bch2_freeze,
.unfreeze_fs = bch2_unfreeze,
#endif
};
static int bch2_set_super(struct super_block *s, void *data)
@ -1890,7 +1911,7 @@ got_sb:
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
if (ret) {
bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "mounting: error getting root inode");
goto err_put_super;
}

View File

@ -197,7 +197,7 @@ int bch2_vfs_init(void);
#else
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) do {} while (0)
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}

View File

@ -80,7 +80,7 @@ static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
if (!ret)
*subvol = le32_to_cpu(s.subvol);
else if (bch2_err_matches(ret, ENOENT))
bch_err(trans->c, "snapshot %u not fonud", snapshot);
bch_err(trans->c, "snapshot %u not found", snapshot);
return ret;
}
@ -127,8 +127,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
ret = bch2_inode_unpack(k, inode);
err:
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(trans->c, "error fetching inode %llu: %s",
inode_nr, bch2_err_str(ret));
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@ -154,8 +153,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
*snapshot = iter.pos.snapshot;
err:
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(trans->c, "error fetching inode %llu:%u: %s",
inode_nr, *snapshot, bch2_err_str(ret));
bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@ -206,17 +204,16 @@ static int __write_inode(struct btree_trans *trans,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
}
static int write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
static int fsck_write_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 snapshot)
{
int ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__write_inode(trans, inode, snapshot));
if (ret)
bch_err(trans->c, "error in fsck: error updating inode: %s",
bch2_err_str(ret));
bch_err_fn(trans->c, ret);
return ret;
}
@ -278,13 +275,13 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
}
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
bch_err_fn(c, ret);
if (ret)
return ret;
if (d_type != DT_DIR) {
bch_err(c, "error looking up lost+found: not a directory");
return ret;
return -BCH_ERR_ENOENT_not_directory;
}
/*
@ -301,7 +298,7 @@ create_lostfound:
0, 0, S_IFDIR|0700, 0, NULL, NULL,
(subvol_inum) { }, 0);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "creating lost+found");
return ret;
}
@ -365,8 +362,7 @@ static int reattach_inode(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
__reattach_inode(trans, inode, inode_snapshot));
if (ret) {
bch_err(trans->c, "error reattaching inode %llu: %s",
inode->bi_inum, bch2_err_str(ret));
bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum);
return ret;
}
@ -475,7 +471,12 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
* key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
* and @ancestor hasn't been overwritten in @seen
*
* That is, returns whether key in @ancestor snapshot is visible in @id snapshot
* @c: filesystem handle
* @seen: list of snapshot ids already seen at current position
* @id: descendent snapshot id
* @ancestor: ancestor snapshot id
*
* Returns: whether key in @ancestor snapshot is visible in @id snapshot
*/
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
u32 id, u32 ancestor)
@ -520,14 +521,16 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
* snapshot id @dst, test whether there is some snapshot in which @dst is
* visible.
*
* This assumes we're visiting @src keys in natural key order.
* @c: filesystem handle
* @s: list of snapshot IDs already seen at @src
* @src: snapshot ID of src key
* @dst: snapshot ID of dst key
* Returns: true if there is some snapshot in which @dst is visible
*
* @s - list of snapshot IDs already seen at @src
* @src - snapshot ID of src key
* @dst - snapshot ID of dst key
* Assumes we're visiting @src keys in natural key order
*/
static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
u32 src, u32 dst)
static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
u32 src, u32 dst)
{
return dst <= src
? key_visible_in_snapshot(c, s, dst, src)
@ -618,10 +621,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
w->first_this_inode = true;
if (trans_was_restarted(trans, restart_count))
return -BCH_ERR_transaction_restart_nested;
return 0;
return trans_was_restarted(trans, restart_count);
}
static struct inode_walker_entry *
@ -822,7 +822,7 @@ bad_hash:
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
bch_err_fn(c, ret);
if (ret)
return ret;
ret = -BCH_ERR_transaction_restart_nested;
@ -886,7 +886,8 @@ static int check_inode(struct btree_trans *trans,
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret) {
bch_err_msg(c, ret, "in fsck: error updating inode");
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_msg(c, ret, "in fsck updating inode");
return ret;
}
@ -904,8 +905,7 @@ static int check_inode(struct btree_trans *trans,
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error in fsck: error while deleting inode: %s",
bch2_err_str(ret));
bch_err_msg(c, ret, "in fsck deleting inode");
return ret;
}
@ -928,8 +928,7 @@ static int check_inode(struct btree_trans *trans,
POS(u.bi_inum, U64_MAX),
0, NULL);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error in fsck: error truncating inode: %s",
bch2_err_str(ret));
bch_err_msg(c, ret, "in fsck truncating inode");
if (ret)
return ret;
@ -954,8 +953,7 @@ static int check_inode(struct btree_trans *trans,
sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
if (sectors < 0) {
bch_err(c, "error in fsck: error recounting inode sectors: %s",
bch2_err_str(sectors));
bch_err_msg(c, sectors, "fsck recounting inode sectors");
return sectors;
}
@ -974,13 +972,13 @@ static int check_inode(struct btree_trans *trans,
if (do_update) {
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret) {
bch_err_msg(c, ret, "in fsck: error updating inode");
bch_err_msg(c, ret, "in fsck updating inode");
return ret;
}
}
err:
fsck_err:
if (ret)
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_fn(c, ret);
return ret;
}
@ -989,7 +987,7 @@ noinline_for_stack
int bch2_check_inodes(struct bch_fs *c)
{
bool full = c->opts.fsck;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bch_inode_unpacked prev = { 0 };
struct snapshots_seen s;
@ -997,16 +995,15 @@ int bch2_check_inodes(struct bch_fs *c)
int ret;
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_inode(&trans, &iter, k, &prev, &s, full));
check_inode(trans, &iter, k, &prev, &s, full));
bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@ -1081,7 +1078,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
w->last_pos.inode, i->snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
ret = write_inode(trans, &i->inode, i->snapshot);
ret = fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@ -1089,9 +1086,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
fsck_err:
if (ret)
bch_err_fn(c, ret);
if (!ret && trans_was_restarted(trans, restart_count))
ret = -BCH_ERR_transaction_restart_nested;
return ret;
return ret ?: trans_was_restarted(trans, restart_count);
}
struct extent_end {
@ -1441,7 +1436,7 @@ int bch2_check_extents(struct bch_fs *c)
{
struct inode_walker w = inode_walker_init();
struct snapshots_seen s;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct extent_ends extent_ends;
@ -1450,23 +1445,22 @@ int bch2_check_extents(struct bch_fs *c)
snapshots_seen_init(&s);
extent_ends_init(&extent_ends);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
bch2_disk_reservation_put(c, &res);
check_extent(&trans, &iter, k, &w, &s, &extent_ends);
check_extent(trans, &iter, k, &w, &s, &extent_ends);
})) ?:
check_i_sectors(&trans, &w);
check_i_sectors(trans, &w);
bch2_disk_reservation_put(c, &res);
extent_ends_exit(&extent_ends);
inode_walker_exit(&w);
bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@ -1501,7 +1495,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
ret = write_inode(trans, &i->inode, i->snapshot);
ret = fsck_write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
}
@ -1509,9 +1503,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
fsck_err:
if (ret)
bch_err_fn(c, ret);
if (!ret && trans_was_restarted(trans, restart_count))
ret = -BCH_ERR_transaction_restart_nested;
return ret;
return ret ?: trans_was_restarted(trans, restart_count);
}
static int check_dirent_target(struct btree_trans *trans,
@ -1809,23 +1801,22 @@ int bch2_check_dirents(struct bch_fs *c)
struct inode_walker target = inode_walker_init();
struct snapshots_seen s;
struct bch_hash_info hash_info;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_dirents,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_dirent(&trans, &iter, k, &hash_info, &dir, &target, &s));
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s));
bch2_trans_exit(&trans);
bch2_trans_put(trans);
snapshots_seen_exit(&s);
inode_walker_exit(&dir);
inode_walker_exit(&target);
@ -1879,23 +1870,18 @@ int bch2_check_xattrs(struct bch_fs *c)
{
struct inode_walker inode = inode_walker_init();
struct bch_hash_info hash_info;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
k,
NULL, NULL,
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_xattr(&trans, &iter, k, &hash_info, &inode));
bch2_trans_exit(&trans);
check_xattr(trans, &iter, k, &hash_info, &inode)));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -1927,10 +1913,10 @@ static int check_root_trans(struct btree_trans *trans)
ret = commit_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(trans, BTREE_ID_subvolumes,
bch2_btree_insert_trans(trans, BTREE_ID_subvolumes,
&root_subvol.k_i, 0));
if (ret) {
bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "writing root subvol");
goto err;
}
@ -1949,7 +1935,7 @@ static int check_root_trans(struct btree_trans *trans)
ret = __write_inode(trans, &root_inode, snapshot);
if (ret)
bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "writing root inode");
}
err:
fsck_err:
@ -1964,7 +1950,7 @@ int bch2_check_root(struct bch_fs *c)
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
check_root_trans(&trans));
check_root_trans(trans));
if (ret)
bch_err_fn(c, ret);
@ -2116,16 +2102,14 @@ fsck_err:
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
pathbuf path = { 0, };
int ret;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
@ -2142,12 +2126,12 @@ int bch2_check_directory_structure(struct bch_fs *c)
if (u.bi_flags & BCH_INODE_UNLINKED)
continue;
ret = check_path(&trans, &path, &u, iter.pos.snapshot);
ret = check_path(trans, &path, &u, iter.pos.snapshot);
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
darray_exit(&path);
if (ret)
@ -2155,8 +2139,6 @@ int bch2_check_directory_structure(struct bch_fs *c)
return ret;
}
/* check_nlink pass: */
struct nlink_table {
size_t nr;
size_t size;
@ -2238,15 +2220,13 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct nlink_table *t,
u64 start, u64 *end)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct bch_inode_unpacked u;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_inodes,
for_each_btree_key(trans, iter, BTREE_ID_inodes,
POS(0, start),
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
@ -2275,8 +2255,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
if (ret)
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
@ -2288,7 +2268,7 @@ noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
u64 range_start, u64 range_end)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct snapshots_seen s;
struct btree_iter iter;
struct bkey_s_c k;
@ -2297,9 +2277,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
snapshots_seen_init(&s);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
@ -2319,12 +2297,12 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
break;
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (ret)
bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
snapshots_seen_exit(&s);
return ret;
}
@ -2375,22 +2353,17 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct nlink_table *links,
u64 range_start, u64 range_end)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
size_t idx = 0;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
POS(0, range_start),
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
bch2_trans_exit(&trans);
ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
POS(0, range_start),
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
if (ret < 0) {
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
return ret;
@ -2472,13 +2445,12 @@ int bch2_fix_reflink_p(struct bch_fs *c)
return 0;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
for_each_btree_key_commit(trans, iter,
BTREE_ID_extents, POS_MIN,
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
fix_reflink_p_key(&trans, &iter, k)));
fix_reflink_p_key(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;

View File

@ -120,8 +120,7 @@ static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i),
&unpacked);
ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
@ -318,7 +317,7 @@ int bch2_inode_unpack(struct bkey_s_c k,
return bch2_inode_unpack_slowpath(k, unpacked);
}
int bch2_inode_peek(struct btree_trans *trans,
static int bch2_inode_peek_nowarn(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
subvol_inum inum, unsigned flags)
@ -349,7 +348,17 @@ int bch2_inode_peek(struct btree_trans *trans,
return 0;
err:
bch2_trans_iter_exit(trans, iter);
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;
}
/*
 * Wrapper around bch2_inode_peek_nowarn() that additionally logs the failure,
 * unless the error is a transaction restart. The return value is passed
 * through unchanged.
 */
int bch2_inode_peek(struct btree_trans *trans,
		    struct btree_iter *iter,
		    struct bch_inode_unpacked *inode,
		    subvol_inum inum, unsigned flags)
{
	int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);

	/* success and transaction restarts are not worth a log line */
	if (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ret;

	bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
	return ret;
}
@ -817,7 +826,7 @@ err:
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_i_inode_generation delete;
struct bch_inode_unpacked inode_u;
@ -825,8 +834,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
u32 snapshot;
int ret;
bch2_trans_init(&trans, c, 0, 1024);
/*
* If this was a directory, there shouldn't be any real dirents left -
* but there could be whiteouts (from hash collisions) that we should
@ -835,19 +842,19 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
* XXX: the dirent code ideally would delete whiteouts when they're no
* longer needed
*/
ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?:
bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?:
bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents);
ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
if (ret)
goto err;
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
k = bch2_bkey_get_iter(&trans, &iter, BTREE_ID_inodes,
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, inum.inum, snapshot),
BTREE_ITER_INTENT|BTREE_ITER_CACHED);
ret = bkey_err(k);
@ -855,7 +862,7 @@ retry:
goto err;
if (!bkey_is_inode(k.k)) {
bch2_fs_inconsistent(trans.c,
bch2_fs_inconsistent(c,
"inode %llu:%u not found when deleting",
inum.inum, snapshot);
ret = -EIO;
@ -868,15 +875,28 @@ retry:
delete.k.p = iter.pos;
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
bch2_trans_commit(&trans, NULL, NULL,
ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
err:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
/*
 * Look up inode @inum into @inode within @trans, using the non-logging
 * bch2_inode_peek_nowarn(). The temporary iterator is released here on
 * success; on failure the error is returned as-is.
 */
int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
				  subvol_inum inum,
				  struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	int ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);

	if (ret)
		return ret;

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}
@ -897,7 +917,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
struct bch_inode_unpacked *inode)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_inode_find_by_inum_trans(&trans, inum, inode));
bch2_inode_find_by_inum_trans(trans, inum, inode));
}
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
@ -1069,14 +1089,12 @@ delete:
int bch2_delete_dead_inodes(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0);
ret = bch2_btree_write_buffer_flush_sync(&trans);
ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
goto err;
@ -1086,26 +1104,26 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
* but we can't retry because the btree write buffer won't have been
* flushed and we'd spin:
*/
for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
ret = lockrestart_do(&trans, may_delete_deleted_inode(&trans, k.k->p));
ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p));
if (ret < 0)
break;
if (ret) {
if (!test_bit(BCH_FS_RW, &c->flags)) {
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
bch2_fs_lazy_rw(c);
}
ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot);
ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}

View File

@ -118,6 +118,9 @@ int bch2_inode_create(struct btree_trans *, struct btree_iter *,
int bch2_inode_rm(struct bch_fs *, subvol_inum);
int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
subvol_inum,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,

View File

@ -1,202 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_H
#define _BCACHEFS_IO_H
#include "checksum.h"
#include "bkey_buf.h"
#include "io_types.h"
/*
 * Map a struct bio that is embedded as the 'bio' member back to its
 * containing bch_write_bio / bch_read_bio:
 */
#define to_wbio(_bio) \
container_of((_bio), struct bch_write_bio, bio)
#define to_rbio(_bio) \
container_of((_bio), struct bch_read_bio, bio)
void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
/*
 * bch2_latency_acct() is a real function unless
 * CONFIG_BCACHEFS_NO_LATENCY_ACCT is defined, in which case it becomes an
 * empty inline stub so callers need no #ifdefs of their own:
 */
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_latency_acct(struct bch_dev *, u64, int);
#else
static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
#endif
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
#define BLK_STS_REMOVED ((__force blk_status_t)128)
const char *bch2_blk_status_to_str(blk_status_t);
/*
 * x-macro list of write flags. It is expanded twice below with different
 * definitions of x(): once to number the flags, once to turn each number into
 * a single-bit mask.
 */
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
x(DATA_ENCODED) \
x(PAGES_STABLE) \
x(PAGES_OWNED) \
x(ONLY_SPECIFIED_DEVS) \
x(WROTE_DATA_INLINE) \
x(FROM_INTERNAL) \
x(CHECK_ENOSPC) \
x(SYNC) \
x(MOVE) \
x(IN_WORKER) \
x(DONE) \
x(IO_ERROR) \
x(CONVERT_UNWRITTEN)
/* Bit numbers: __BCH_WRITE_<flag>, consecutive in list order */
enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
/* Bit masks: BCH_WRITE_<flag> == BIT(__BCH_WRITE_<flag>) */
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
BCH_WRITE_FLAGS()
#undef x
};
/*
 * Pick the workqueue for @op's index update: writes at the copygc watermark
 * use the filesystem's copygc_wq, everything else uses btree_update_wq.
 */
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
	if (op->watermark == BCH_WATERMARK_copygc)
		return op->c->copygc_wq;

	return op->c->btree_update_wq;
}
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
struct btree_iter *, struct bkey_i *,
struct disk_reservation *, u64, s64 *, bool);
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
unsigned, struct bch_io_opts, s64 *,
struct write_point_specifier);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
/*
 * Put every field of @op that is touched here into a known starting state.
 *
 * The checksum type and compression setting are derived from @opts,
 * nr_replicas_required comes from the filesystem options of @c, and the
 * remaining fields are zeroed or set to a sentinel (pos = POS_MAX,
 * new_i_size = U64_MAX).
 */
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
				      struct bch_io_opts opts)
{
	/* owner, completion and status */
	op->c			= c;
	op->end_io		= NULL;
	op->flags		= 0;
	op->written		= 0;
	op->error		= 0;

	/* settings taken from the IO options / filesystem options */
	op->opts		= opts;
	op->csum_type		= bch2_data_checksum_type(c, opts);
	op->compression_opt	= opts.compression;
	op->nr_replicas_required = c->opts.data_replicas_required;

	/* allocation state */
	op->nr_replicas		= 0;
	op->watermark		= BCH_WATERMARK_normal;
	op->incompressible	= 0;
	op->open_buckets.nr	= 0;
	op->devs_have.nr	= 0;
	op->target		= 0;
	op->write_point		= (struct write_point_specifier) { 0 };
	op->res			= (struct disk_reservation) { 0 };
	op->devs_need_flush	= NULL;

	/* where the data goes */
	op->subvol		= 0;
	op->pos			= POS_MAX;
	op->version		= ZERO_VERSION;
	op->new_i_size		= U64_MAX;
	op->i_sectors_delta	= 0;
}
void bch2_write(struct closure *);
void bch2_write_point_do_index_updates(struct work_struct *);
/*
 * Convert @bio to its containing bch_write_bio and zero that structure's
 * 'wbio' member; returns the bch_write_bio.
 */
static inline struct bch_write_bio *wbio_init(struct bio *bio)
{
	struct bch_write_bio *w = to_wbio(bio);

	memset(&w->wbio, 0, sizeof(w->wbio));

	return w;
}
void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
struct bkey_buf *);
/*
 * If @k is a reflink pointer (KEY_TYPE_reflink_p), switch *@data_btree to the
 * reflink btree and resolve the pointer via __bch2_read_indirect_extent().
 * Any other key type is left untouched and 0 is returned.
 */
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
					    enum btree_id *data_btree,
					    unsigned *offset_into_extent,
					    struct bkey_buf *k)
{
	if (k->k->k.type == KEY_TYPE_reflink_p) {
		*data_btree = BTREE_ID_reflink;
		return __bch2_read_indirect_extent(trans, offset_into_extent, k);
	}

	return 0;
}
/*
 * Read flags; each value is a distinct bit so that several can be OR'd
 * together into one flags word:
 */
enum bch_read_flags {
BCH_READ_RETRY_IF_STALE = 1 << 0,
BCH_READ_MAY_PROMOTE = 1 << 1,
BCH_READ_USER_MAPPED = 1 << 2,
BCH_READ_NODECODE = 1 << 3,
BCH_READ_LAST_FRAGMENT = 1 << 4,
/* internal: */
BCH_READ_MUST_BOUNCE = 1 << 5,
BCH_READ_MUST_CLONE = 1 << 6,
BCH_READ_IN_RETRY = 1 << 7,
};
int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
struct bvec_iter, struct bpos, enum btree_id,
struct bkey_s_c, unsigned,
struct bch_io_failures *, unsigned);
static inline void bch2_read_extent(struct btree_trans *trans,
struct bch_read_bio *rbio, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent, unsigned flags)
{
__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
data_btree, k, offset_into_extent, NULL, flags);
}
void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
subvol_inum, struct bch_io_failures *, unsigned flags);
/*
 * Start a read of @rbio for inode @inum: fills in the rbio's filesystem
 * pointer, start time and subvolume, then calls __bch2_read() over the whole
 * bio with the RETRY_IF_STALE, MAY_PROMOTE and USER_MAPPED flags set.
 *
 * BUGs if @rbio->_state is non-zero on entry.
 */
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
			     subvol_inum inum)
{
	struct bch_io_failures failed = { .nr = 0 };
	const unsigned read_flags =
		BCH_READ_RETRY_IF_STALE|
		BCH_READ_MAY_PROMOTE|
		BCH_READ_USER_MAPPED;

	BUG_ON(rbio->_state);

	rbio->c			= c;
	rbio->start_time	= local_clock();
	rbio->subvol		= inum.subvol;

	__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, read_flags);
}
/*
 * Convert @bio to its containing bch_read_bio, clear its state and promote
 * pointer, record @opts, and return it.
 */
static inline struct bch_read_bio *rbio_init(struct bio *bio,
					     struct bch_io_opts opts)
{
	struct bch_read_bio *r = to_rbio(bio);

	r->_state	= 0;
	r->promote	= NULL;
	r->opts		= opts;

	return r;
}
void bch2_fs_io_exit(struct bch_fs *);
int bch2_fs_io_init(struct bch_fs *);
#endif /* _BCACHEFS_IO_H */

497
libbcachefs/io_misc.c Normal file
View File

@ -0,0 +1,497 @@
// SPDX-License-Identifier: GPL-2.0
/*
* io_misc.c - fallocate, fpunch, truncate:
*/
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "io_misc.h"
#include "io_write.h"
#include "logged_ops.h"
#include "subvolume.h"
/*
 * Overwrites whatever was present with zeroes:
 *
 * Works on the single slot currently under @iter: @sectors is clamped to the
 * end of that key, a disk reservation is taken, and then one new key is
 * written through bch2_extent_update():
 *  - a reservation key, or
 *  - when opts.nocow is set and the superblock version is at least
 *    bcachefs_metadata_version_unwritten_extents, an extent built from
 *    freshly allocated sectors whose pointers are all flagged unwritten.
 *
 * Returns the result of bch2_extent_update(), or the error from an earlier
 * step that failed. -BCH_ERR_operation_blocked from the allocator is reported
 * as -BCH_ERR_transaction_restart_nested.
 */
int bch2_extent_fallocate(struct btree_trans *trans,
subvol_inum inum,
struct btree_iter *iter,
unsigned sectors,
struct bch_io_opts opts,
s64 *i_sectors_delta,
struct write_point_specifier write_point)
{
struct bch_fs *c = trans->c;
struct disk_reservation disk_res = { 0 };
struct closure cl;
struct open_buckets open_buckets = { 0 };
struct bkey_s_c k;
struct bkey_buf old, new;
unsigned sectors_allocated = 0;
bool have_reservation = false;
bool unwritten = opts.nocow &&
c->sb.version >= bcachefs_metadata_version_unwritten_extents;
int ret;
bch2_bkey_buf_init(&old);
bch2_bkey_buf_init(&new);
closure_init_stack(&cl);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
/*
 * NOTE(review): this returns without going through the err: cleanup -
 * presumably fine because nothing has been reserved or allocated yet;
 * confirm bch2_bkey_buf_init() needs no matching exit.
 */
if (ret)
return ret;
/* never extend past the end of the key found in this slot */
sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
/*
 * NOTE(review): have_reservation is still false here on every call (it is
 * only set to true further down), so this branch always runs and the
 * "if (have_reservation)" arm below is never taken within this function.
 */
if (!have_reservation) {
/* replicas still needed on top of what @k already has, clamped at 0 */
unsigned new_replicas =
max(0, (int) opts.data_replicas -
(int) bch2_bkey_nr_ptrs_fully_allocated(k));
/*
* Get a disk reservation before (in the nocow case) calling
* into the allocator:
*/
ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
if (unlikely(ret))
goto err;
bch2_bkey_buf_reassemble(&old, c, k);
}
if (have_reservation) {
if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
goto err;
bch2_key_resize(&new.k->k, sectors);
} else if (!unwritten) {
/* plain case: insert a reservation key covering @sectors */
struct bkey_i_reservation *reservation;
bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
reservation = bkey_reservation_init(new.k);
reservation->k.p = iter->pos;
bch2_key_resize(&reservation->k, sectors);
reservation->v.nr_replicas = opts.data_replicas;
} else {
/* unwritten case: allocate sectors and insert an extent pointing at them */
struct bkey_i_extent *e;
struct bch_devs_list devs_have;
struct write_point *wp;
struct bch_extent_ptr *ptr;
devs_have.nr = 0;
bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
e = bkey_extent_init(new.k);
e->k.p = iter->pos;
ret = bch2_alloc_sectors_start_trans(trans,
opts.foreground_target,
false,
write_point,
&devs_have,
opts.data_replicas,
opts.data_replicas,
BCH_WATERMARK_normal, 0, &cl, &wp);
/* a blocked allocation is reported to the caller as a nested restart */
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
ret = -BCH_ERR_transaction_restart_nested;
if (ret)
goto err;
/* take no more than the write point has free */
sectors = min(sectors, wp->sectors_free);
sectors_allocated = sectors;
bch2_key_resize(&e->k, sectors);
bch2_open_bucket_get(c, wp, &open_buckets);
bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
bch2_alloc_sectors_done(c, wp);
/* flag every pointer of the new extent as unwritten */
extent_for_each_ptr(extent_i_to_s(e), ptr)
ptr->unwritten = true;
}
have_reservation = true;
ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
0, i_sectors_delta, true);
err:
/* the write clock only advances if sectors were allocated and the update succeeded */
if (!ret && sectors_allocated)
bch2_increment_clock(c, sectors_allocated, WRITE);
bch2_open_buckets_put(c, &open_buckets);
bch2_disk_reservation_put(c, &disk_res);
bch2_bkey_buf_exit(&new, c);
bch2_bkey_buf_exit(&old, c);
/*
 * NOTE(review): a remaining count other than 1 presumably means the
 * closure was handed to a waiter by the allocator; btree locks are
 * dropped before blocking in closure_sync().
 */
if (closure_nr_remaining(&cl) != 1) {
bch2_trans_unlock(trans);
closure_sync(&cl);
}
return ret;
}
/*
 * Punch out the extents of inode @inum from @iter's current position up to
 * offset @end, by repeatedly writing an empty key (bkey_init()) over the
 * range with bch2_extent_update(). @i_sectors_delta is passed through to
 * bch2_extent_update() unchanged.
 *
 * The loop keeps going across transaction restarts and stops on the first
 * other error, or when bch2_btree_iter_peek_upto() finds no more keys before
 * @end.
 *
 * Returns -BCH_ERR_transaction_restart if we had to drop locks: on an
 * otherwise successful run, the last restart error seen is returned instead
 * of 0 so the caller can tell that locks were dropped.
 */
int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
		   subvol_inum inum, u64 end,
		   s64 *i_sectors_delta)
{
	struct bch_fs *c = trans->c;
	/*
	 * Largest key size, rounded down to a multiple of the block size.
	 * The mask is built from ~0U rather than ~0: left-shifting a negative
	 * int is undefined behaviour, the unsigned form gives the same bits.
	 */
	unsigned max_sectors = KEY_SIZE_MAX & (~0U << c->block_bits);
	struct bpos end_pos = POS(inum.inum, end);
	struct bkey_s_c k;
	int ret = 0, ret2 = 0;
	u32 snapshot;

	while (!ret ||
	       bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(c, 0);
		struct bkey_i delete;

		/* non-zero here can only be a restart: remember it for the caller */
		if (ret)
			ret2 = ret;

		bch2_trans_begin(trans);

		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
		if (ret)
			continue;

		bch2_btree_iter_set_snapshot(iter, snapshot);

		/*
		 * peek_upto() doesn't have ideal semantics for extents:
		 */
		k = bch2_btree_iter_peek_upto(iter, end_pos);
		if (!k.k)
			break;

		ret = bkey_err(k);
		if (ret)
			continue;

		bkey_init(&delete.k);
		delete.k.p = iter->pos;

		/* create the biggest key we can */
		bch2_key_resize(&delete.k, max_sectors);
		bch2_cut_back(end_pos, &delete);

		ret = bch2_extent_update(trans, inum, iter, &delete,
				&disk_res, 0, i_sectors_delta, false);
		bch2_disk_reservation_put(c, &disk_res);
	}

	return ret ?: ret2;
}
/*
 * Punch out the range [@start, @end) of inode @inum's extents in its own
 * transaction: sets up an intent iterator on the extents btree at @start and
 * hands it to bch2_fpunch_at().
 *
 * A transaction-restart result from bch2_fpunch_at() is reported as success
 * (0); any other value is returned unchanged.
 */
int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
		s64 *i_sectors_delta)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     POS(inum.inum, start),
			     BTREE_ITER_INTENT);

	ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	return bch2_err_matches(ret, BCH_ERR_transaction_restart) ? 0 : ret;
}
/* truncate: */
void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k);
prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size));
}
/*
 * Within @trans: look up inode @inum with an intent iterator, set its bi_size
 * to @new_i_size and queue the updated inode with bch2_inode_write().
 * Returns the first error encountered, or the result of bch2_inode_write().
 */
static int truncate_set_isize(struct btree_trans *trans,
			      subvol_inum inum,
			      u64 new_i_size)
{
	struct btree_iter iter = { NULL };
	struct bch_inode_unpacked inode_u;
	int ret;

	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
	if (!ret) {
		inode_u.bi_size = new_i_size;
		ret = bch2_inode_write(trans, &iter, &inode_u);
	}

	/* the iterator is released on both the success and the error path */
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
/*
 * Carry out the truncate described by logged op key @op_k:
 *  1. commit the new i_size to the inode,
 *  2. punch out all extents from the new size (rounded up to a block, in
 *     512-byte units) to U64_MAX,
 *  3. call bch2_logged_op_finish() on the op - this happens whether or not
 *     the earlier steps succeeded.
 *
 * A transaction-restart result from the punch step is treated as success.
 */
static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
					    struct bkey_i *op_k,
					    u64 *i_sectors_delta)
{
	struct bch_fs *c = trans->c;
	struct btree_iter fpunch_iter;
	struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
	subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
	u64 new_i_size = le64_to_cpu(op->v.new_i_size);
	int ret;

	ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
			truncate_set_isize(trans, inum, new_i_size));
	if (!ret) {
		bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
				     POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
				     BTREE_ITER_INTENT);
		ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
		bch2_trans_iter_exit(trans, &fpunch_iter);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
	}

	bch2_logged_op_finish(trans, op_k);
	return ret;
}
int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
{
return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
}
/*
 * Truncate inode @inum to @new_i_size as a logged operation: build a
 * logged_op_truncate key on the stack, record it with bch2_logged_op_start(),
 * and - if that succeeded - run the truncate itself.
 *
 * `trans` in the expression below is not declared in this function;
 * presumably it is provided by the bch2_trans_run() macro.
 */
int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
{
	struct bkey_i_logged_op_truncate op;
	int ret;

	bkey_logged_op_truncate_init(&op.k_i);

	/* on-disk fields are little endian */
	op.v.new_i_size	= cpu_to_le64(new_i_size);
	op.v.inum	= cpu_to_le64(inum.inum);
	op.v.subvol	= cpu_to_le32(inum.subvol);

	ret = bch2_trans_run(c,
		bch2_logged_op_start(trans, &op.k_i) ?:
		__bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));

	return ret;
}
/* finsert/fcollapse: */
void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k);
prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset));
prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
}
/*
 * Add @len sectors (may be negative or zero) to the i_size of inode @inum and
 * refresh its mtime/ctime, within the current transaction.
 *
 * @offset and @len are given in 512 byte sectors and converted to bytes here.
 * When growing the file (@len > 0):
 *  - returns -EFBIG if the new size would exceed MAX_LFS_FILESIZE
 *  - returns -EINVAL if @offset is at or past the current i_size
 *
 * Returns 0 on success, or the error from looking up/writing the inode.
 */
static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
{
	struct bch_inode_unpacked unpacked;
	struct btree_iter inode_iter;
	int ret;

	/* sectors -> bytes */
	offset	<<= 9;
	len	<<= 9;

	ret = bch2_inode_peek(trans, &inode_iter, &unpacked, inum, BTREE_ITER_INTENT);
	if (ret)
		return ret;

	if (len > 0) {
		if (MAX_LFS_FILESIZE - unpacked.bi_size < len)
			ret = -EFBIG;
		else if (offset >= unpacked.bi_size)
			ret = -EINVAL;

		if (ret)
			goto out;
	}

	unpacked.bi_size += len;
	unpacked.bi_ctime = bch2_current_time(trans->c);
	unpacked.bi_mtime = unpacked.bi_ctime;

	ret = bch2_inode_write(trans, &inode_iter, &unpacked);
out:
	bch2_trans_iter_exit(trans, &inode_iter);
	return ret;
}
static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
struct bkey_i *op_k,
u64 *i_sectors_delta)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
u64 dst_offset = le64_to_cpu(op->v.dst_offset);
u64 src_offset = le64_to_cpu(op->v.src_offset);
s64 shift = dst_offset - src_offset;
u64 len = abs(shift);
u64 pos = le64_to_cpu(op->v.pos);
bool insert = shift > 0;
int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(inum.inum, 0),
BTREE_ITER_INTENT);
switch (op->v.state) {
case LOGGED_OP_FINSERT_start:
op->v.state = LOGGED_OP_FINSERT_shift_extents;
if (insert) {
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
adjust_i_size(trans, inum, src_offset, len) ?:
bch2_logged_op_update(trans, &op->k_i));
if (ret)
goto err;
} else {
bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_logged_op_update(trans, &op->k_i));
}
fallthrough;
case LOGGED_OP_FINSERT_shift_extents:
while (1) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete, *copy;
struct bkey_s_c k;
struct bpos src_pos = POS(inum.inum, src_offset);
u32 snapshot;
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto btree_err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
k = insert
? bch2_btree_iter_peek_prev(&iter)
: bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
if ((ret = bkey_err(k)))
goto btree_err;
if (!k.k ||
k.k->p.inode != inum.inum ||
bkey_le(k.k->p, POS(inum.inum, src_offset)))
break;
copy = bch2_bkey_make_mut_noupdate(trans, k);
if ((ret = PTR_ERR_OR_ZERO(copy)))
goto btree_err;
if (insert &&
bkey_lt(bkey_start_pos(k.k), src_pos)) {
bch2_cut_front(src_pos, copy);
/* Splitting compressed extent? */
bch2_disk_reservation_add(c, &disk_res,
copy->k.size *
bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
BCH_DISK_RESERVATION_NOFAIL);
}
bkey_init(&delete.k);
delete.k.p = copy->k.p;
delete.k.p.snapshot = snapshot;
delete.k.size = copy->k.size;
copy->k.p.offset += shift;
copy->k.p.snapshot = snapshot;
op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
bch2_logged_op_update(trans, &op->k_i) ?:
bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
btree_err:
bch2_disk_reservation_put(c, &disk_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
pos = le64_to_cpu(op->v.pos);
}
op->v.state = LOGGED_OP_FINSERT_finish;
if (!insert) {
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
adjust_i_size(trans, inum, src_offset, shift) ?:
bch2_logged_op_update(trans, &op->k_i));
} else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
adjust_i_size(trans, inum, 0, 0) ?:
bch2_logged_op_update(trans, &op->k_i));
}
break;
case LOGGED_OP_FINSERT_finish:
break;
}
err:
bch2_logged_op_finish(trans, op_k);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
/*
 * Resume callback for logged finsert keys (referenced from the logged_op_fns
 * table via the BCH_LOGGED_OPS() x-macro): continues the operation described
 * by @op_k, passing NULL for i_sectors_delta.
 */
int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
{
return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
}
/*
 * Insert (@insert == true) or collapse (@insert == false) a range of @len
 * sectors at sector @offset in inode @inum, as a logged operation.
 *
 * The logged op key records the source offset (@offset), the destination
 * offset (@offset shifted by +/- @len) and the position to start moving
 * extents from: U64_MAX for an insert, @offset for a collapse. The key is
 * recorded with bch2_logged_op_start() and the operation is then run; the
 * second step is skipped if the first fails.
 */
int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
			   u64 offset, u64 len, bool insert,
			   s64 *i_sectors_delta)
{
	struct bkey_i_logged_op_finsert op;
	s64 shift = insert ? len : -len;
	u64 start_pos = insert ? U64_MAX : offset;
	int ret;

	bkey_logged_op_finsert_init(&op.k_i);

	op.v.subvol	= cpu_to_le32(inum.subvol);
	op.v.inum	= cpu_to_le64(inum.inum);
	op.v.src_offset	= cpu_to_le64(offset);
	op.v.dst_offset	= cpu_to_le64(offset + shift);
	op.v.pos	= cpu_to_le64(start_pos);

	ret = bch2_trans_run(c,
		bch2_logged_op_start(trans, &op.k_i) ?:
		__bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
	return ret;
}

34
libbcachefs/io_misc.h Normal file
View File

@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_MISC_H
#define _BCACHEFS_IO_MISC_H
/*
 * Declarations for io_misc.c: fallocate and hole punching helpers, plus the
 * logged (resumable) truncate and finsert/fcollapse operations.
 */
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
unsigned, struct bch_io_opts, s64 *,
struct write_point_specifier);
/*
 * Hole punching. The s64 * argument is the i_sectors_delta pointer the logged
 * op code passes through (NULL when resuming) - NOTE(review): presumably an
 * optional out-parameter accumulating the change in allocated sectors; confirm
 * against the definitions.
 */
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
/* Logged truncate: key printing, bkey ops, resume callback and entry point */
void bch2_logged_op_truncate_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_logged_op_truncate ((struct bkey_ops) { \
.val_to_text = bch2_logged_op_truncate_to_text, \
.min_val_size = 24, \
})
int bch2_resume_logged_op_truncate(struct btree_trans *, struct bkey_i *);
/* Arguments: filesystem, inode, new i_size in bytes, i_sectors_delta */
int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
/* Logged finsert/fcollapse: key printing, bkey ops, resume callback and entry point */
void bch2_logged_op_finsert_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
/* NOTE(review): min_val_size matches the truncate ops above - confirm it is right for the finsert value */
#define bch2_bkey_ops_logged_op_finsert ((struct bkey_ops) { \
.val_to_text = bch2_logged_op_finsert_to_text, \
.min_val_size = 24, \
})
int bch2_resume_logged_op_finsert(struct btree_trans *, struct bkey_i *);
/* Arguments: filesystem, inode, offset, len (both in sectors), insert?, i_sectors_delta */
int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
#endif /* _BCACHEFS_IO_MISC_H */

1210
libbcachefs/io_read.c Normal file

File diff suppressed because it is too large Load Diff

158
libbcachefs/io_read.h Normal file
View File

@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_READ_H
#define _BCACHEFS_IO_READ_H
#include "bkey_buf.h"
/*
 * Per-read state wrapped around a struct bio. The bio is embedded as the last
 * member and to_rbio() recovers the containing bch_read_bio from it with
 * container_of().
 */
struct bch_read_bio {
struct bch_fs *c;
/* start_time is set from local_clock() in bch2_read() */
u64 start_time;
u64 submit_time;
/*
 * Reads will often have to be split, and if the extent being read from
 * was checksummed or compressed we'll also have to allocate bounce
 * buffers and copy the data back into the original bio.
 *
 * If we didn't have to split, we have to save and restore the original
 * bi_end_io - @split below indicates which:
 */
union {
struct bch_read_bio *parent;
bio_end_io_t *end_io;
};
/*
 * Saved copy of bio->bi_iter, from submission time - allows us to
 * resubmit on IO error, and also to copy data back to the original bio
 * when we're bouncing:
 */
struct bvec_iter bvec_iter;
unsigned offset_into_extent;
u16 flags;
/*
 * Per-read state bits; _state aliases the whole bitfield so that it can
 * be checked or reset in one go (bch2_read() asserts it is zero,
 * rbio_init() clears it):
 */
union {
struct {
u16 bounce:1,
split:1,
kmalloc:1,
have_ioref:1,
narrow_crcs:1,
hole:1,
retry:2,
context:2;
};
u16 _state;
};
struct bch_devs_list devs_have;
struct extent_ptr_decoded pick;
/*
 * pos we read from - different from data_pos for indirect extents:
 */
u32 subvol;
struct bpos read_pos;
/*
 * start pos of data we read (may not be pos of data we want) - for
 * promote, narrow extents paths:
 */
enum btree_id data_btree;
struct bpos data_pos;
struct bversion version;
/* Cleared to NULL by rbio_init() */
struct promote_op *promote;
struct bch_io_opts opts;
struct work_struct work;
/* Must stay embedded (not a pointer): to_rbio() relies on container_of() */
struct bio bio;
};
#define to_rbio(_bio) container_of((_bio), struct bch_read_bio, bio)
struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
struct bkey_buf *);
/*
 * If @k is a reflink pointer (KEY_TYPE_reflink_p), switch *@data_btree to the
 * reflink btree and let __bch2_read_indirect_extent() resolve it, returning
 * its result. Any other key type is left untouched and 0 is returned.
 */
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
				enum btree_id *data_btree,
				unsigned *offset_into_extent,
				struct bkey_buf *k)
{
	if (k->k->k.type == KEY_TYPE_reflink_p) {
		*data_btree = BTREE_ID_reflink;
		return __bch2_read_indirect_extent(trans, offset_into_extent, k);
	}

	return 0;
}
/*
 * Bit flags for the `flags` argument of __bch2_read() and
 * __bch2_read_extent(). bch2_read() passes RETRY_IF_STALE, MAY_PROMOTE and
 * USER_MAPPED; the flags after the "internal" marker are not meant to be
 * passed in by outside callers.
 */
enum bch_read_flags {
BCH_READ_RETRY_IF_STALE = 1 << 0,
BCH_READ_MAY_PROMOTE = 1 << 1,
BCH_READ_USER_MAPPED = 1 << 2,
BCH_READ_NODECODE = 1 << 3,
BCH_READ_LAST_FRAGMENT = 1 << 4,
/* internal: */
BCH_READ_MUST_BOUNCE = 1 << 5,
BCH_READ_MUST_CLONE = 1 << 6,
BCH_READ_IN_RETRY = 1 << 7,
};
int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
struct bvec_iter, struct bpos, enum btree_id,
struct bkey_s_c, unsigned,
struct bch_io_failures *, unsigned);
static inline void bch2_read_extent(struct btree_trans *trans,
struct bch_read_bio *rbio, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent, unsigned flags)
{
__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
data_btree, k, offset_into_extent, NULL, flags);
}
void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
subvol_inum, struct bch_io_failures *, unsigned flags);
/*
 * Top level read entry point: read into @rbio from inode @inum.
 *
 * @rbio must have been initialized with rbio_init() (its state bits must be
 * zero). Records the filesystem, start time and subvolume in @rbio, then calls
 * __bch2_read() with an empty failure list and the RETRY_IF_STALE,
 * MAY_PROMOTE and USER_MAPPED flags.
 */
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
			     subvol_inum inum)
{
	struct bch_io_failures failed = { .nr = 0 };
	unsigned read_flags = BCH_READ_RETRY_IF_STALE|
			      BCH_READ_MAY_PROMOTE|
			      BCH_READ_USER_MAPPED;

	BUG_ON(rbio->_state);

	rbio->c			= c;
	rbio->start_time	= local_clock();
	rbio->subvol		= inum.subvol;

	__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, read_flags);
}
static inline struct bch_read_bio *rbio_init(struct bio *bio,
struct bch_io_opts opts)
{
struct bch_read_bio *rbio = to_rbio(bio);
rbio->_state = 0;
rbio->promote = NULL;
rbio->opts = opts;
return rbio;
}
void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);
#endif /* _BCACHEFS_IO_READ_H */

File diff suppressed because it is too large Load Diff

110
libbcachefs/io_write.h Normal file
View File

@ -0,0 +1,110 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_WRITE_H
#define _BCACHEFS_IO_WRITE_H
#include "checksum.h"
#include "io_write_types.h"
#define to_wbio(_bio) \
container_of((_bio), struct bch_write_bio, bio)
void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_latency_acct(struct bch_dev *, u64, int);
#else
static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
#endif
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
/*
 * Write flags, defined once as an x-macro list and expanded twice below:
 *  - enum __bch_write_flags: __BCH_WRITE_<name> is the bit number
 *  - enum bch_write_flags:   BCH_WRITE_<name> is the corresponding bit mask,
 *                            BIT(__BCH_WRITE_<name>)
 * Bit numbers follow the order of this list, so adding an entry in the middle
 * renumbers everything after it.
 */
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
x(DATA_ENCODED) \
x(PAGES_STABLE) \
x(PAGES_OWNED) \
x(ONLY_SPECIFIED_DEVS) \
x(WROTE_DATA_INLINE) \
x(FROM_INTERNAL) \
x(CHECK_ENOSPC) \
x(SYNC) \
x(MOVE) \
x(IN_WORKER) \
x(DONE) \
x(IO_ERROR) \
x(CONVERT_UNWRITTEN)
/* Bit numbers: */
enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
/* Bit masks: */
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
BCH_WRITE_FLAGS()
#undef x
};
/*
 * Pick the workqueue for @op's index updates: writes running at the copygc
 * watermark use the copygc workqueue, everything else uses the btree update
 * workqueue.
 */
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
	if (op->watermark == BCH_WATERMARK_copygc)
		return op->c->copygc_wq;

	return op->c->btree_update_wq;
}
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
struct btree_iter *, struct bkey_i *,
struct disk_reservation *, u64, s64 *, bool);
/*
 * Reset @op to its defaults for a write on filesystem @c with io options
 * @opts. Only the fields listed here are touched; callers fill in the rest
 * (position, subvolume, replica count, ...) afterwards.
 */
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
				      struct bch_io_opts opts)
{
	/* Identity and completion state: */
	op->c			= c;
	op->end_io		= NULL;
	op->flags		= 0;
	op->written		= 0;
	op->error		= 0;

	/* Settings derived from the io options / filesystem options: */
	op->opts		= opts;
	op->csum_type		= bch2_data_checksum_type(c, opts);
	op->compression_opt	= opts.compression;
	op->nr_replicas_required = c->opts.data_replicas_required;

	/* Allocation state: */
	op->nr_replicas		= 0;
	op->watermark		= BCH_WATERMARK_normal;
	op->incompressible	= 0;
	op->open_buckets.nr	= 0;
	op->devs_have.nr	= 0;
	op->target		= 0;
	op->write_point		= (struct write_point_specifier) { 0 };
	op->res			= (struct disk_reservation) { 0 };
	op->devs_need_flush	= NULL;

	/* Where and what we're writing: */
	op->subvol		= 0;
	op->pos			= POS_MAX;
	op->version		= ZERO_VERSION;
	op->new_i_size		= U64_MAX;
	op->i_sectors_delta	= 0;
}
void bch2_write(struct closure *);
void bch2_write_point_do_index_updates(struct work_struct *);
static inline struct bch_write_bio *wbio_init(struct bio *bio)
{
struct bch_write_bio *wbio = to_wbio(bio);
memset(&wbio->wbio, 0, sizeof(wbio->wbio));
return wbio;
}
void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
void bch2_fs_io_write_exit(struct bch_fs *);
int bch2_fs_io_write_init(struct bch_fs *);
#endif /* _BCACHEFS_IO_WRITE_H */

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_TYPES_H
#define _BCACHEFS_IO_TYPES_H
#ifndef _BCACHEFS_IO_WRITE_TYPES_H
#define _BCACHEFS_IO_WRITE_TYPES_H
#include "alloc_types.h"
#include "btree_types.h"
@ -13,75 +13,6 @@
#include <linux/llist.h>
#include <linux/workqueue.h>
struct bch_read_bio {
struct bch_fs *c;
u64 start_time;
u64 submit_time;
/*
* Reads will often have to be split, and if the extent being read from
* was checksummed or compressed we'll also have to allocate bounce
* buffers and copy the data back into the original bio.
*
* If we didn't have to split, we have to save and restore the original
* bi_end_io - @split below indicates which:
*/
union {
struct bch_read_bio *parent;
bio_end_io_t *end_io;
};
/*
* Saved copy of bio->bi_iter, from submission time - allows us to
* resubmit on IO error, and also to copy data back to the original bio
* when we're bouncing:
*/
struct bvec_iter bvec_iter;
unsigned offset_into_extent;
u16 flags;
union {
struct {
u16 bounce:1,
split:1,
kmalloc:1,
have_ioref:1,
narrow_crcs:1,
hole:1,
retry:2,
context:2;
};
u16 _state;
};
struct bch_devs_list devs_have;
struct extent_ptr_decoded pick;
/*
* pos we read from - different from data_pos for indirect extents:
*/
u32 subvol;
struct bpos read_pos;
/*
* start pos of data we read (may not be pos of data we want) - for
* promote, narrow extents paths:
*/
enum btree_id data_btree;
struct bpos data_pos;
struct bversion version;
struct promote_op *promote;
struct bch_io_opts opts;
struct work_struct work;
struct bio bio;
};
struct bch_write_bio {
struct_group(wbio,
struct bch_fs *c;
@ -162,4 +93,4 @@ struct bch_write_op {
struct bch_write_bio wbio;
};
#endif /* _BCACHEFS_IO_TYPES_H */
#endif /* _BCACHEFS_IO_WRITE_TYPES_H */

View File

@ -132,13 +132,21 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
return stuck;
}
/* journal entry close/open: */
void __bch2_journal_buf_put(struct journal *j)
/*
* Final processing when the last reference of a journal buffer has been
* dropped. Drop the pin list reference acquired at journal entry open and write
* the buffer, if requested.
*/
void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
lockdep_assert_held(&j->lock);
if (__bch2_journal_pin_put(j, seq))
bch2_journal_reclaim_fast(j);
if (write)
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
}
/*
@ -204,13 +212,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
buf->data->last_seq = cpu_to_le64(buf->last_seq);
BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
cancel_delayed_work(&j->write_work);
bch2_journal_space_available(j);
bch2_journal_buf_put(j, old.idx);
__bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
}
void bch2_journal_halt(struct journal *j)
@ -588,8 +594,13 @@ out:
/**
* bch2_journal_flush_seq_async - wait for a journal entry to be written
* @j: journal object
* @seq: seq to flush
* @parent: closure object to wait with
* Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
* -EIO if @seq will never be flushed
*
* like bch2_journal_wait_on_seq, except that it triggers a write immediately if
* Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
* necessary
*/
int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
@ -829,12 +840,12 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
break;
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bch2_trans_mark_metadata_bucket(trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
ca->mi.bucket_size));
if (ret) {
bch2_open_bucket_put(c, ob[nr_got]);
bch_err(c, "error marking new journal buckets: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "marking new journal buckets");
break;
}
@ -910,7 +921,7 @@ err_unblock:
if (ret && !new_fs)
for (i = 0; i < nr_got; i++)
bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bch2_trans_mark_metadata_bucket(trans, ca,
bu[i], BCH_DATA_free, 0));
err_free:
if (!new_fs)
@ -944,7 +955,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
goto unlock;
while (ja->nr < nr) {
struct disk_reservation disk_res = { 0, 0 };
struct disk_reservation disk_res = { 0, 0, 0 };
/*
* note: journal buckets aren't really counted as _sectors_ used yet, so

View File

@ -252,9 +252,10 @@ static inline bool journal_entry_empty(struct jset *j)
return true;
}
void __bch2_journal_buf_put(struct journal *);
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
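/*
 * Drop a reference on a buffer index and return the resulting reservation
 * state; callers use journal_state_count() on it to see whether the count for
 * that buffer has hit zero.
 */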
static inline union journal_res_state journal_state_buf_put(struct journal *j, unsigned idx)
{
union journal_res_state s;
@ -264,9 +265,30 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx)
.buf2_count = idx == 2,
.buf3_count = idx == 3,
}).v, &j->reservations.counter);
return s;
}
if (!journal_state_count(s, idx) && idx == s.unwritten_idx)
__bch2_journal_buf_put(j);
void bch2_journal_buf_put_final(struct journal *, u64, bool);
/*
 * Drop a reference on journal buffer @idx; if that was the last reference, do
 * the final processing for sequence number @seq, requesting a write only if
 * @idx is the current unwritten buffer.
 *
 * Does not take j->lock itself: bch2_journal_buf_put_final() asserts the lock
 * is held, so the caller must already hold it. Use bch2_journal_buf_put()
 * otherwise.
 */
static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
{
union journal_res_state s;
s = journal_state_buf_put(j, idx);
if (!journal_state_count(s, idx))
bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
}
/*
 * Drop a reference on journal buffer @idx; if that was the last reference,
 * take j->lock and do the final processing for sequence number @seq,
 * requesting a write only if @idx is the current unwritten buffer.
 *
 * Takes j->lock itself, so it must not be called with the lock already held.
 */
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
{
union journal_res_state s;
s = journal_state_buf_put(j, idx);
if (!journal_state_count(s, idx)) {
/* bch2_journal_buf_put_final() requires j->lock */
spin_lock(&j->lock);
bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
spin_unlock(&j->lock);
}
}
/*
@ -286,7 +308,7 @@ static inline void bch2_journal_res_put(struct journal *j,
BCH_JSET_ENTRY_btree_keys,
0, 0, 0);
bch2_journal_buf_put(j, res->idx);
bch2_journal_buf_put(j, res->idx, res->seq);
res->ref = 0;
}

View File

@ -8,7 +8,6 @@
#include "checksum.h"
#include "disk_groups.h"
#include "error.h"
#include "io.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@ -238,17 +237,17 @@ static void journal_entry_err_msg(struct printbuf *out,
#define journal_entry_err(c, version, jset, entry, msg, ...) \
({ \
struct printbuf buf = PRINTBUF; \
struct printbuf _buf = PRINTBUF; \
\
journal_entry_err_msg(&buf, version, jset, entry); \
prt_printf(&buf, msg, ##__VA_ARGS__); \
journal_entry_err_msg(&_buf, version, jset, entry); \
prt_printf(&_buf, msg, ##__VA_ARGS__); \
\
switch (flags & BKEY_INVALID_WRITE) { \
case READ: \
mustfix_fsck_err(c, "%s", buf.buf); \
mustfix_fsck_err(c, "%s", _buf.buf); \
break; \
case WRITE: \
bch_err(c, "corrupt metadata before write: %s\n", buf.buf);\
bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\
if (bch2_fs_inconsistent(c)) { \
ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
@ -256,7 +255,7 @@ static void journal_entry_err_msg(struct printbuf *out,
break; \
} \
\
printbuf_exit(&buf); \
printbuf_exit(&_buf); \
true; \
})
@ -1282,7 +1281,7 @@ int bch2_journal_read(struct bch_fs *c,
continue;
for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
if (!i->ptrs[ptr].csum_good)
bch_err_dev_offset(ca, i->ptrs[ptr].sector,
@ -1380,16 +1379,21 @@ static void __journal_write_alloc(struct journal *j,
}
/**
* journal_next_bucket - move on to the next journal bucket if possible
* journal_write_alloc - decide where to write next journal entry
*
* @j: journal object
* @w: journal buf (entry to be written)
*
* Returns: 0 on success, or -EROFS on failure
*/
static int journal_write_alloc(struct journal *j, struct journal_buf *w,
unsigned sectors)
static int journal_write_alloc(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_devs_mask devs;
struct journal_device *ja;
struct bch_dev *ca;
struct dev_alloc_list devs_sorted;
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
unsigned target = c->opts.metadata_target ?:
c->opts.foreground_target;
unsigned i, replicas = 0, replicas_want =
@ -1550,6 +1554,7 @@ static void journal_write_done(struct closure *cl)
if (!journal_state_count(new, new.unwritten_idx) &&
journal_last_unwritten_seq(j) <= journal_cur_seq(j)) {
spin_unlock(&j->lock);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
} else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
@ -1562,10 +1567,11 @@ static void journal_write_done(struct closure *cl)
* might want to be written now:
*/
spin_unlock(&j->lock);
mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
} else {
spin_unlock(&j->lock);
}
spin_unlock(&j->lock);
}
static void journal_write_endio(struct bio *bio)
@ -1813,7 +1819,7 @@ void bch2_journal_write(struct closure *cl)
retry_alloc:
spin_lock(&j->lock);
ret = journal_write_alloc(j, w, sectors);
ret = journal_write_alloc(j, w);
if (ret && j->can_discard) {
spin_unlock(&j->lock);

View File

@ -290,9 +290,8 @@ void bch2_journal_do_discards(struct journal *j)
* entry, holding it open to ensure it gets replayed during recovery:
*/
static void bch2_journal_reclaim_fast(struct journal *j)
void bch2_journal_reclaim_fast(struct journal *j)
{
struct journal_entry_pin_list temp;
bool popped = false;
lockdep_assert_held(&j->lock);
@ -303,7 +302,7 @@ static void bch2_journal_reclaim_fast(struct journal *j)
*/
while (!fifo_empty(&j->pin) &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
fifo_pop(&j->pin, temp);
j->pin.front++;
popped = true;
}
@ -311,19 +310,16 @@ static void bch2_journal_reclaim_fast(struct journal *j)
bch2_journal_space_available(j);
}
void __bch2_journal_pin_put(struct journal *j, u64 seq)
bool __bch2_journal_pin_put(struct journal *j, u64 seq)
{
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
if (atomic_dec_and_test(&pin_list->count))
bch2_journal_reclaim_fast(j);
return atomic_dec_and_test(&pin_list->count);
}
void bch2_journal_pin_put(struct journal *j, u64 seq)
{
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
if (atomic_dec_and_test(&pin_list->count)) {
if (__bch2_journal_pin_put(j, seq)) {
spin_lock(&j->lock);
bch2_journal_reclaim_fast(j);
spin_unlock(&j->lock);
@ -419,6 +415,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
/**
* bch2_journal_pin_flush: ensure journal pin callback is no longer running
* @j: journal object
* @pin: pin to flush
*/
void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
{
@ -579,7 +577,11 @@ static u64 journal_seq_to_flush(struct journal *j)
}
/**
* bch2_journal_reclaim - free up journal buckets
* __bch2_journal_reclaim - free up journal buckets
* @j: journal object
* @direct: direct or background reclaim?
* @kicked: requested to run since we last ran?
* Returns: 0 on success, or -EIO if the journal has been shutdown
*
* Background journal reclaim writes out btree nodes. It should be run
* early enough so that we never completely run out of journal buckets.
@ -758,7 +760,7 @@ int bch2_journal_reclaim_start(struct journal *j)
"bch-reclaim/%s", c->name);
ret = PTR_ERR_OR_ZERO(p);
if (ret) {
bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "creating journal reclaim thread");
return ret;
}

View File

@ -31,7 +31,8 @@ journal_seq_pin(struct journal *j, u64 seq)
return &j->pin.data[seq & j->pin.mask];
}
void __bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_reclaim_fast(struct journal *);
bool __bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);

View File

@ -250,20 +250,18 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
struct journal_seq_blacklist_table *t;
struct bch_sb_field_journal_seq_blacklist *bl;
struct journal_seq_blacklist_entry *src, *dst;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
unsigned i, nr, new_nr;
int ret;
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < BTREE_ID_NR; i++) {
struct btree_iter iter;
struct btree *b;
bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN,
bch2_trans_node_iter_init(trans, &iter, i, POS_MIN,
0, 0, BTREE_ITER_PREFETCH);
retry:
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
b = bch2_btree_iter_peek_node(&iter);
@ -275,10 +273,10 @@ retry:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
return;

110
libbcachefs/logged_ops.c Normal file
View File

@ -0,0 +1,110 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "error.h"
#include "io_misc.h"
#include "logged_ops.h"
/*
 * Maps a logged operation key type to the function that resumes it.
 */
struct bch_logged_op_fn {
/* KEY_TYPE_logged_op_* value this entry handles */
u8 type;
/* Called with a copy of the logged op key to continue the operation */
int (*resume)(struct btree_trans *, struct bkey_i *);
};
/*
 * One entry per operation listed in BCH_LOGGED_OPS(): for each name n the
 * entry pairs KEY_TYPE_logged_op_n with bch2_resume_logged_op_n().
 */
static const struct bch_logged_op_fn logged_op_fns[] = {
#define x(n) { \
.type = KEY_TYPE_logged_op_##n, \
.resume = bch2_resume_logged_op_##n, \
},
BCH_LOGGED_OPS()
#undef x
};
static const struct bch_logged_op_fn *logged_op_fn(enum bch_bkey_type type)
{
for (unsigned i = 0; i < ARRAY_SIZE(logged_op_fns); i++)
if (logged_op_fns[i].type == type)
return logged_op_fns + i;
return NULL;
}
static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
int ret;
if (!fn)
return 0;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
ret = fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
/*
 * Walk every key in the logged ops btree and resume the operation it
 * describes. Logs and returns the first error encountered, 0 on success.
 */
int bch2_resume_logged_ops(struct bch_fs *c)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = bch2_trans_run(c,
		for_each_btree_key2(trans, iter,
				BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k,
			resume_logged_op(trans, &iter, k)));

	if (ret)
		bch_err_fn(c, ret);

	return ret;
}
static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
{
struct btree_iter iter;
int ret;
ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX);
if (ret)
return ret;
k->k.p = iter.pos;
ret = bch2_trans_update(trans, &iter, k, 0);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
/*
 * Record the start of a logged operation: insert @k into the logged ops btree
 * and commit (BTREE_INSERT_NOFAIL). On success @k's position has been set to
 * the slot it was stored in, which bch2_logged_op_finish() later deletes.
 */
int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
{
return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
__bch2_logged_op_start(trans, k));
}
void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
{
int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
/*
* This needs to be a fatal error because we've left an unfinished
* operation in the logged ops btree.
*
* We should only ever see an error here if the filesystem has already
* been shut down, but make sure of that here:
*/
if (ret) {
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
__func__, buf.buf, bch2_err_str(ret));
printbuf_exit(&buf);
}
}

20
libbcachefs/logged_ops.h Normal file
View File

@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_LOGGED_OPS_H
#define _BCACHEFS_LOGGED_OPS_H
#include "bkey.h"
#define BCH_LOGGED_OPS() \
x(truncate) \
x(finsert)
/*
 * Queue a write of logged op key @op to the logged ops btree in the current
 * transaction, e.g. to persist a change of state or position. The caller is
 * responsible for committing the transaction.
 */
static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op)
{
return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0);
}
int bch2_resume_logged_ops(struct bch_fs *);
int bch2_logged_op_start(struct btree_trans *, struct bkey_i *);
void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
#endif /* _BCACHEFS_LOGGED_OPS_H */

View File

@ -151,10 +151,10 @@ int bch2_check_lrus(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
for_each_btree_key_commit(trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos)));
bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
if (ret)
bch_err_fn(c, ret);
return ret;

View File

@ -10,7 +10,7 @@
#include "buckets.h"
#include "errcode.h"
#include "extents.h"
#include "io.h"
#include "io_write.h"
#include "journal.h"
#include "keylist.h"
#include "migrate.h"
@ -78,34 +78,32 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
enum btree_id id;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
for (id = 0; id < BTREE_ID_NR; id++) {
if (!btree_type_has_ptrs(id))
continue;
ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN,
ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags));
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
if (ret)
break;
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct closure cl;
struct btree *b;
@ -117,16 +115,16 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
trans = bch2_trans_get(c);
bch2_bkey_buf_init(&k);
bch2_trans_init(&trans, c, 0, 0);
closure_init_stack(&cl);
for (id = 0; id < BTREE_ID_NR; id++) {
bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_PREFETCH);
retry:
ret = 0;
while (bch2_trans_begin(&trans),
while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
@ -141,15 +139,14 @@ retry:
break;
}
ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, 0, false);
ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
if (ret) {
bch_err(c, "Error updating btree node key: %s",
bch2_err_str(ret));
bch_err_msg(c, ret, "updating btree node key");
break;
}
next:
@ -158,7 +155,7 @@ next:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (ret)
goto err;
@ -167,8 +164,8 @@ next:
bch2_btree_interior_updates_flush(c);
ret = 0;
err:
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&k, c);
bch2_trans_put(trans);
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));

View File

@ -14,7 +14,8 @@
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io.h"
#include "io_read.h"
#include "io_write.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
@ -524,7 +525,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
struct bch_fs *c = ctxt->c;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_buf sk;
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
@ -532,7 +533,6 @@ static int __bch2_move_data(struct moving_context *ctxt,
int ret = 0, ret2;
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
if (ctxt->stats) {
ctxt->stats->data_type = BCH_DATA_user;
@ -540,15 +540,15 @@ static int __bch2_move_data(struct moving_context *ctxt,
ctxt->stats->pos = start;
}
bch2_trans_iter_init(&trans, &iter, btree_id, start,
bch2_trans_iter_init(trans, &iter, btree_id, start,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
if (ctxt->rate)
bch2_ratelimit_reset(ctxt->rate);
while (!move_ratelimit(&trans, ctxt)) {
bch2_trans_begin(&trans);
while (!move_ratelimit(trans, ctxt)) {
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
if (!k.k)
@ -569,7 +569,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
if (ret)
continue;
@ -584,7 +584,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL,
ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
io_opts, btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
@ -592,7 +592,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt, &trans);
bch2_move_ctxt_wait_for_io(ctxt, trans);
continue;
}
@ -609,8 +609,8 @@ next_nondata:
bch2_btree_iter_advance(&iter);
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
@ -627,7 +627,7 @@ int bch2_move_data(struct bch_fs *c,
{
struct moving_context ctxt;
enum btree_id id;
int ret;
int ret = 0;
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
@ -723,7 +723,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
if (!bp.level) {
const struct bch_extent_ptr *ptr;
struct bkey_s_c k;
unsigned i = 0;
k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
@ -826,15 +825,14 @@ int bch2_evacuate_bucket(struct bch_fs *c,
struct write_point_specifier wp,
bool wait_on_copygc)
{
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct moving_context ctxt;
int ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts);
ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts);
bch2_moving_ctxt_exit(&ctxt);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
return ret;
}
@ -851,14 +849,13 @@ static int bch2_move_btree(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct btree *b;
enum btree_id id;
struct data_update_opts data_opts;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
progress_list_add(c, stats);
stats->data_type = BCH_DATA_btree;
@ -871,11 +868,11 @@ static int bch2_move_btree(struct bch_fs *c,
if (!bch2_btree_id_root(c, id)->b)
continue;
bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0,
bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
BTREE_ITER_PREFETCH);
retry:
ret = 0;
while (bch2_trans_begin(&trans),
while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (kthread && kthread_should_stop())
@ -890,7 +887,7 @@ retry:
if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
@ -901,13 +898,13 @@ next:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (kthread && kthread_should_stop())
break;
}
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);

View File

@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MOVE_H
#define _BCACHEFS_MOVE_H
#include "bcachefs_ioctl.h"
#include "btree_iter.h"
#include "buckets.h"
#include "data_update.h"

View File

@ -13,25 +13,17 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
#include "keylist.h"
#include "lru.h"
#include "move.h"
#include "movinggc.h"
#include "super-io.h"
#include "trace.h"
#include <linux/bsearch.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/sched/task.h>
#include <linux/sort.h>
#include <linux/wait.h>
struct buckets_in_flight {
@ -156,7 +148,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
size_t nr_to_get = max(16UL, buckets_in_flight->nr / 4);
size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
int ret;
@ -172,7 +164,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
int ret = 0;
int ret2 = 0;
saw++;
@ -181,11 +173,11 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
else if (bucket_in_flight(buckets_in_flight, b.k))
in_flight++;
else {
ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
if (ret >= 0)
ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
if (ret2 >= 0)
sectors += b.sectors;
}
ret;
ret2;
}));
pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
@ -242,7 +234,7 @@ err:
ret = 0;
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "from bch2_move_data()");
moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
trace_and_count(c, copygc, c, moved, 0, 0, 0);
@ -308,25 +300,24 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
static int bch2_copygc_thread(void *arg)
{
struct bch_fs *c = arg;
struct btree_trans trans;
struct btree_trans *trans;
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
struct buckets_in_flight move_buckets;
struct buckets_in_flight buckets;
u64 last, wait;
int ret = 0;
memset(&move_buckets, 0, sizeof(move_buckets));
memset(&buckets, 0, sizeof(buckets));
ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params);
ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
if (ret) {
bch_err(c, "error allocating copygc buckets in flight: %s",
bch2_err_str(ret));
bch_err_msg(c, ret, "allocating copygc buckets in flight");
return ret;
}
set_freezable();
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
@ -334,16 +325,16 @@ static int bch2_copygc_thread(void *arg)
false);
while (!ret && !kthread_should_stop()) {
bch2_trans_unlock(&trans);
bch2_trans_unlock(trans);
cond_resched();
if (!c->copy_gc_enabled) {
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
move_buckets_wait(trans, &ctxt, &buckets, true);
kthread_wait_freezable(c->copy_gc_enabled);
}
if (unlikely(freezing(current))) {
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
move_buckets_wait(trans, &ctxt, &buckets, true);
__refrigerator(false);
continue;
}
@ -354,7 +345,7 @@ static int bch2_copygc_thread(void *arg)
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
move_buckets_wait(trans, &ctxt, &buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
@ -364,15 +355,15 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
c->copygc_running = true;
ret = bch2_copygc(&trans, &ctxt, &move_buckets);
ret = bch2_copygc(trans, &ctxt, &buckets);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
}
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
rhashtable_destroy(&move_buckets.table);
bch2_trans_exit(&trans);
move_buckets_wait(trans, &ctxt, &buckets, true);
rhashtable_destroy(&buckets.table);
bch2_trans_put(trans);
bch2_moving_ctxt_exit(&ctxt);
return 0;
@ -404,7 +395,7 @@ int bch2_copygc_start(struct bch_fs *c)
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "creating copygc thread");
return ret;
}

View File

@ -471,8 +471,9 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
val = "0";
}
/* Unknown options are ignored: */
if (id < 0)
goto bad_opt;
continue;
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;

View File

@ -469,7 +469,7 @@ struct bch_opts {
#undef x
};
static const struct bch_opts bch2_opts_default = {
static const __maybe_unused struct bch_opts bch2_opts_default = {
#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \
._name##_defined = true, \
._name = _default, \

View File

@ -81,8 +81,10 @@ void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
}
/**
* printbuf_str - returns printbuf's buf as a C string, guaranteed to be null
* terminated
* bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be
* null terminated
* @buf: printbuf to terminate
* Returns: Printbuf contents, as a nul terminated C string
*/
const char *bch2_printbuf_str(const struct printbuf *buf)
{
@ -97,8 +99,9 @@ const char *bch2_printbuf_str(const struct printbuf *buf)
}
/**
* printbuf_exit - exit a printbuf, freeing memory it owns and poisoning it
* bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it
* against accidental use.
* @buf: printbuf to exit
*/
void bch2_printbuf_exit(struct printbuf *buf)
{
@ -120,7 +123,7 @@ void bch2_printbuf_tabstop_pop(struct printbuf *buf)
}
/*
* printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop
* bch2_printbuf_tabstop_set() - add a tabstop, n spaces from the previous tabstop
*
* @buf: printbuf to control
* @spaces: number of spaces from previous tabstop
@ -144,7 +147,7 @@ int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
}
/**
* printbuf_indent_add - add to the current indent level
* bch2_printbuf_indent_add() - add to the current indent level
*
* @buf: printbuf to control
* @spaces: number of spaces to add to the current indent level
@ -164,7 +167,7 @@ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
}
/**
* printbuf_indent_sub - subtract from the current indent level
* bch2_printbuf_indent_sub() - subtract from the current indent level
*
* @buf: printbuf to control
* @spaces: number of spaces to subtract from the current indent level
@ -227,9 +230,8 @@ static void __prt_tab(struct printbuf *out)
}
/**
* prt_tab - Advance printbuf to the next tabstop
*
* @buf: printbuf to control
* bch2_prt_tab() - Advance printbuf to the next tabstop
* @out: printbuf to control
*
* Advance output to the next tabstop by printing spaces.
*/
@ -267,7 +269,7 @@ static void __prt_tab_rjust(struct printbuf *buf)
}
/**
* prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
* bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
* previous output
*
* @buf: printbuf to control
@ -284,11 +286,11 @@ void bch2_prt_tab_rjust(struct printbuf *buf)
}
/**
* prt_bytes_indented - Print an array of chars, handling embedded control characters
* bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters
*
* @out: printbuf to output to
* @str: string to print
* @count: number of bytes to print
* @out: output printbuf
* @str: string to print
* @count: number of bytes to print
*
* The following control characters are handled as so:
* \n: prt_newline newline that obeys current indent level
@ -335,32 +337,38 @@ void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned cou
}
/**
* prt_human_readable_u64 - Print out a u64 in human readable units
* bch2_prt_human_readable_u64() - Print out a u64 in human readable units
* @out: output printbuf
* @v: integer to print
*
* Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
* Units of 2^10 (default) or 10^3 are controlled via @out->si_units
*/
void bch2_prt_human_readable_u64(struct printbuf *buf, u64 v)
void bch2_prt_human_readable_u64(struct printbuf *out, u64 v)
{
bch2_printbuf_make_room(buf, 10);
buf->pos += string_get_size(v, 1, !buf->si_units,
buf->buf + buf->pos,
printbuf_remaining_size(buf));
bch2_printbuf_make_room(out, 10);
out->pos += string_get_size(v, 1, !out->si_units,
out->buf + out->pos,
printbuf_remaining_size(out));
}
/**
* prt_human_readable_s64 - Print out a s64 in human readable units
* bch2_prt_human_readable_s64() - Print out a s64 in human readable units
* @out: output printbuf
* @v: integer to print
*
* Units of 2^10 (default) or 10^3 are controlled via @buf->si_units
* Units of 2^10 (default) or 10^3 are controlled via @out->si_units
*/
void bch2_prt_human_readable_s64(struct printbuf *buf, s64 v)
void bch2_prt_human_readable_s64(struct printbuf *out, s64 v)
{
if (v < 0)
prt_char(buf, '-');
bch2_prt_human_readable_u64(buf, abs(v));
prt_char(out, '-');
bch2_prt_human_readable_u64(out, abs(v));
}
/**
* prt_units_u64 - Print out a u64 according to printbuf unit options
* bch2_prt_units_u64() - Print out a u64 according to printbuf unit options
* @out: output printbuf
* @v: integer to print
*
* Units are either raw (default), or human readable units (controlled via
* @out->human_readable_units)
@ -374,7 +382,9 @@ void bch2_prt_units_u64(struct printbuf *out, u64 v)
}
/**
* prt_units_s64 - Print out a s64 according to printbuf unit options
* bch2_prt_units_s64() - Print out a s64 according to printbuf unit options
* @out: output printbuf
* @v: integer to print
*
* Units are either raw (default), or human readable units (controlled via
* @out->human_readable_units)

View File

@ -572,7 +572,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
if (!s_t.master_subvol)
goto advance;
ret = bch2_inode_find_by_inum_trans(trans,
ret = bch2_inode_find_by_inum_nowarn_trans(trans,
(subvol_inum) {
le32_to_cpu(s_t.master_subvol),
k.k->p.offset,
@ -599,7 +599,7 @@ advance:
int bch2_fs_quota_read(struct bch_fs *c)
{
struct bch_sb_field_quota *sb_quota;
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
@ -614,16 +614,16 @@ int bch2_fs_quota_read(struct bch_fs *c)
bch2_sb_quota_read(c);
mutex_unlock(&c->sb_lock);
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas,
ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas,
POS_MIN, BTREE_ITER_PREFETCH, k,
__bch2_quota_set(c, k, NULL)) ?:
for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
for_each_btree_key2(trans, iter, BTREE_ID_inodes,
POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
bch2_fs_quota_read_inode(&trans, &iter, k));
bch2_fs_quota_read_inode(trans, &iter, k));
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
@ -786,7 +786,6 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
{
struct bch_fs *c = sb->s_fs_info;
struct bch_sb_field_quota *sb_quota;
struct bch_memquota_type *q;
int ret = 0;
if (0) {
@ -810,8 +809,6 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
return -EINVAL;
q = &c->quotas[type];
mutex_lock(&c->sb_lock);
sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
if (!sb_quota) {
@ -959,7 +956,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);
return bch2_err_class(ret);

View File

@ -8,8 +8,6 @@
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "extents.h"
#include "io.h"
#include "move.h"
#include "rebalance.h"
#include "super-io.h"
@ -350,7 +348,7 @@ int bch2_rebalance_start(struct bch_fs *c)
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
ret = PTR_ERR_OR_ZERO(p);
if (ret) {
bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "creating rebalance thread");
return ret;
}

View File

@ -20,6 +20,7 @@
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "lru.h"
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "recovery.h"
@ -164,7 +165,7 @@ static int bch2_journal_replay(struct bch_fs *c)
(!k->allocated
? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
: 0),
bch2_journal_replay_key(&trans, k));
bch2_journal_replay_key(trans, k));
if (ret) {
bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret));
@ -422,15 +423,9 @@ static int bch2_initialize_subvolumes(struct bch_fs *c)
root_volume.v.snapshot = cpu_to_le32(U32_MAX);
root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees,
&root_tree.k_i,
NULL, NULL, 0) ?:
bch2_btree_insert(c, BTREE_ID_snapshots,
&root_snapshot.k_i,
NULL, NULL, 0) ?:
bch2_btree_insert(c, BTREE_ID_subvolumes,
&root_volume.k_i,
NULL, NULL, 0);
ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
if (ret)
bch_err_fn(c, ret);
return ret;
@ -471,7 +466,7 @@ noinline_for_stack
static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
__bch2_fs_upgrade_for_subvolumes(&trans));
__bch2_fs_upgrade_for_subvolumes(trans));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -561,7 +556,7 @@ static void check_version_upgrade(struct bch_fs *c)
if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
prt_str(&buf, "fsck required");
else {
prt_str(&buf, "running recovery passses: ");
prt_str(&buf, "running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
}
@ -1009,9 +1004,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_pack(&packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX;
ret = bch2_btree_insert(c, BTREE_ID_inodes,
&packed_inode.inode.k_i,
NULL, NULL, 0);
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
if (ret) {
bch_err_msg(c, ret, "creating root directory");
goto err;
@ -1020,7 +1013,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init_early(c, &lostfound_inode);
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_create_trans(&trans,
bch2_create_trans(trans,
BCACHEFS_ROOT_SUBVOL_INUM,
&root_inode, &lostfound_inode,
&lostfound,

View File

@ -24,6 +24,7 @@
x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 0) \
x(resume_logged_ops, PASS_ALWAYS) \
x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \

View File

@ -5,9 +5,11 @@
#include "buckets.h"
#include "extents.h"
#include "inode.h"
#include "io.h"
#include "io_misc.h"
#include "io_write.h"
#include "reflink.h"
#include "subvolume.h"
#include "super-io.h"
#include <linux/sched/signal.h>
@ -89,6 +91,9 @@ void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
bch2_bkey_ptrs_to_text(out, c, k);
}
#if 0
Currently disabled, needs to be debugged:
bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
{
struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l);
@ -96,6 +101,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
}
#endif
int bch2_trans_mark_reflink_v(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
@ -247,7 +253,7 @@ s64 bch2_remap_range(struct bch_fs *c,
u64 remap_sectors,
u64 new_i_size, s64 *i_sectors_delta)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter dst_iter, src_iter;
struct bkey_s_c src_k;
struct bkey_buf new_dst, new_src;
@ -269,11 +275,11 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_init(&new_dst);
bch2_bkey_buf_init(&new_src);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
trans = bch2_trans_get(c);
bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start,
bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
BTREE_ITER_INTENT);
bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
BTREE_ITER_INTENT);
while ((ret == 0 ||
@ -281,21 +287,21 @@ s64 bch2_remap_range(struct bch_fs *c,
bkey_lt(dst_iter.pos, dst_end)) {
struct disk_reservation disk_res = { 0 };
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
if (fatal_signal_pending(current)) {
ret = -EINTR;
break;
}
ret = bch2_subvolume_get_snapshot(&trans, src_inum.subvol,
ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol,
&src_snapshot);
if (ret)
continue;
bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
ret = bch2_subvolume_get_snapshot(&trans, dst_inum.subvol,
ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
&dst_snapshot);
if (ret)
continue;
@ -312,7 +318,7 @@ s64 bch2_remap_range(struct bch_fs *c,
continue;
if (bkey_lt(src_want, src_iter.pos)) {
ret = bch2_fpunch_at(&trans, &dst_iter, dst_inum,
ret = bch2_fpunch_at(trans, &dst_iter, dst_inum,
min(dst_end.offset,
dst_iter.pos.offset +
src_iter.pos.offset - src_want.offset),
@ -326,7 +332,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_bkey_buf_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
ret = bch2_make_extent_indirect(&trans, &src_iter,
ret = bch2_make_extent_indirect(trans, &src_iter,
new_src.k);
if (ret)
continue;
@ -354,14 +360,14 @@ s64 bch2_remap_range(struct bch_fs *c,
min(src_k.k->p.offset - src_want.offset,
dst_end.offset - dst_iter.pos.offset));
ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
ret = bch2_extent_update(trans, dst_inum, &dst_iter,
new_dst.k, &disk_res,
new_i_size, i_sectors_delta,
true);
bch2_disk_reservation_put(c, &disk_res);
}
bch2_trans_iter_exit(&trans, &dst_iter);
bch2_trans_iter_exit(&trans, &src_iter);
bch2_trans_iter_exit(trans, &dst_iter);
bch2_trans_iter_exit(trans, &src_iter);
BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end));
BUG_ON(bkey_gt(dst_iter.pos, dst_end));
@ -373,23 +379,23 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bch_inode_unpacked inode_u;
struct btree_iter inode_iter = { NULL };
bch2_trans_begin(&trans);
bch2_trans_begin(trans);
ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u,
ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u,
dst_inum, BTREE_ITER_INTENT);
if (!ret2 &&
inode_u.bi_size < new_i_size) {
inode_u.bi_size = new_i_size;
ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL,
ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
}
bch2_trans_iter_exit(&trans, &inode_iter);
bch2_trans_iter_exit(trans, &inode_iter);
} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
bch2_trans_exit(&trans);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);

View File

@ -429,7 +429,7 @@ out:
return ret;
err:
bch_err(c, "error adding replicas entry: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "adding replicas entry");
goto out;
}

View File

@ -31,7 +31,6 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
#define SIX_LOCK_HELD_intent (1U << 26)
#define SIX_LOCK_HELD_write (1U << 27)
#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_intent (1U << (28 + SIX_LOCK_intent))
#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN (1U << 31)

View File

@ -163,8 +163,7 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
rcu_assign_pointer(c->snapshots, new);
c->snapshot_table_size = new_size;
if (old)
kvfree_rcu(old);
kvfree_rcu_mightsleep(old);
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
}
@ -344,7 +343,7 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
BTREE_ITER_WITH_UPDATES, snapshot, s);
}
int bch2_snapshot_live(struct btree_trans *trans, u32 id)
static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
{
struct bch_snapshot v;
int ret;
@ -371,7 +370,7 @@ int bch2_snapshot_live(struct btree_trans *trans, u32 id)
* it's part of such a linear chain: this correctly sets equivalence classes on
* startup if we run leaf to root (i.e. in natural key order).
*/
int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
unsigned i, nr_live = 0, live_idx = 0;
@ -488,18 +487,18 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
bch2_trans_iter_exit(trans, &iter);
if (!ret && !found) {
struct bkey_i_subvolume *s;
struct bkey_i_subvolume *u;
*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
s = bch2_bkey_get_mut_typed(trans, &iter,
u = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, *subvol_id),
0, subvolume);
ret = PTR_ERR_OR_ZERO(s);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
return ret;
SET_BCH_SUBVOLUME_SNAP(&s->v, false);
SET_BCH_SUBVOLUME_SNAP(&u->v, false);
}
return ret;
@ -591,11 +590,11 @@ int bch2_check_snapshot_trees(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot_tree(&trans, &iter, k)));
check_snapshot_tree(trans, &iter, k)));
if (ret)
bch_err(c, "error %i checking snapshot trees", ret);
@ -864,11 +863,11 @@ int bch2_check_snapshots(struct bch_fs *c)
* the parent's depth already be correct:
*/
ret = bch2_trans_run(c,
for_each_btree_key_reverse_commit(&trans, iter,
for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k)));
check_snapshot(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -911,7 +910,7 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
swap(s->children[0], s->children[1]);
}
int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
@ -1072,6 +1071,10 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;
new_snapids[i] = iter.pos.offset;
mutex_lock(&c->snapshot_table_lock);
snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
mutex_unlock(&c->snapshot_table_lock);
}
err:
bch2_trans_iter_exit(trans, &iter);
@ -1354,7 +1357,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
int bch2_delete_dead_snapshots(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_snapshot snap;
@ -1366,35 +1369,35 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
ret = bch2_fs_read_write_early(c);
if (ret) {
bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
return ret;
}
}
bch2_trans_init(&trans, c, 0, 0);
trans = bch2_trans_get(c);
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
NULL, NULL, 0,
bch2_delete_redundant_snapshot(&trans, &iter, k));
bch2_delete_redundant_snapshot(trans, &iter, k));
if (ret) {
bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "deleting redundant snapshots");
goto err;
}
for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
bch2_snapshot_set_equiv(&trans, k));
ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
bch2_snapshot_set_equiv(trans, k));
if (ret) {
bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
goto err;
}
for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_snapshot)
continue;
@ -1406,7 +1409,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
break;
}
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
if (ret) {
bch_err_msg(c, ret, "walking snapshots");
@ -1421,16 +1424,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!btree_type_has_snapshots(id))
continue;
ret = for_each_btree_key_commit(&trans, iter,
ret = for_each_btree_key_commit(trans, iter,
id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL, BTREE_INSERT_NOFAIL,
snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
for_each_btree_key_commit(&trans, iter,
snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
for_each_btree_key_commit(trans, iter,
id, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
&res, NULL, BTREE_INSERT_NOFAIL,
move_key_to_correct_snapshot(&trans, &iter, k));
move_key_to_correct_snapshot(trans, &iter, k));
bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen);
@ -1441,7 +1444,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
}
for_each_btree_key(&trans, iter, BTREE_ID_snapshots,
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) {
u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot);
@ -1449,23 +1452,23 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (equiv != snapshot)
snapshot_list_add(c, &deleted_interior, snapshot);
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_iter_exit(trans, &iter);
/*
* Fixing children of deleted snapshots can't be done completely
* atomically, if we crash between here and when we delete the interior
* nodes some depth fields will be off:
*/
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, POS_MIN,
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_INTENT, k,
NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_fix_child_of_deleted_snapshot(&trans, &iter, k, &deleted_interior));
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
if (ret)
goto err;
darray_for_each(deleted, i) {
ret = commit_do(&trans, NULL, NULL, 0,
bch2_snapshot_node_delete(&trans, *i));
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
@ -1473,8 +1476,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
darray_for_each(deleted_interior, i) {
ret = commit_do(&trans, NULL, NULL, 0,
bch2_snapshot_node_delete(&trans, *i));
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
@ -1485,7 +1488,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
err:
darray_exit(&deleted_interior);
darray_exit(&deleted);
bch2_trans_exit(&trans);
bch2_trans_put(trans);
if (ret)
bch_err_fn(c, ret);
return ret;
@ -1618,7 +1621,8 @@ int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_buf sk;
int ret;
u32 restart_count = trans->restart_count;
int ret = 0;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
@ -1640,7 +1644,8 @@ int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
}
bch2_bkey_buf_exit(&sk, c);
return ret;
return ret ?: trans_was_restarted(trans, restart_count);
}
int bch2_snapshots_read(struct bch_fs *c)
@ -1650,11 +1655,11 @@ int bch2_snapshots_read(struct bch_fs *c)
int ret = 0;
ret = bch2_trans_run(c,
for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(&trans, k)) ?:
for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(trans, k)) ?:
for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
if (ret)

View File

@ -235,8 +235,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
struct bch_subvolume *);
int bch2_snapshot_live(struct btree_trans *trans, u32 id);
int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k);
/* only exported for tests: */
int bch2_snapshot_node_create(struct btree_trans *, u32,

View File

@ -41,8 +41,7 @@ static int check_subvol(struct btree_trans *trans,
ret = bch2_subvolume_delete(trans, iter->pos.offset);
if (ret)
bch_err(c, "error deleting subvolume %llu: %s",
iter->pos.offset, bch2_err_str(ret));
bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
return ret ?: -BCH_ERR_transaction_restart_nested;
}
@ -87,10 +86,10 @@ int bch2_check_subvols(struct bch_fs *c)
int ret;
ret = bch2_trans_run(c,
for_each_btree_key_commit(&trans, iter,
for_each_btree_key_commit(trans, iter,
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_subvol(&trans, &iter, k)));
check_subvol(trans, &iter, k)));
if (ret)
bch_err_fn(c, ret);
return ret;
@ -99,7 +98,7 @@ int bch2_check_subvols(struct bch_fs *c)
/* Subvolumes: */
int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
unsigned flags, struct printbuf *err)
enum bkey_invalid_flags flags, struct printbuf *err)
{
if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
@ -294,9 +293,9 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
bch2_evict_subvolume_inodes(c, &s);
for (id = s.data; id < s.data + s.nr; id++) {
ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id));
ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
if (ret) {
bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
bch_err_msg(c, ret, "deleting subvolume %u", *id);
break;
}
}

View File

@ -10,7 +10,7 @@ enum bkey_invalid_flags;
int bch2_check_subvols(struct bch_fs *);
int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c,
unsigned, struct printbuf *);
enum bkey_invalid_flags, struct printbuf *);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \

View File

@ -6,7 +6,6 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io.h"
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
@ -23,6 +22,9 @@
#include <linux/backing-dev.h>
#include <linux/sort.h>
/*
 * Block-device holder operations passed to blkdev_get_by_path() when the
 * superblock's device is opened in bch2_read_super(). The table is empty:
 * no callbacks are set.
 */
static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
};
struct bch2_metadata_version {
u16 version;
const char *name;
@ -161,7 +163,8 @@ void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->mode);
blkdev_put(sb->bdev, sb->holder);
kfree(sb->holder);
kfree(sb->sb);
memset(sb, 0, sizeof(*sb));
@ -182,7 +185,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
if (sb->sb && sb->buffer_size >= new_buffer_size)
return 0;
if (sb->have_layout) {
if (sb->sb && sb->have_layout) {
u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
if (new_bytes > max_bytes) {
@ -243,9 +246,9 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
/* XXX: we're not checking that offline device have enough space */
for_each_online_member(ca, c, i) {
struct bch_sb_handle *sb = &ca->disk_sb;
struct bch_sb_handle *dev_sb = &ca->disk_sb;
if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
percpu_ref_put(&ca->ref);
return NULL;
}
@ -381,7 +384,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
}
if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
prt_printf(out, "Bad intenal UUID (got zeroes)");
prt_printf(out, "Bad internal UUID (got zeroes)");
return -BCH_ERR_invalid_sb_uuid;
}
@ -664,27 +667,30 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
retry:
#endif
memset(sb, 0, sizeof(*sb));
sb->mode = FMODE_READ;
sb->mode = BLK_OPEN_READ;
sb->have_bio = true;
sb->holder = kmalloc(1, GFP_KERNEL);
if (!sb->holder)
return -ENOMEM;
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
sb->mode |= FMODE_BUFFERED;
sb->mode |= BLK_OPEN_BUFFERED;
#endif
if (!opt_get(*opts, noexcl))
sb->mode |= FMODE_EXCL;
sb->mode |= BLK_OPEN_EXCL;
if (!opt_get(*opts, nochanges))
sb->mode |= FMODE_WRITE;
sb->mode |= BLK_OPEN_WRITE;
sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (IS_ERR(sb->bdev) &&
PTR_ERR(sb->bdev) == -EACCES &&
opt_get(*opts, read_only)) {
sb->mode &= ~FMODE_WRITE;
sb->mode &= ~BLK_OPEN_WRITE;
sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (!IS_ERR(sb->bdev))
opt_set(*opts, nochanges, true);
}

Some files were not shown because too many files have changed in this diff Show More