Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)
Update bcachefs sources to 14ce2a2031 bcachefs: fixes for building in userspace

parent 8acc54456e
commit 1cf4d51dc4
@@ -1 +1 @@
-e57b5958cf4e8530d26f7c36a6e1427fb284cc70
+14ce2a2031f3761a4b957aa2e5aac446ce18b87c
@@ -293,11 +293,11 @@ int cmd_list(int argc, char *argv[])
             list_modes, "list mode");
         break;
     case 'f':
-        opts.fix_errors = FSCK_ERR_YES;
-        opts.norecovery = false;
+        opt_set(opts, fix_errors, FSCK_OPT_YES);
+        opt_set(opts, norecovery, false);
         break;
     case 'v':
-        opts.verbose_recovery = true;
+        opt_set(opts, verbose_recovery, true);
         break;
     case 'h':
         list_keys_usage();
@@ -28,18 +28,19 @@ int cmd_fsck(int argc, char *argv[])
     int opt;
 
     opt_set(opts, degraded, true);
+    opt_set(opts, fix_errors, FSCK_OPT_ASK);
 
     while ((opt = getopt(argc, argv, "pynfvh")) != -1)
         switch (opt) {
         case 'p':
-            opt_set(opts, fix_errors, FSCK_ERR_YES);
+            opt_set(opts, fix_errors, FSCK_OPT_YES);
             break;
         case 'y':
-            opt_set(opts, fix_errors, FSCK_ERR_YES);
+            opt_set(opts, fix_errors, FSCK_OPT_YES);
            break;
         case 'n':
             opt_set(opts, nochanges, true);
-            opt_set(opts, fix_errors, FSCK_ERR_NO);
+            opt_set(opts, fix_errors, FSCK_OPT_NO);
             break;
         case 'f':
             /* force check, even if filesystem marked clean: */
@@ -164,7 +164,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c,
     struct bch_inode_unpacked new_inode;
     int ret;
 
-    bch2_inode_init(c, &new_inode, uid, gid, mode, rdev);
+    bch2_inode_init(c, &new_inode, uid, gid, mode, rdev, parent);
 
     ret = bch2_inode_create(c, &new_inode, BLOCKDEV_INODE_MAX, 0,
                             &c->unused_inode_hint);
@@ -247,7 +247,6 @@ static void write_data(struct bch_fs *c,
                        struct bch_inode_unpacked *dst_inode,
                        u64 dst_offset, void *buf, size_t len)
 {
-    struct disk_reservation res;
     struct bch_write_op op;
     struct bio_vec bv;
     struct closure cl;
@@ -261,12 +260,15 @@ static void write_data(struct bch_fs *c,
     op.wbio.bio.bi_iter.bi_size = len;
     bch2_bio_map(&op.wbio.bio, buf);
 
-    int ret = bch2_disk_reservation_get(c, &res, len >> 9, 0);
+    bch2_write_op_init(&op, c);
+
+    op.write_point = writepoint_hashed(0);
+    op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
+
+    int ret = bch2_disk_reservation_get(c, &op.res, len >> 9, 0);
     if (ret)
         die("error reserving space in new filesystem: %s", strerror(-ret));
 
-    bch2_write_op_init(&op, c, res, NULL, writepoint_hashed(0),
-                       POS(dst_inode->bi_inum, dst_offset >> 9), NULL, 0);
     closure_call(&op.cl, bch2_write, NULL, &cl);
     closure_sync(&cl);
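Read side by side, the hunk above replaces the old eight-argument bch2_write_op_init() call with a minimal init plus explicit field assignments, and the disk reservation now lives inside the write op. A sketch of the new sequence, assembled only from the new-side lines of the hunk as a reading aid (not authoritative beyond what the hunk shows):

    bch2_write_op_init(&op, c);         /* minimal init: just the op and the fs */
    op.write_point = writepoint_hashed(0);
    op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);

    /* the reservation is now carried inside the op itself: */
    int ret = bch2_disk_reservation_get(c, &op.res, len >> 9, 0);
    if (ret)
        die("error reserving space in new filesystem: %s", strerror(-ret));

    closure_call(&op.cl, bch2_write, NULL, &cl);
    closure_sync(&cl);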
@@ -243,7 +243,8 @@ static inline void bioset_free(struct bio_set *bs)
 
 static inline int bioset_init(struct bio_set *bs,
                               unsigned pool_size,
-                              unsigned front_pad)
+                              unsigned front_pad,
+                              int flags)
 {
     bs->front_pad = front_pad;
     return 0;
@@ -251,6 +252,10 @@ static inline int bioset_init(struct bio_set *bs,
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+
+enum {
+    BIOSET_NEED_BVECS = 1 << 0,
+    BIOSET_NEED_RESCUER = 1 << 1,
+};
 
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
@@ -271,13 +276,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
 }
 
 extern void bio_endio(struct bio *);
-extern void bio_endio_nodec(struct bio *);
-
-static inline void bio_io_error(struct bio *bio)
-{
-    bio->bi_error = -EIO;
-    bio_endio(bio);
-}
 
 extern void bio_advance(struct bio *, unsigned);
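The shim above now mirrors the newer kernel bioset API: bioset_init() gains a flags word and the dedicated bioset_create_nobvec() constructor goes away in favour of the BIOSET_NEED_* flags. A hedged sketch of a caller under the new signature (struct my_bio and the pool size are made-up illustration values, not taken from this tree):

    struct bio_set bs;

    /* ask for integrated bvec pools via the new flags argument: */
    if (bioset_init(&bs, 4, offsetof(struct my_bio, bio), BIOSET_NEED_BVECS))
        return -ENOMEM;

    struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &bs);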
@@ -13,7 +13,27 @@ struct bio_set;
 struct bio;
 struct block_device;
 typedef void (bio_end_io_t) (struct bio *);
 typedef void (bio_destructor_t) (struct bio *);
 
+/*
+ * Block error status values. See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define BLK_STS_OK 0
+#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
+#define BLK_STS_NOSPC ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
+#define BLK_STS_TARGET ((__force blk_status_t)5)
+#define BLK_STS_NEXUS ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE ((__force blk_status_t)9)
+#define BLK_STS_IOERR ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN ((__force blk_status_t)12)
+
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
@@ -22,7 +42,7 @@ typedef void (bio_destructor_t) (struct bio *);
 struct bio {
     struct bio *bi_next; /* request queue link */
     struct block_device *bi_bdev;
-    int bi_error;
+    blk_status_t bi_status;
     unsigned int bi_opf; /* bottom bits req flags,
                           * top bits REQ_OP. Use
                           * accessors.
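With bi_error gone, completion handlers test bio->bi_status (a blk_status_t) and translate it back to a conventional errno only when one is needed, via the blk_status_to_errno() helper this commit declares in blkdev.h. A small illustrative endio handler; the function name is hypothetical:

    static void example_read_endio(struct bio *bio)
    {
        if (bio->bi_status) {
            int err = blk_status_to_errno(bio->bi_status);

            /* handle the failure, e.g. retry from another replica */
            (void) err;
        }

        bio_put(bio);
    }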
@@ -197,5 +197,8 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 
 #define capable(cap) true
 
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
 #endif /* __TOOLS_LINUX_BLKDEV_H */
@@ -14,7 +14,7 @@
 #define BUG() do { assert(0); unreachable(); } while (0)
 #define BUG_ON(cond) assert(!(cond))
 
-#define WARN_ON_ONCE(cond) assert(!(cond))
+#define WARN_ON_ONCE(cond) ({ bool _r = (cond); if (_r) assert(0); _r; })
 #define WARN_ONCE(cond, msg) ({ bool _r = (cond); if (_r) assert(0); _r; })
 
 #define __WARN() assert(0)
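Redefining WARN_ON_ONCE() as a statement expression matters because, like the kernel macro, it now evaluates to the condition, so callers can warn and branch in one step; note that in this userspace shim the warning is an assert(0), so a triggered warning aborts a debug build rather than just logging. A hypothetical caller:

    if (WARN_ON_ONCE(!ca))      /* warns (asserts here) and still yields the condition */
        return -EINVAL;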
@@ -204,4 +204,19 @@ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns)
 extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
                                              const struct timespec64 rhs);
 
+static inline struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+    /* Avoid division in the common cases 1 ns and 1 s. */
+    if (gran == 1) {
+        /* nothing */
+    } else if (gran == NSEC_PER_SEC) {
+        t.tv_nsec = 0;
+    } else if (gran > 1 && gran < NSEC_PER_SEC) {
+        t.tv_nsec -= t.tv_nsec % gran;
+    } else {
+        WARN(1, "illegal file time granularity: %u", gran);
+    }
+    return t;
+}
+
 #endif /* _LINUX_TIME64_H */
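timespec_trunc() rounds a timestamp down to the granularity a filesystem actually stores, matching the kernel helper of the same name. A quick illustration of the three interesting cases (values chosen arbitrarily):

    struct timespec t = { .tv_sec = 100, .tv_nsec = 123456789 };

    t = timespec_trunc(t, 1);            /* 1 ns granularity: unchanged */
    t = timespec_trunc(t, 1000);         /* 1 us granularity: tv_nsec becomes 123456000 */
    t = timespec_trunc(t, NSEC_PER_SEC); /* 1 s granularity: tv_nsec becomes 0 */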
@@ -193,8 +193,7 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
     if (ret < 0)
         return ret;
     else {
-        inode->v.i_ctime =
-            current_fs_time(inode->v.i_sb);
+        inode->v.i_ctime = current_time(&inode->v);
         mark_inode_dirty(&inode->v);
         if (ret == 0)
             acl = NULL;
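The timestamp update follows the upstream VFS change from current_fs_time(), which took the super_block, to current_time(), which takes the inode so per-inode timestamp granularity can be honoured. The before/after pattern extracted from the hunk above:

    /* before: */
    inode->v.i_ctime = current_fs_time(inode->v.i_sb);

    /* after: */
    inode->v.i_ctime = current_time(&inode->v);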
@ -257,7 +257,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
|
||||
return;
|
||||
|
||||
a = bkey_s_c_to_alloc(k);
|
||||
ca = c->devs[a.k->p.inode];
|
||||
ca = bch_dev_bkey_exists(c, a.k->p.inode);
|
||||
|
||||
if (a.k->p.offset >= ca->mi.nbuckets)
|
||||
return;
|
||||
@ -305,10 +305,12 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
|
||||
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
|
||||
}
|
||||
|
||||
mutex_lock(&c->bucket_lock);
|
||||
for_each_member_device(ca, c, i) {
|
||||
bch2_recalc_min_prio(c, ca, READ);
|
||||
bch2_recalc_min_prio(c, ca, WRITE);
|
||||
}
|
||||
mutex_unlock(&c->bucket_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -368,7 +370,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
|
||||
if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
|
||||
return 0;
|
||||
|
||||
ca = c->devs[pos.inode];
|
||||
ca = bch_dev_bkey_exists(c, pos.inode);
|
||||
|
||||
if (pos.offset >= ca->mi.nbuckets)
|
||||
return 0;
|
||||
@ -461,7 +463,7 @@ static void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
/* Bucket heap / gen */
|
||||
|
||||
void bch2_recalc_min_prio(struct bch_fs *c, struct bch_dev *ca, int rw)
|
||||
static void bch2_recalc_min_prio(struct bch_fs *c, struct bch_dev *ca, int rw)
|
||||
{
|
||||
struct prio_clock *clock = &c->prio_clock[rw];
|
||||
struct bucket *g;
|
||||
@ -975,7 +977,7 @@ static int bch2_allocator_thread(void *arg)
|
||||
|
||||
void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
struct bch_dev *ca = c->devs[ob->ptr.dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
|
||||
spin_lock(&ob->lock);
|
||||
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET(ca, &ob->ptr), false,
|
||||
@ -1303,7 +1305,7 @@ static void writepoint_drop_ptrs(struct bch_fs *c,
|
||||
|
||||
for (i = wp->nr_ptrs - 1; i >= 0; --i) {
|
||||
struct open_bucket *ob = wp->ptrs[i];
|
||||
struct bch_dev *ca = c->devs[ob->ptr.dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
|
||||
if (nr_ptrs_dislike && !test_bit(ob->ptr.dev, devs->d)) {
|
||||
BUG_ON(ca->open_buckets_partial_nr >=
|
||||
@ -1331,7 +1333,7 @@ static void verify_not_stale(struct bch_fs *c, const struct write_point *wp)
|
||||
unsigned i;
|
||||
|
||||
writepoint_for_each_ptr(wp, ob, i) {
|
||||
struct bch_dev *ca = c->devs[ob->ptr.dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
|
||||
BUG_ON(ptr_stale(ca, &ob->ptr));
|
||||
}
|
||||
@ -1537,7 +1539,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
|
||||
|
||||
for (i = 0; i < wp->nr_ptrs_can_use; i++) {
|
||||
struct open_bucket *ob = wp->ptrs[i];
|
||||
struct bch_dev *ca = c->devs[ob->ptr.dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
struct bch_extent_ptr tmp = ob->ptr;
|
||||
|
||||
EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev));
|
||||
@ -1589,7 +1591,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
ra_pages += bdi->ra_pages;
|
||||
}
|
||||
|
||||
c->bdi.ra_pages = ra_pages;
|
||||
bch2_set_ra_pages(c, ra_pages);
|
||||
|
||||
/* Find fastest, slowest tiers with devices: */
|
||||
|
||||
|
@ -326,9 +326,9 @@ struct io_count {
|
||||
struct bch_dev {
|
||||
struct kobject kobj;
|
||||
struct percpu_ref ref;
|
||||
struct completion ref_completion;
|
||||
struct percpu_ref io_ref;
|
||||
struct completion stop_complete;
|
||||
struct completion offline_complete;
|
||||
struct completion io_ref_completion;
|
||||
|
||||
struct bch_fs *fs;
|
||||
|
||||
@ -515,12 +515,11 @@ struct bch_fs {
|
||||
struct closure sb_write;
|
||||
struct mutex sb_lock;
|
||||
|
||||
struct backing_dev_info bdi;
|
||||
|
||||
/* BTREE CACHE */
|
||||
struct bio_set btree_read_bio;
|
||||
|
||||
struct btree_root btree_roots[BTREE_ID_NR];
|
||||
bool btree_roots_dirty;
|
||||
struct mutex btree_root_lock;
|
||||
|
||||
struct btree_cache btree_cache;
|
||||
@ -710,6 +709,14 @@ struct bch_fs {
|
||||
#undef BCH_TIME_STAT
|
||||
};
|
||||
|
||||
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
|
||||
{
|
||||
#ifndef NO_BCACHEFS_FS
|
||||
if (c->vfs_sb)
|
||||
c->vfs_sb->s_bdi->ra_pages = ra_pages;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool bch2_fs_running(struct bch_fs *c)
|
||||
{
|
||||
return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
|
||||
|
@ -593,18 +593,24 @@ struct bch_inode_generation {
|
||||
} __attribute__((packed, aligned(8)));
|
||||
BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION);
|
||||
|
||||
#define BCH_INODE_FIELDS() \
|
||||
BCH_INODE_FIELD(bi_atime, 64) \
|
||||
BCH_INODE_FIELD(bi_ctime, 64) \
|
||||
BCH_INODE_FIELD(bi_mtime, 64) \
|
||||
BCH_INODE_FIELD(bi_otime, 64) \
|
||||
BCH_INODE_FIELD(bi_size, 64) \
|
||||
BCH_INODE_FIELD(bi_sectors, 64) \
|
||||
BCH_INODE_FIELD(bi_uid, 32) \
|
||||
BCH_INODE_FIELD(bi_gid, 32) \
|
||||
BCH_INODE_FIELD(bi_nlink, 32) \
|
||||
BCH_INODE_FIELD(bi_generation, 32) \
|
||||
BCH_INODE_FIELD(bi_dev, 32)
|
||||
#define BCH_INODE_FIELDS() \
|
||||
BCH_INODE_FIELD(bi_atime, 64) \
|
||||
BCH_INODE_FIELD(bi_ctime, 64) \
|
||||
BCH_INODE_FIELD(bi_mtime, 64) \
|
||||
BCH_INODE_FIELD(bi_otime, 64) \
|
||||
BCH_INODE_FIELD(bi_size, 64) \
|
||||
BCH_INODE_FIELD(bi_sectors, 64) \
|
||||
BCH_INODE_FIELD(bi_uid, 32) \
|
||||
BCH_INODE_FIELD(bi_gid, 32) \
|
||||
BCH_INODE_FIELD(bi_nlink, 32) \
|
||||
BCH_INODE_FIELD(bi_generation, 32) \
|
||||
BCH_INODE_FIELD(bi_dev, 32) \
|
||||
BCH_INODE_FIELD(bi_data_checksum, 8) \
|
||||
BCH_INODE_FIELD(bi_compression, 8)
|
||||
|
||||
#define BCH_INODE_FIELDS_INHERIT() \
|
||||
BCH_INODE_FIELD(bi_data_checksum) \
|
||||
BCH_INODE_FIELD(bi_compression)
|
||||
|
||||
enum {
|
||||
/*
|
||||
@ -794,7 +800,7 @@ struct bch_sb_layout {
|
||||
__u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
|
||||
__u8 nr_superblocks;
|
||||
__u8 pad[5];
|
||||
__u64 sb_offset[61];
|
||||
__le64 sb_offset[61];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
#define BCH_SB_LAYOUT_SECTOR 7
|
||||
@ -1089,6 +1095,11 @@ struct jset_entry {
|
||||
};
|
||||
};
|
||||
|
||||
struct jset_entry_blacklist {
|
||||
struct jset_entry entry;
|
||||
__le64 seq;
|
||||
};
|
||||
|
||||
#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
|
||||
|
||||
enum {
|
||||
|
@ -1,6 +1,7 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "bset.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -80,37 +81,6 @@ static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
|
||||
const struct bkey_format *format) {}
|
||||
#endif
|
||||
|
||||
int bch2_bkey_to_text(char *buf, size_t size, const struct bkey *k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
|
||||
|
||||
p("u64s %u type %u %llu:%llu snap %u len %u ver %llu",
|
||||
k->u64s, k->type, k->p.inode, k->p.offset,
|
||||
k->p.snapshot, k->size, k->version.lo);
|
||||
|
||||
BUG_ON(bkey_packed(k));
|
||||
|
||||
switch (k->type) {
|
||||
case KEY_TYPE_DELETED:
|
||||
p(" deleted");
|
||||
break;
|
||||
case KEY_TYPE_DISCARD:
|
||||
p(" discard");
|
||||
break;
|
||||
case KEY_TYPE_ERROR:
|
||||
p(" error");
|
||||
break;
|
||||
case KEY_TYPE_COOKIE:
|
||||
p(" cookie");
|
||||
break;
|
||||
}
|
||||
#undef p
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
struct pack_state {
|
||||
const struct bkey_format *format;
|
||||
unsigned bits; /* bits remaining in current word */
|
||||
@ -336,7 +306,8 @@ bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
|
||||
* Extents - we have to guarantee that if an extent is packed, a trimmed
|
||||
* version will also pack:
|
||||
*/
|
||||
if (bkey_start_offset(in) < format->field_offset[BKEY_FIELD_OFFSET])
|
||||
if (bkey_start_offset(in) <
|
||||
le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET]))
|
||||
return false;
|
||||
|
||||
pack_state_finish(&state, out);
|
||||
@ -800,7 +771,7 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
|
||||
bool *eax_zeroed)
|
||||
{
|
||||
unsigned bits = format->bits_per_field[field];
|
||||
u64 offset = format->field_offset[field];
|
||||
u64 offset = le64_to_cpu(format->field_offset[field]);
|
||||
unsigned i, byte, bit_offset, align, shl, shr;
|
||||
|
||||
if (!bits && !offset) {
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "vstructs.h"
|
||||
|
||||
void bch2_to_binary(char *, const u64 *, unsigned);
|
||||
int bch2_bkey_to_text(char *, size_t, const struct bkey *);
|
||||
|
||||
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
|
||||
|
||||
@ -377,7 +376,8 @@ static inline u64 bkey_field_max(const struct bkey_format *f,
|
||||
enum bch_bkey_fields nr)
|
||||
{
|
||||
return f->bits_per_field[nr] < 64
|
||||
? f->field_offset[nr] + ~(~0ULL << f->bits_per_field[nr])
|
||||
? (le64_to_cpu(f->field_offset[nr]) +
|
||||
~(~0ULL << f->bits_per_field[nr]))
|
||||
: U64_MAX;
|
||||
}
|
||||
|
||||
|
@ -18,28 +18,11 @@ const struct bkey_ops *bch2_bkey_ops[] = {
|
||||
[BKEY_TYPE_BTREE] = &bch2_bkey_btree_ops,
|
||||
};
|
||||
|
||||
/* Returns string indicating reason for being invalid, or NULL if valid: */
|
||||
const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k)
|
||||
const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
|
||||
if (k.k->u64s < BKEY_U64s)
|
||||
return "u64s too small";
|
||||
|
||||
if (!ops->is_extents) {
|
||||
if (k.k->size)
|
||||
return "nonzero size field";
|
||||
} else {
|
||||
if ((k.k->size == 0) != bkey_deleted(k.k))
|
||||
return "bad size field";
|
||||
}
|
||||
|
||||
if (ops->is_extents &&
|
||||
!k.k->size &&
|
||||
!bkey_deleted(k.k))
|
||||
return "zero size field";
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_DELETED:
|
||||
case KEY_TYPE_DISCARD:
|
||||
@ -63,8 +46,41 @@ const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
|
||||
}
|
||||
}
|
||||
|
||||
const char *bch2_btree_bkey_invalid(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_s_c k)
|
||||
const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
|
||||
if (k.k->u64s < BKEY_U64s)
|
||||
return "u64s too small";
|
||||
|
||||
if (!ops->is_extents) {
|
||||
if (k.k->size)
|
||||
return "nonzero size field";
|
||||
} else {
|
||||
if ((k.k->size == 0) != bkey_deleted(k.k))
|
||||
return "bad size field";
|
||||
}
|
||||
|
||||
if (ops->is_extents &&
|
||||
!k.k->size &&
|
||||
!bkey_deleted(k.k))
|
||||
return "zero size field";
|
||||
|
||||
if (k.k->p.snapshot)
|
||||
return "nonzero snapshot";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
return __bch2_bkey_invalid(c, type, k) ?:
|
||||
bch2_bkey_val_invalid(c, type, k);
|
||||
}
|
||||
|
||||
const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
|
||||
{
|
||||
if (bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0)
|
||||
return "key before start of btree node";
|
||||
@ -72,10 +88,7 @@ const char *bch2_btree_bkey_invalid(struct bch_fs *c, struct btree *b,
|
||||
if (bkey_cmp(k.k->p, b->data->max_key) > 0)
|
||||
return "key past end of btree node";
|
||||
|
||||
if (k.k->p.snapshot)
|
||||
return "nonzero snapshot";
|
||||
|
||||
return bch2_bkey_invalid(c, btree_node_type(b), k);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
||||
@ -86,7 +99,8 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
||||
|
||||
BUG_ON(!k.k->u64s);
|
||||
|
||||
invalid = bch2_btree_bkey_invalid(c, b, k);
|
||||
invalid = bch2_bkey_invalid(c, type, k) ?:
|
||||
bch2_bkey_in_btree_node(b, k);
|
||||
if (invalid) {
|
||||
char buf[160];
|
||||
|
||||
@ -100,33 +114,62 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
||||
ops->key_debugcheck(c, b, k);
|
||||
}
|
||||
|
||||
char *bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
|
||||
|
||||
int bch2_bkey_to_text(char *buf, size_t size, const struct bkey *k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||
ops->val_to_text)
|
||||
ops->val_to_text(c, buf, size, k);
|
||||
p("u64s %u type %u ", k->u64s, k->type);
|
||||
|
||||
return buf;
|
||||
if (bkey_cmp(k->p, POS_MAX))
|
||||
p("%llu:%llu", k->p.inode, k->p.offset);
|
||||
else
|
||||
p("POS_MAX");
|
||||
|
||||
p(" snap %u len %u ver %llu", k->p.snapshot, k->size, k->version.lo);
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
char *bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
int bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
out += bch2_bkey_to_text(out, end - out, k.k);
|
||||
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||
ops->val_to_text) {
|
||||
out += scnprintf(out, end - out, ": ");
|
||||
ops->val_to_text(c, out, end - out, k);
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_DELETED:
|
||||
p(" deleted");
|
||||
break;
|
||||
case KEY_TYPE_DISCARD:
|
||||
p(" discard");
|
||||
break;
|
||||
case KEY_TYPE_ERROR:
|
||||
p(" error");
|
||||
break;
|
||||
case KEY_TYPE_COOKIE:
|
||||
p(" cookie");
|
||||
break;
|
||||
default:
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text)
|
||||
ops->val_to_text(c, buf, size, k);
|
||||
break;
|
||||
}
|
||||
|
||||
return buf;
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
int bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
out += bch2_bkey_to_text(out, end - out, k.k);
|
||||
out += scnprintf(out, end - out, ": ");
|
||||
out += bch2_val_to_text(c, type, out, end - out, k);
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
void bch2_bkey_swab(enum bkey_type type,
|
||||
|
@ -64,15 +64,19 @@ struct bkey_ops {
|
||||
bool is_extents;
|
||||
};
|
||||
|
||||
const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type,
|
||||
struct bkey_s_c);
|
||||
const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
|
||||
const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
|
||||
const char *bch2_btree_bkey_invalid(struct bch_fs *, struct btree *,
|
||||
struct bkey_s_c);
|
||||
const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
||||
char *bch2_val_to_text(struct bch_fs *, enum bkey_type,
|
||||
char *, size_t, struct bkey_s_c);
|
||||
char *bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
|
||||
char *, size_t, struct bkey_s_c);
|
||||
|
||||
int bch2_bkey_to_text(char *, size_t, const struct bkey *);
|
||||
int bch2_val_to_text(struct bch_fs *, enum bkey_type,
|
||||
char *, size_t, struct bkey_s_c);
|
||||
int bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
|
||||
char *, size_t, struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
|
||||
struct bkey_packed *);
|
||||
|
@ -96,7 +96,7 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
|
||||
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, ptr);
|
||||
|
||||
if (gen_after(ca->oldest_gens[b], ptr->gen))
|
||||
@ -159,14 +159,15 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
|
||||
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
|
||||
(!c->opts.nofsck &&
|
||||
fsck_err_on(!bch2_sb_has_replicas(c, e, data_type), c,
|
||||
"superblock not marked as containing replicas"))) {
|
||||
"superblock not marked as containing replicas (type %u)",
|
||||
data_type))) {
|
||||
ret = bch2_check_mark_super(c, e, data_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr);
|
||||
|
||||
if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
|
||||
@ -315,14 +316,14 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
for (i = 0; i < layout->nr_superblocks; i++) {
|
||||
if (layout->sb_offset[i] == BCH_SB_SECTOR)
|
||||
u64 offset = le64_to_cpu(layout->sb_offset[i]);
|
||||
|
||||
if (offset == BCH_SB_SECTOR)
|
||||
mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
|
||||
BUCKET_SB, flags);
|
||||
|
||||
mark_metadata_sectors(c, ca,
|
||||
layout->sb_offset[i],
|
||||
layout->sb_offset[i] +
|
||||
(1 << layout->sb_max_size_bits),
|
||||
mark_metadata_sectors(c, ca, offset,
|
||||
offset + (1 << layout->sb_max_size_bits),
|
||||
BUCKET_SB, flags);
|
||||
}
|
||||
|
||||
@ -414,7 +415,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
|
||||
spin_lock(&ob->lock);
|
||||
if (ob->valid) {
|
||||
gc_pos_set(c, gc_pos_alloc(c, ob));
|
||||
ca = c->devs[ob->ptr.dev];
|
||||
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET(ca, &ob->ptr), true,
|
||||
gc_pos_alloc(c, ob),
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
@ -424,7 +425,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_gc_start(struct bch_fs *c)
|
||||
static void bch2_gc_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
|
@ -556,7 +556,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
struct bset_tree *t;
|
||||
struct bset *start_bset = bset(b, &b->set[start_idx]);
|
||||
bool used_mempool = false;
|
||||
u64 start_time;
|
||||
u64 start_time, seq = 0;
|
||||
unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1;
|
||||
bool sorting_entire_node = start_idx == 0 &&
|
||||
end_idx == b->nsets;
|
||||
@ -595,12 +595,9 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
bch2_time_stats_update(&c->btree_sort_time, start_time);
|
||||
|
||||
/* Make sure we preserve bset journal_seq: */
|
||||
for (t = b->set + start_idx + 1;
|
||||
t < b->set + end_idx;
|
||||
t++)
|
||||
start_bset->journal_seq =
|
||||
max(start_bset->journal_seq,
|
||||
bset(b, t)->journal_seq);
|
||||
for (t = b->set + start_idx; t < b->set + end_idx; t++)
|
||||
seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq));
|
||||
start_bset->journal_seq = cpu_to_le64(seq);
|
||||
|
||||
if (sorting_entire_node) {
|
||||
unsigned u64s = le16_to_cpu(out->keys.u64s);
|
||||
@ -958,6 +955,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
{
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
struct bpos prev_pos = POS_MIN;
|
||||
enum bkey_type type = btree_node_type(b);
|
||||
bool seen_non_whiteout = false;
|
||||
const char *err;
|
||||
int ret = 0;
|
||||
@ -1025,7 +1023,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
|
||||
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
||||
seen_non_whiteout = true;
|
||||
whiteout_u64s = 0;
|
||||
*whiteout_u64s = 0;
|
||||
}
|
||||
|
||||
for (k = i->start;
|
||||
@ -1059,16 +1057,17 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
|
||||
bch2_bkey_swab(btree_node_type(b), &b->format, k);
|
||||
bch2_bkey_swab(type, &b->format, k);
|
||||
|
||||
u = bkey_disassemble(b, k, &tmp);
|
||||
|
||||
invalid = bch2_btree_bkey_invalid(c, b, u);
|
||||
invalid = __bch2_bkey_invalid(c, type, u) ?:
|
||||
bch2_bkey_in_btree_node(b, u) ?:
|
||||
(write ? bch2_bkey_val_invalid(c, type, u) : NULL);
|
||||
if (invalid) {
|
||||
char buf[160];
|
||||
|
||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||
buf, sizeof(buf), u);
|
||||
bch2_bkey_val_to_text(c, type, buf, sizeof(buf), u);
|
||||
btree_err(BTREE_ERR_FIXABLE, c, b, i,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
|
||||
@ -1114,6 +1113,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
struct btree_node_entry *bne;
|
||||
struct btree_node_iter *iter;
|
||||
struct btree_node *sorted;
|
||||
struct bkey_packed *k;
|
||||
struct bset *i;
|
||||
bool used_mempool;
|
||||
unsigned u64s;
|
||||
int ret, retry_read = 0, write = READ;
|
||||
@ -1137,7 +1138,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
unsigned sectors, whiteout_u64s = 0;
|
||||
struct nonce nonce;
|
||||
struct bch_csum csum;
|
||||
struct bset *i;
|
||||
|
||||
if (!b->written) {
|
||||
i = &b->data->keys;
|
||||
@ -1238,6 +1238,31 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
|
||||
btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
|
||||
|
||||
i = &b->data->keys;
|
||||
for (k = i->start; k != vstruct_last(i);) {
|
||||
enum bkey_type type = btree_node_type(b);
|
||||
struct bkey tmp;
|
||||
struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
|
||||
const char *invalid = bch2_bkey_val_invalid(c, type, u);
|
||||
|
||||
if (invalid) {
|
||||
char buf[160];
|
||||
|
||||
bch2_bkey_val_to_text(c, type, buf, sizeof(buf), u);
|
||||
btree_err(BTREE_ERR_FIXABLE, c, b, i,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
|
||||
btree_keys_account_key_drop(&b->nr, 0, k);
|
||||
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
memmove_u64s_down(k, bkey_next(k),
|
||||
(u64 *) vstruct_end(i) - (u64 *) k);
|
||||
continue;
|
||||
}
|
||||
|
||||
k = bkey_next(k);
|
||||
}
|
||||
|
||||
bch2_bset_build_aux_tree(b, b->set, false);
|
||||
|
||||
set_needs_whiteout(btree_bset_first(b));
|
||||
@ -1278,13 +1303,13 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
bio->bi_iter.bi_size = btree_bytes(c);
|
||||
submit_bio_wait(bio);
|
||||
start:
|
||||
bch2_dev_io_err_on(bio->bi_error, rb->pick.ca, "btree read");
|
||||
bch2_dev_io_err_on(bio->bi_status, rb->pick.ca, "btree read");
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
|
||||
__set_bit(rb->pick.ca->dev_idx, avoid.d);
|
||||
rb->pick = bch2_btree_pick_ptr(c, b, &avoid);
|
||||
|
||||
if (!bio->bi_error &&
|
||||
if (!bio->bi_status &&
|
||||
!bch2_btree_node_read_done(c, b, !IS_ERR_OR_NULL(rb->pick.ca)))
|
||||
goto out;
|
||||
} while (!IS_ERR_OR_NULL(rb->pick.ca));
|
||||
@ -1377,17 +1402,24 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
|
||||
|
||||
bch2_btree_node_read(c, b, true);
|
||||
six_unlock_write(&b->lock);
|
||||
|
||||
if (btree_node_read_error(b)) {
|
||||
six_unlock_intent(&b->lock);
|
||||
return -EIO;
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
list_move(&b->list, &c->btree_cache.freeable);
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_btree_set_root_for_read(c, b);
|
||||
err:
|
||||
six_unlock_write(&b->lock);
|
||||
six_unlock_intent(&b->lock);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
@ -1412,35 +1444,57 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
|
||||
struct closure *cl = wbio->cl;
|
||||
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
|
||||
struct bkey_i_extent *new_key;
|
||||
struct bkey_s_extent e;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct btree_iter iter;
|
||||
int ret;
|
||||
|
||||
six_lock_read(&b->lock);
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
six_unlock_read(&b->lock);
|
||||
__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
|
||||
BTREE_MAX_DEPTH,
|
||||
b->level, 0);
|
||||
retry:
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!bkey_extent_is_data(&tmp.k.k) || !PTR_HASH(&tmp.k)) {
|
||||
/* Node has been freed: */
|
||||
/* has node been freed? */
|
||||
if (iter.nodes[b->level] != b) {
|
||||
/* node has been freed: */
|
||||
if (!btree_node_dying(b))
|
||||
panic("foo4\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!btree_node_hashed(b))
|
||||
panic("foo5\n");
|
||||
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
|
||||
new_key = bkey_i_to_extent(&tmp.k);
|
||||
e = extent_i_to_s(new_key);
|
||||
extent_for_each_ptr_backwards(e, ptr)
|
||||
if (bch2_dev_list_has_dev(wbio->failed, ptr->dev))
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
|
||||
while (wbio->replicas_failed) {
|
||||
unsigned idx = __fls(wbio->replicas_failed);
|
||||
if (!bch2_extent_nr_ptrs(e.c))
|
||||
goto err;
|
||||
|
||||
bch2_extent_drop_ptr_idx(extent_i_to_s(new_key), idx);
|
||||
wbio->replicas_failed ^= 1 << idx;
|
||||
}
|
||||
|
||||
if (!bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)) ||
|
||||
bch2_btree_node_update_key(c, b, new_key)) {
|
||||
set_btree_node_noevict(b);
|
||||
bch2_fatal_error(c);
|
||||
}
|
||||
ret = bch2_btree_node_update_key(c, &iter, b, new_key);
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
if (ret)
|
||||
goto err;
|
||||
out:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bio_put(&wbio->bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
bch2_fs_fatal_error(c, "fatal error writing btree node");
|
||||
goto out;
|
||||
}
|
||||
|
||||
void bch2_btree_write_error_work(struct work_struct *work)
|
||||
@ -1470,12 +1524,17 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
struct closure *cl = !wbio->split ? wbio->cl : NULL;
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
unsigned long flags;
|
||||
|
||||
bch2_latency_acct(ca, wbio->submit_time_us, WRITE);
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_error, ca, "btree write") ||
|
||||
bch2_meta_write_fault("btree"))
|
||||
set_bit(wbio->ptr_idx, (unsigned long *) &orig->replicas_failed);
|
||||
if (bio->bi_status == BLK_STS_REMOVED ||
|
||||
bch2_dev_io_err_on(bio->bi_status, ca, "btree write") ||
|
||||
bch2_meta_write_fault("btree")) {
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bch2_dev_list_add_dev(&orig->failed, ca->dev_idx);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
}
|
||||
|
||||
if (wbio->have_io_ref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
@ -1491,12 +1550,11 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
wbio->used_mempool,
|
||||
wbio->data);
|
||||
|
||||
if (wbio->replicas_failed) {
|
||||
unsigned long flags;
|
||||
|
||||
if (wbio->failed.nr) {
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bio_list_add(&c->btree_write_error_list, &wbio->bio);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
|
||||
queue_work(c->wq, &c->btree_write_error_work);
|
||||
return;
|
||||
}
|
||||
@ -1707,6 +1765,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
wbio = wbio_init(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write));
|
||||
wbio->cl = parent;
|
||||
wbio->failed.nr = 0;
|
||||
wbio->order = order;
|
||||
wbio->used_mempool = used_mempool;
|
||||
wbio->data = data;
|
||||
|
@ -75,8 +75,8 @@ bool bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
|
||||
{
|
||||
struct btree_iter *linked;
|
||||
struct btree *b = iter->nodes[level];
|
||||
enum btree_node_locked_type want = btree_lock_want(iter, level);
|
||||
enum btree_node_locked_type have = btree_node_locked_type(iter, level);
|
||||
int want = btree_lock_want(iter, level);
|
||||
int have = btree_node_locked_type(iter, level);
|
||||
|
||||
if (want == have)
|
||||
return true;
|
||||
@ -108,6 +108,17 @@ success:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool bch2_btree_iter_relock(struct btree_iter *iter)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
for (l = iter->level; l < iter->locks_want && iter->nodes[l]; l++)
|
||||
if (!bch2_btree_node_relock(iter, l))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Slowpath: */
|
||||
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
unsigned level,
|
||||
@ -214,7 +225,6 @@ bool __bch2_btree_iter_set_locks_want(struct btree_iter *iter,
|
||||
unsigned new_locks_want)
|
||||
{
|
||||
struct btree_iter *linked;
|
||||
unsigned l;
|
||||
|
||||
/* Drop locks we don't want anymore: */
|
||||
if (new_locks_want < iter->locks_want)
|
||||
@ -228,12 +238,9 @@ bool __bch2_btree_iter_set_locks_want(struct btree_iter *iter,
|
||||
iter->locks_want = new_locks_want;
|
||||
btree_iter_drop_extra_locks(iter);
|
||||
|
||||
for (l = iter->level; l < iter->locks_want && iter->nodes[l]; l++)
|
||||
if (!bch2_btree_node_relock(iter, l))
|
||||
goto fail;
|
||||
if (bch2_btree_iter_relock(iter))
|
||||
return true;
|
||||
|
||||
return true;
|
||||
fail:
|
||||
/*
|
||||
* Just an optimization: ancestor nodes must be locked before child
|
||||
* nodes, so set locks_want on iterators that might lock ancestors
|
||||
|
@ -75,7 +75,7 @@ static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
|
||||
mark_btree_node_locked(iter, level, SIX_LOCK_intent);
|
||||
}
|
||||
|
||||
static inline int btree_lock_want(struct btree_iter *iter, int level)
|
||||
static inline enum six_lock_type btree_lock_want(struct btree_iter *iter, int level)
|
||||
{
|
||||
return level < iter->locks_want
|
||||
? SIX_LOCK_intent
|
||||
@ -111,6 +111,7 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
}
|
||||
|
||||
bool bch2_btree_node_relock(struct btree_iter *, unsigned);
|
||||
bool bch2_btree_iter_relock(struct btree_iter *);
|
||||
|
||||
void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *);
|
||||
void bch2_btree_node_lock_write(struct btree *, struct btree_iter *);
|
||||
|
@ -196,6 +196,7 @@ enum btree_flags {
|
||||
BTREE_NODE_accessed,
|
||||
BTREE_NODE_write_in_flight,
|
||||
BTREE_NODE_just_written,
|
||||
BTREE_NODE_dying,
|
||||
};
|
||||
|
||||
BTREE_FLAG(read_in_flight);
|
||||
@ -207,6 +208,7 @@ BTREE_FLAG(write_idx);
|
||||
BTREE_FLAG(accessed);
|
||||
BTREE_FLAG(write_in_flight);
|
||||
BTREE_FLAG(just_written);
|
||||
BTREE_FLAG(dying);
|
||||
|
||||
static inline struct btree_write *btree_current_write(struct btree *b)
|
||||
{
|
||||
|
@ -130,7 +130,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
|
||||
|
||||
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
|
||||
__le64, unsigned);
|
||||
int bch2_btree_node_update_key(struct bch_fs *, struct btree *,
|
||||
struct bkey_i_extent *);
|
||||
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
|
||||
struct btree *, struct bkey_i_extent *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_UPDATE_H */
|
||||
|
@ -21,7 +21,7 @@
|
||||
static void btree_node_will_make_reachable(struct btree_update *,
|
||||
struct btree *);
|
||||
static void btree_update_drop_new_node(struct bch_fs *, struct btree *);
|
||||
static void bch2_btree_set_root_ondisk(struct bch_fs *, struct btree *);
|
||||
static void bch2_btree_set_root_ondisk(struct bch_fs *, struct btree *, int);
|
||||
|
||||
/* Debug code: */
|
||||
|
||||
@ -686,7 +686,7 @@ retry:
|
||||
BUG_ON(c->btree_roots[b->btree_id].as != as);
|
||||
c->btree_roots[b->btree_id].as = NULL;
|
||||
|
||||
bch2_btree_set_root_ondisk(c, b);
|
||||
bch2_btree_set_root_ondisk(c, b, WRITE);
|
||||
|
||||
/*
|
||||
* We don't have to wait anything anything here (before
|
||||
@ -914,6 +914,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
struct btree_write *w;
|
||||
struct bset_tree *t;
|
||||
|
||||
set_btree_node_dying(b);
|
||||
btree_interior_update_add_node_reference(as, b);
|
||||
|
||||
/*
|
||||
@ -925,7 +926,8 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
* in with keys that aren't in the journal anymore:
|
||||
*/
|
||||
for_each_bset(b, t)
|
||||
as->journal_seq = max(as->journal_seq, bset(b, t)->journal_seq);
|
||||
as->journal_seq = max(as->journal_seq,
|
||||
le64_to_cpu(bset(b, t)->journal_seq));
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
@ -1027,6 +1029,10 @@ static void __bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
|
||||
mutex_lock(&c->btree_root_lock);
|
||||
BUG_ON(btree_node_root(c, b) &&
|
||||
(b->level < btree_node_root(c, b)->level ||
|
||||
!btree_node_dying(btree_node_root(c, b))));
|
||||
|
||||
btree_node_root(c, b) = b;
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
|
||||
@ -1054,7 +1060,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
|
||||
gc_pos_btree_root(b->btree_id));
|
||||
}
|
||||
|
||||
static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b)
|
||||
static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw)
|
||||
{
|
||||
struct btree_root *r = &c->btree_roots[b->btree_id];
|
||||
|
||||
@ -1064,6 +1070,8 @@ static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b)
|
||||
bkey_copy(&r->key, &b->key);
|
||||
r->level = b->level;
|
||||
r->alive = true;
|
||||
if (rw == WRITE)
|
||||
c->btree_roots_dirty = true;
|
||||
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
}
|
||||
@ -1787,64 +1795,16 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_update_key(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_i_extent *new_key)
|
||||
static void __bch2_btree_node_update_key(struct bch_fs *c,
|
||||
struct btree_update *as,
|
||||
struct btree_iter *iter,
|
||||
struct btree *b, struct btree *new_hash,
|
||||
struct bkey_i_extent *new_key)
|
||||
{
|
||||
struct btree_update *as = NULL;
|
||||
struct btree *parent, *new_hash = NULL;
|
||||
struct btree_iter iter;
|
||||
struct closure cl;
|
||||
struct btree *parent;
|
||||
bool must_rewrite_parent = false;
|
||||
int ret;
|
||||
|
||||
__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
|
||||
BTREE_MAX_DEPTH,
|
||||
b->level, 0);
|
||||
closure_init_stack(&cl);
|
||||
|
||||
ret = bch2_check_mark_super(c, extent_i_to_s_c(new_key), BCH_DATA_BTREE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
retry:
|
||||
down_read(&c->gc_lock);
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* check PTR_HASH() after @b is locked by btree_iter_traverse(): */
|
||||
if (!new_hash &&
|
||||
PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
|
||||
/* bch2_btree_reserve_get will unlock */
|
||||
do {
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
closure_sync(&cl);
|
||||
} while (ret == -EAGAIN);
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
new_hash = bch2_btree_node_mem_alloc(c);
|
||||
}
|
||||
|
||||
as = bch2_btree_update_start(c, iter.btree_id,
|
||||
btree_update_reserve_required(c, b),
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
&cl);
|
||||
if (IS_ERR(as)) {
|
||||
ret = PTR_ERR(as);
|
||||
if (ret == -EAGAIN || ret == -EINTR) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
goto retry;
|
||||
}
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
/*
|
||||
* Two corner cases that need to be thought about here:
|
||||
*
|
||||
@ -1869,22 +1829,12 @@ retry:
|
||||
if (b->will_make_reachable)
|
||||
must_rewrite_parent = true;
|
||||
|
||||
/* other case: btree node being freed */
|
||||
if (iter.nodes[b->level] != b) {
|
||||
/* node has been freed: */
|
||||
BUG_ON(btree_node_hashed(b));
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
if (must_rewrite_parent)
|
||||
as->flags |= BTREE_INTERIOR_UPDATE_MUST_REWRITE;
|
||||
|
||||
btree_interior_update_add_node_reference(as, b);
|
||||
|
||||
parent = iter.nodes[b->level + 1];
|
||||
parent = iter->nodes[b->level + 1];
|
||||
if (parent) {
|
||||
if (new_hash) {
|
||||
bkey_copy(&new_hash->key, &new_key->k_i);
|
||||
@ -1893,8 +1843,8 @@ retry:
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
bch2_btree_insert_node(as, parent, &iter,
|
||||
&keylist_single(&new_key->k_i));
|
||||
bch2_keylist_add(&as->parent_keys, &new_key->k_i);
|
||||
bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
|
||||
|
||||
if (new_hash) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
@ -1914,7 +1864,7 @@ retry:
|
||||
|
||||
BUG_ON(btree_node_root(c, b) != b);
|
||||
|
||||
bch2_btree_node_lock_write(b, &iter);
|
||||
bch2_btree_node_lock_write(b, iter);
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i),
|
||||
c->opts.btree_node_size, true,
|
||||
@ -1925,14 +1875,94 @@ retry:
|
||||
&stats);
|
||||
bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
|
||||
gc_pos_btree_root(b->btree_id));
|
||||
bkey_copy(&b->key, &new_key->k_i);
|
||||
|
||||
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||
|
||||
bkey_copy(&b->key, &new_key->k_i);
|
||||
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
|
||||
BUG_ON(ret);
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
} else {
|
||||
bkey_copy(&b->key, &new_key->k_i);
|
||||
}
|
||||
|
||||
btree_update_updated_root(as);
|
||||
bch2_btree_node_unlock_write(b, &iter);
|
||||
bch2_btree_node_unlock_write(b, iter);
|
||||
}
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
out:
|
||||
}
|
||||
|
||||
int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
|
||||
struct btree *b, struct bkey_i_extent *new_key)
|
||||
{
|
||||
struct btree_update *as = NULL;
|
||||
struct btree *new_hash = NULL;
|
||||
struct closure cl;
|
||||
int ret;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
bch2_btree_iter_unlock(iter);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_btree_iter_relock(iter)) {
|
||||
ret = -EINTR;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* check PTR_HASH() after @b is locked by btree_iter_traverse(): */
|
||||
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
|
||||
/* bch2_btree_reserve_get will unlock */
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
if (ret) {
|
||||
ret = -EINTR;
|
||||
|
||||
bch2_btree_iter_unlock(iter);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_btree_iter_relock(iter))
|
||||
goto err;
|
||||
}
|
||||
|
||||
new_hash = bch2_btree_node_mem_alloc(c);
|
||||
}
|
||||
|
||||
as = bch2_btree_update_start(c, iter->btree_id,
|
||||
btree_update_reserve_required(c, b),
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
&cl);
|
||||
if (IS_ERR(as)) {
|
||||
ret = PTR_ERR(as);
|
||||
if (ret == -EAGAIN)
|
||||
ret = -EINTR;
|
||||
|
||||
if (ret != -EINTR)
|
||||
goto err;
|
||||
|
||||
bch2_btree_iter_unlock(iter);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
if (!bch2_btree_iter_relock(iter))
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_check_mark_super(c, extent_i_to_s_c(new_key), BCH_DATA_BTREE);
|
||||
if (ret)
|
||||
goto err_free_update;
|
||||
|
||||
__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
|
||||
err:
|
||||
if (new_hash) {
|
||||
mutex_lock(&c->btree_cache.lock);
|
||||
list_move(&new_hash->list, &c->btree_cache.freeable);
|
||||
@ -1941,14 +1971,12 @@ out:
|
||||
six_unlock_write(&new_hash->lock);
|
||||
six_unlock_intent(&new_hash->lock);
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
return ret;
|
||||
err:
|
||||
if (as)
|
||||
bch2_btree_update_free(as);
|
||||
goto out;
|
||||
err_free_update:
|
||||
bch2_btree_update_free(as);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Init code: */
|
||||
@ -1962,7 +1990,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
|
||||
BUG_ON(btree_node_root(c, b));
|
||||
|
||||
__bch2_btree_set_root_inmem(c, b);
|
||||
bch2_btree_set_root_ondisk(c, b);
|
||||
bch2_btree_set_root_ondisk(c, b, READ);
|
||||
}
|
||||
|
||||
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
@ -1998,7 +2026,7 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
BUG_ON(btree_node_root(c, b));
|
||||
|
||||
bch2_btree_set_root_inmem(as, b);
|
||||
bch2_btree_set_root_ondisk(c, b);
|
||||
bch2_btree_set_root_ondisk(c, b, WRITE);
|
||||
|
||||
bch2_btree_open_bucket_put(c, b);
|
||||
six_unlock_intent(&b->lock);
|
||||
|
@ -174,9 +174,11 @@ do { \
|
||||
|
||||
#define bch2_usage_read_raw(_stats) \
|
||||
({ \
|
||||
typeof(*this_cpu_ptr(_stats)) _acc = { 0 }; \
|
||||
typeof(*this_cpu_ptr(_stats)) _acc; \
|
||||
int cpu; \
|
||||
\
|
||||
memset(&_acc, 0, sizeof(_acc)); \
|
||||
\
|
||||
for_each_possible_cpu(cpu) \
|
||||
bch2_usage_add(&_acc, per_cpu_ptr((_stats), cpu)); \
|
||||
\
|
||||
@ -479,7 +481,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
{
|
||||
struct bucket_mark old, new;
|
||||
unsigned saturated;
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
|
||||
unsigned data_type = type == S_META
|
||||
? BUCKET_BTREE : BUCKET_DATA;
|
||||
|
@ -68,16 +68,14 @@ struct bch_dev_usage {
|
||||
|
||||
struct bch_fs_usage {
|
||||
/* all fields are in units of 512 byte sectors: */
|
||||
|
||||
/* _uncompressed_ sectors: */
|
||||
u64 online_reserved;
|
||||
u64 available_cache;
|
||||
|
||||
struct {
|
||||
u64 data[S_ALLOC_NR];
|
||||
u64 persistent_reserved;
|
||||
} s[BCH_REPLICAS_MAX];
|
||||
|
||||
u64 online_reserved;
|
||||
u64 available_cache;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bcachefs_ioctl.h"
|
||||
#include "chardev.h"
|
||||
#include "super.h"
|
||||
#include "super-io.h"
|
||||
|
||||
@ -25,7 +26,7 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
rcu_read_lock();
|
||||
ca = c->devs[dev];
|
||||
ca = rcu_dereference(c->devs[dev]);
|
||||
if (ca)
|
||||
percpu_ref_get(&ca->ref);
|
||||
rcu_read_unlock();
|
||||
@ -80,7 +81,7 @@ static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
|
||||
|
||||
devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
|
||||
|
||||
if (copy_from_user(user_devs, arg.devs,
|
||||
if (copy_from_user(user_devs, user_arg->devs,
|
||||
sizeof(u64) * arg.nr_devs))
|
||||
goto err;
|
||||
|
||||
|
@ -72,14 +72,15 @@ static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type,
|
||||
}
|
||||
}
|
||||
|
||||
static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c)
|
||||
static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
|
||||
unsigned opt)
|
||||
{
|
||||
if (c->sb.encryption_type)
|
||||
return c->opts.wide_macs
|
||||
? BCH_CSUM_CHACHA20_POLY1305_128
|
||||
: BCH_CSUM_CHACHA20_POLY1305_80;
|
||||
|
||||
return bch2_csum_opt_to_type(c->opts.data_checksum, true);
|
||||
return bch2_csum_opt_to_type(opt, true);
|
||||
}
|
||||
|
||||
static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
|
||||
@ -143,6 +144,14 @@ static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
|
||||
return nonce;
|
||||
}
|
||||
|
||||
static inline struct nonce null_nonce(void)
|
||||
{
|
||||
struct nonce ret;
|
||||
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct nonce extent_nonce(struct bversion version,
|
||||
struct bch_extent_crc_unpacked crc)
|
||||
{
|
||||
|
@ -95,11 +95,17 @@ print:
|
||||
vscnprintf(buf, sizeof(_buf), fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (c->opts.fix_errors == FSCK_OPT_EXIT) {
|
||||
bch_err(c, "%s, exiting", buf);
|
||||
mutex_unlock(&c->fsck_error_lock);
|
||||
return FSCK_ERR_EXIT;
|
||||
}
|
||||
|
||||
if (flags & FSCK_CAN_FIX) {
|
||||
if (c->opts.fix_errors == FSCK_ERR_ASK) {
|
||||
if (c->opts.fix_errors == FSCK_OPT_ASK) {
|
||||
printk(KERN_ERR "%s: fix?", buf);
|
||||
fix = ask_yn();
|
||||
} else if (c->opts.fix_errors == FSCK_ERR_YES ||
|
||||
} else if (c->opts.fix_errors == FSCK_OPT_YES ||
|
||||
(c->opts.nochanges &&
|
||||
!(flags & FSCK_CAN_IGNORE))) {
|
||||
if (print)
|
||||
|
@ -96,9 +96,10 @@ enum {
|
||||
};
|
||||
|
||||
enum fsck_err_opts {
|
||||
FSCK_ERR_NO,
|
||||
FSCK_ERR_YES,
|
||||
FSCK_ERR_ASK,
|
||||
FSCK_OPT_EXIT,
|
||||
FSCK_OPT_YES,
|
||||
FSCK_OPT_NO,
|
||||
FSCK_OPT_ASK,
|
||||
};
|
||||
|
||||
enum fsck_err_ret {
|
||||
@ -217,7 +218,7 @@ do { \
|
||||
#define bcache_io_error(c, bio, fmt, ...) \
|
||||
do { \
|
||||
__bcache_io_error(c, fmt, ##__VA_ARGS__); \
|
||||
(bio)->bi_error = -EIO; \
|
||||
(bio)->bi_status = BLK_STS_IOERR; \
|
||||
} while (0)
|
||||
|
||||
#endif /* _BCACHEFS_ERROR_H */
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "extents.h"
|
||||
#include "inode.h"
|
||||
#include "journal.h"
|
||||
#include "super.h"
|
||||
#include "super-io.h"
|
||||
#include "util.h"
|
||||
#include "xattr.h"
|
||||
@ -156,6 +157,19 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
|
||||
return nr_ptrs;
|
||||
}
|
||||
|
||||
unsigned bch2_extent_nr_good_ptrs(struct bch_fs *c, struct bkey_s_c_extent e)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned nr_ptrs = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
nr_ptrs += (!ptr->cached &&
|
||||
bch_dev_bkey_exists(c, ptr->dev)->mi.state !=
|
||||
BCH_MEMBER_STATE_FAILED);
|
||||
|
||||
return nr_ptrs;
|
||||
}
|
||||
|
||||
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_extent e;
|
||||
@ -362,7 +376,7 @@ static bool should_drop_ptr(const struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
{
|
||||
return ptr->cached && ptr_stale(c->devs[ptr->dev], ptr);
|
||||
return ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr);
|
||||
}
|
||||
|
||||
static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
|
||||
@ -411,8 +425,10 @@ static void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
|
||||
entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_crc128:
|
||||
entry->crc128.csum.hi = swab64(entry->crc64.csum_hi);
|
||||
entry->crc128.csum.lo = swab64(entry->crc64.csum_lo);
|
||||
entry->crc128.csum.hi = (__force __le64)
|
||||
swab64((__force u64) entry->crc128.csum.hi);
|
||||
entry->crc128.csum.lo = (__force __le64)
|
||||
swab64((__force u64) entry->crc128.csum.lo);
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_ptr:
|
||||
break;
|
||||
@ -432,10 +448,11 @@ static const char *extent_ptr_invalid(const struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr2;
|
||||
struct bch_dev *ca;
|
||||
|
||||
if (ptr->dev >= c->sb.nr_devices)
|
||||
if (ptr->dev >= c->sb.nr_devices ||
|
||||
!c->devs[ptr->dev])
|
||||
return "pointer to invalid device";
|
||||
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
if (!ca)
|
||||
return "pointer to invalid device";
|
||||
|
||||
@ -487,7 +504,9 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf,
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_ptr:
|
||||
ptr = entry_to_ptr(entry);
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
|
||||
? bch_dev_bkey_exists(c, ptr->dev)
|
||||
: NULL;
|
||||
|
||||
p("ptr: %u:%llu gen %u%s", ptr->dev,
|
||||
(u64) ptr->offset, ptr->gen,
|
||||
@ -528,7 +547,7 @@ static void extent_pick_read_device(struct bch_fs *c,
|
||||
struct bch_extent_crc_unpacked crc;
|
||||
|
||||
extent_for_each_ptr_crc(e, ptr, crc) {
|
||||
struct bch_dev *ca = c->devs[ptr->dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
|
||||
if (ptr->cached && ptr_stale(ca, ptr))
|
||||
continue;
|
||||
@ -621,7 +640,7 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
|
||||
bool bad;
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
g = PTR_BUCKET(ca, ptr);
|
||||
replicas++;
|
||||
|
||||
@ -1730,7 +1749,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
|
||||
memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
g = PTR_BUCKET(ca, ptr);
|
||||
replicas++;
|
||||
ptrs_per_tier[ca->mi.tier]++;
|
||||
@ -1844,7 +1863,7 @@ static void bch2_extent_to_text(struct bch_fs *c, char *buf,
|
||||
static unsigned PTR_TIER(struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
{
|
||||
return c->devs[ptr->dev]->mi.tier;
|
||||
return bch_dev_bkey_exists(c, ptr->dev)->mi.tier;
|
||||
}
|
||||
|
||||
static void bch2_extent_crc_init(union bch_extent_crc *crc,
|
||||
@ -1971,14 +1990,10 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
|
||||
struct bkey_s_extent e)
|
||||
{
|
||||
struct bch_extent_ptr *ptr;
|
||||
unsigned tier = 0, nr_cached = 0, nr_good = 0;
|
||||
unsigned tier = 0, nr_cached = 0;
|
||||
unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
|
||||
bool have_higher_tier;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached &&
|
||||
c->devs[ptr->dev]->mi.state != BCH_MEMBER_STATE_FAILED)
|
||||
nr_good++;
|
||||
|
||||
if (nr_good <= c->opts.data_replicas)
|
||||
return;
|
||||
|
||||
@ -2103,7 +2118,7 @@ static enum merge_result bch2_extent_merge(struct bch_fs *c,
|
||||
return BCH_MERGE_NOMERGE;
|
||||
|
||||
/* We don't allow extents to straddle buckets: */
|
||||
ca = c->devs[lp->dev];
|
||||
ca = bch_dev_bkey_exists(c, lp->dev);
|
||||
|
||||
if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
|
||||
return BCH_MERGE_NOMERGE;
|
||||
@ -2347,6 +2362,30 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bpos end = pos;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
end.offset += size;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos,
|
||||
BTREE_ITER_WITH_HOLES, k) {
|
||||
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
|
||||
break;
|
||||
|
||||
if (!bch2_extent_is_fully_allocated(k)) {
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct bkey_ops bch2_bkey_extent_ops = {
|
||||
.key_invalid = bch2_extent_invalid,
|
||||
.key_debugcheck = bch2_extent_debugcheck,
|
||||
|
@ -45,6 +45,7 @@ bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
|
||||
|
||||
unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent);
|
||||
unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c);
|
||||
unsigned bch2_extent_nr_good_ptrs(struct bch_fs *, struct bkey_s_c_extent);
|
||||
unsigned bch2_extent_is_compressed(struct bkey_s_c);
|
||||
|
||||
bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
|
||||
@ -243,14 +244,14 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
|
||||
case BCH_EXTENT_CRC32:
|
||||
return (struct bch_extent_crc_unpacked) {
|
||||
common_fields(crc->crc32),
|
||||
.csum.lo = crc->crc32.csum,
|
||||
.csum.lo = (__force __le64) crc->crc32.csum,
|
||||
};
|
||||
case BCH_EXTENT_CRC64:
|
||||
return (struct bch_extent_crc_unpacked) {
|
||||
common_fields(crc->crc64),
|
||||
.nonce = crc->crc64.nonce,
|
||||
.csum.lo = crc->crc64.csum_lo,
|
||||
.csum.hi = crc->crc64.csum_hi,
|
||||
.csum.lo = (__force __le64) crc->crc64.csum_lo,
|
||||
.csum.hi = (__force __le64) crc->crc64.csum_hi,
|
||||
};
|
||||
case BCH_EXTENT_CRC128:
|
||||
return (struct bch_extent_crc_unpacked) {
|
||||
@ -425,4 +426,6 @@ bool bch2_cut_front(struct bpos, struct bkey_i *);
|
||||
bool bch2_cut_back(struct bpos, struct bkey *);
|
||||
void bch2_key_resize(struct bkey *, unsigned);
|
||||
|
||||
int bch2_check_range_allocated(struct bch_fs *, struct bpos, u64);
|
||||
|
||||
#endif /* _BCACHEFS_EXTENTS_H */
|
||||
File diff suppressed because it is too large
@ -75,7 +75,7 @@ do { \
|
||||
/* Set VFS inode flags from bcachefs inode: */
|
||||
void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
|
||||
{
|
||||
set_flags(bch_flags_to_vfs, inode->ei_flags, inode->v.i_flags);
|
||||
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
|
||||
}
|
||||
|
||||
static int bch2_inode_flags_set(struct bch_inode_info *inode,
|
||||
@ -99,13 +99,13 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode,
|
||||
return -EINVAL;
|
||||
|
||||
bi->bi_flags = newflags;
|
||||
inode->v.i_ctime = current_fs_time(inode->v.i_sb);
|
||||
inode->v.i_ctime = current_time(&inode->v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
|
||||
{
|
||||
unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_flags);
|
||||
unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
|
||||
|
||||
return put_user(flags, arg);
|
||||
}
|
||||
@ -153,7 +153,7 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
|
||||
{
|
||||
struct fsxattr fa = { 0 };
|
||||
|
||||
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_flags);
|
||||
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
|
||||
|
||||
return copy_to_user(arg, &fa, sizeof(fa));
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "fs-ioctl.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "super.h"
|
||||
@ -130,10 +131,8 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
|
||||
BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
|
||||
} while (ret == -EINTR);
|
||||
|
||||
if (!ret) {
|
||||
inode->ei_size = inode_u.bi_size;
|
||||
inode->ei_flags = inode_u.bi_flags;
|
||||
}
|
||||
if (!ret)
|
||||
inode->ei_inode = inode_u;
|
||||
out:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
@ -146,7 +145,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
|
||||
return __bch2_write_inode(c, inode, NULL, NULL);
|
||||
}
|
||||
|
||||
int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
static int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -158,7 +157,7 @@ int bch2_inc_nlink(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
static int bch2_dec_nlink(struct bch_fs *c, struct bch_inode_info *inode)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@ -223,7 +222,9 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
|
||||
bch2_inode_init(c, &inode_u,
|
||||
i_uid_read(&inode->v),
|
||||
i_gid_read(&inode->v),
|
||||
inode->v.i_mode, rdev);
|
||||
inode->v.i_mode, rdev,
|
||||
&dir->ei_inode);
|
||||
|
||||
ret = bch2_inode_create(c, &inode_u,
|
||||
BLOCKDEV_INODE_MAX, 0,
|
||||
&c->unused_inode_hint);
|
||||
@ -277,7 +278,7 @@ static int bch2_vfs_dirent_create(struct bch_fs *c,
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
dir->v.i_mtime = dir->v.i_ctime = current_fs_time(c->vfs_sb);
|
||||
dir->v.i_mtime = dir->v.i_ctime = current_time(&dir->v);
|
||||
mark_inode_dirty_sync(&dir->v);
|
||||
return 0;
|
||||
}
|
||||
@ -344,7 +345,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
|
||||
|
||||
lockdep_assert_held(&inode->v.i_rwsem);
|
||||
|
||||
inode->v.i_ctime = current_fs_time(dir->v.i_sb);
|
||||
inode->v.i_ctime = current_time(&dir->v);
|
||||
|
||||
ret = bch2_inc_nlink(c, inode);
|
||||
if (ret)
|
||||
@ -473,7 +474,7 @@ static int bch2_rename(struct bch_fs *c,
|
||||
{
|
||||
struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
|
||||
struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
|
||||
struct timespec now = current_fs_time(old_dir->v.i_sb);
|
||||
struct timespec now = current_time(&old_dir->v);
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&old_dir->v.i_rwsem);
|
||||
@ -551,7 +552,7 @@ static int bch2_rename_exchange(struct bch_fs *c,
|
||||
{
|
||||
struct bch_inode_info *old_inode = to_bch_ei(old_dentry->d_inode);
|
||||
struct bch_inode_info *new_inode = to_bch_ei(new_dentry->d_inode);
|
||||
struct timespec now = current_fs_time(old_dir->v.i_sb);
|
||||
struct timespec now = current_time(&old_dir->v);
|
||||
int ret;
|
||||
|
||||
ret = bch2_dirent_rename(c,
|
||||
@ -909,10 +910,8 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
|
||||
inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
|
||||
|
||||
inode->ei_journal_seq = 0;
|
||||
inode->ei_size = bi->bi_size;
|
||||
inode->ei_flags = bi->bi_flags;
|
||||
atomic64_set(&inode->ei_sectors, bi->bi_sectors);
|
||||
inode->ei_str_hash = bch2_hash_info_init(c, bi);
|
||||
inode->ei_inode = *bi;
|
||||
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
|
||||
@ -949,8 +948,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
|
||||
inode_init_once(&inode->v);
|
||||
mutex_init(&inode->ei_update_lock);
|
||||
inode->ei_journal_seq = 0;
|
||||
atomic_long_set(&inode->ei_size_dirty_count, 0);
|
||||
atomic_long_set(&inode->ei_sectors_dirty_count, 0);
|
||||
|
||||
return &inode->v;
|
||||
}
|
||||
@ -995,12 +992,6 @@ static void bch2_evict_inode(struct inode *vinode)
|
||||
|
||||
truncate_inode_pages_final(&inode->v.i_data);
|
||||
|
||||
if (!bch2_journal_error(&c->journal) && !is_bad_inode(&inode->v)) {
|
||||
/* XXX - we want to check this stuff iff there weren't IO errors: */
|
||||
BUG_ON(atomic_long_read(&inode->ei_sectors_dirty_count));
|
||||
BUG_ON(atomic64_read(&inode->ei_sectors) != inode->v.i_blocks);
|
||||
}
|
||||
|
||||
clear_inode(&inode->v);
|
||||
|
||||
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
|
||||
@ -1272,9 +1263,16 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
|
||||
sb->s_magic = BCACHEFS_STATFS_MAGIC;
|
||||
sb->s_time_gran = c->sb.time_precision;
|
||||
c->vfs_sb = sb;
|
||||
sb->s_bdi = &c->bdi;
|
||||
strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
|
||||
|
||||
ret = super_setup_bdi(sb);
|
||||
if (ret)
|
||||
goto err_put_super;
|
||||
|
||||
sb->s_bdi->congested_fn = bch2_congested;
|
||||
sb->s_bdi->congested_data = c;
|
||||
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
|
||||
|
||||
for_each_online_member(ca, c, i) {
|
||||
struct block_device *bdev = ca->disk_sb.bdev;
|
||||
|
||||
@ -1,6 +1,7 @@
#ifndef _BCACHEFS_FS_H
#define _BCACHEFS_FS_H

#include "opts.h"
#include "str_hash.h"

#include <linux/seqlock.h>
@ -11,22 +12,12 @@ struct bch_inode_info {

struct mutex ei_update_lock;
u64 ei_journal_seq;

atomic_long_t ei_size_dirty_count;

/*
* these are updated whenever we update the inode in the btree - for
* e.g. fsync
*/
u64 ei_size;
u32 ei_flags;

atomic_long_t ei_sectors_dirty_count;
atomic64_t ei_sectors;
unsigned long ei_last_dirtied;

struct bch_hash_info ei_str_hash;

unsigned long ei_last_dirtied;
/* copy of inode in btree: */
struct bch_inode_unpacked ei_inode;
};

#define to_bch_ei(_inode) \

@ -204,7 +204,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
|
||||
"hash table key at wrong offset: %llu, "
|
||||
"hashed to %llu chain starts at %llu\n%s",
|
||||
k.k->p.offset, hashed, h->chain.pos.offset,
|
||||
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||
bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
|
||||
if (ret) {
|
||||
@ -224,7 +224,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
|
||||
if (fsck_err_on(k2.k->type == desc.key_type &&
|
||||
!desc.cmp_bkey(k, k2), c,
|
||||
"duplicate hash table keys:\n%s",
|
||||
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||
bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id),
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL);
|
||||
if (ret)
|
||||
@ -397,9 +397,9 @@ static int check_dirents(struct bch_fs *c)
|
||||
|
||||
if (fsck_err_on(have_target &&
|
||||
d.v->d_type !=
|
||||
mode_to_type(le16_to_cpu(target.bi_mode)), c,
|
||||
mode_to_type(target.bi_mode), c,
|
||||
"incorrect d_type: should be %u:\n%s",
|
||||
mode_to_type(le16_to_cpu(target.bi_mode)),
|
||||
mode_to_type(target.bi_mode),
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k))) {
|
||||
struct bkey_i_dirent *n;
|
||||
@ -411,7 +411,7 @@ static int check_dirents(struct bch_fs *c)
|
||||
}
|
||||
|
||||
bkey_reassemble(&n->k_i, d.s_c);
|
||||
n->v.d_type = mode_to_type(le16_to_cpu(target.bi_mode));
|
||||
n->v.d_type = mode_to_type(target.bi_mode);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
@ -493,7 +493,8 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
|
||||
fsck_err:
|
||||
return ret;
|
||||
create_root:
|
||||
bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
|
||||
bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
|
||||
0, NULL);
|
||||
root_inode->bi_inum = BCACHEFS_ROOT_INO;
|
||||
|
||||
bch2_inode_pack(&packed, root_inode);
|
||||
@ -545,7 +546,8 @@ create_lostfound:
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
|
||||
bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO,
|
||||
0, root_inode);
|
||||
|
||||
ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0,
|
||||
&c->unused_inode_hint);
|
||||
|
@ -198,6 +198,12 @@ static const char *bch2_inode_invalid(const struct bch_fs *c,
|
||||
if (bch2_inode_unpack(inode, &unpacked))
|
||||
return "invalid variable length fields";
|
||||
|
||||
if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
|
||||
return "invalid data checksum type";
|
||||
|
||||
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
|
||||
return "invalid data checksum type";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
case BCH_INODE_BLOCKDEV:
|
||||
@ -221,6 +227,7 @@ static const char *bch2_inode_invalid(const struct bch_fs *c,
|
||||
static void bch2_inode_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = out + size;
|
||||
struct bkey_s_c_inode inode;
|
||||
struct bch_inode_unpacked unpacked;
|
||||
|
||||
@ -228,11 +235,14 @@ static void bch2_inode_to_text(struct bch_fs *c, char *buf,
|
||||
case BCH_INODE_FS:
|
||||
inode = bkey_s_c_to_inode(k);
|
||||
if (bch2_inode_unpack(inode, &unpacked)) {
|
||||
scnprintf(buf, size, "(unpack error)");
|
||||
out += scnprintf(out, end - out, "(unpack error)");
|
||||
break;
|
||||
}
|
||||
|
||||
scnprintf(buf, size, "i_size %llu", unpacked.bi_size);
|
||||
#define BCH_INODE_FIELD(_name, _bits) \
|
||||
out += scnprintf(out, end - out, #_name ": %llu ", (u64) unpacked._name);
|
||||
BCH_INODE_FIELDS()
|
||||
#undef BCH_INODE_FIELD
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -243,9 +253,12 @@ const struct bkey_ops bch2_bkey_inode_ops = {
|
||||
};
|
||||
|
||||
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
|
||||
uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
|
||||
struct bch_inode_unpacked *parent)
|
||||
{
|
||||
s64 now = timespec_to_bch2_time(c, CURRENT_TIME);
|
||||
s64 now = timespec_to_bch2_time(c,
|
||||
timespec_trunc(current_kernel_time(),
|
||||
c->sb.time_precision));
|
||||
|
||||
memset(inode_u, 0, sizeof(*inode_u));
|
||||
|
||||
@ -261,6 +274,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
inode_u->bi_mtime = now;
|
||||
inode_u->bi_ctime = now;
|
||||
inode_u->bi_otime = now;
|
||||
|
||||
if (parent) {
|
||||
#define BCH_INODE_FIELD(_name) inode_u->_name = parent->_name;
|
||||
BCH_INODE_FIELDS_INHERIT()
|
||||
#undef BCH_INODE_FIELD
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
@ -416,7 +435,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
||||
struct bch_inode_unpacked inode_u;
|
||||
|
||||
if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
|
||||
bi_generation = cpu_to_le32(inode_u.bi_generation) + 1;
|
||||
bi_generation = inode_u.bi_generation + 1;
|
||||
break;
|
||||
}
|
||||
case BCH_INODE_GENERATION: {
|
||||
|
@ -1,6 +1,8 @@
|
||||
#ifndef _BCACHEFS_INODE_H
|
||||
#define _BCACHEFS_INODE_H
|
||||
|
||||
#include "opts.h"
|
||||
|
||||
#include <linux/math64.h>
|
||||
|
||||
extern const struct bkey_ops bch2_bkey_inode_ops;
|
||||
@ -28,7 +30,8 @@ void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *)
|
||||
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
|
||||
|
||||
void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
uid_t, gid_t, umode_t, dev_t);
|
||||
uid_t, gid_t, umode_t, dev_t,
|
||||
struct bch_inode_unpacked *);
|
||||
int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
u64, u64, u64 *);
|
||||
int bch2_inode_truncate(struct bch_fs *, u64, u64,
|
||||
@ -55,6 +58,45 @@ static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec ts)
|
||||
return div_s64(ns, c->sb.time_precision);
|
||||
}
|
||||
|
||||
static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
|
||||
{
|
||||
struct bch_io_opts ret = { 0 };
|
||||
|
||||
#define BCH_INODE_OPT(_name, _bits) \
|
||||
if (inode->bi_##_name) \
|
||||
opt_set(ret, _name, inode->bi_##_name - 1);
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void __bch2_inode_opt_set(struct bch_inode_unpacked *inode,
|
||||
enum bch_opt_id id, u64 v)
|
||||
{
|
||||
switch (id) {
|
||||
#define BCH_INODE_OPT(_name, ...) \
|
||||
case Opt_##_name: \
|
||||
inode->bi_##_name = v; \
|
||||
break;
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode,
|
||||
enum bch_opt_id id, u64 v)
|
||||
{
|
||||
return __bch2_inode_opt_set(inode, id, v + 1);
|
||||
}
|
||||
|
||||
static inline void bch2_inode_opt_clear(struct bch_inode_unpacked *inode,
|
||||
enum bch_opt_id id)
|
||||
{
|
||||
return __bch2_inode_opt_set(inode, id, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_inode_pack_test(void);
|
||||
#else
|
||||
|
128
libbcachefs/io.c
@ -20,6 +20,7 @@
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "super.h"
|
||||
#include "super-io.h"
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
@ -139,7 +140,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_write_bio *n;
|
||||
struct bch_dev *ca;
|
||||
unsigned ptr_idx = 0;
|
||||
|
||||
BUG_ON(c->opts.nochanges);
|
||||
|
||||
@ -147,7 +147,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX ||
|
||||
!c->devs[ptr->dev]);
|
||||
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
|
||||
if (ptr + 1 < &extent_entry_last(e)->ptr) {
|
||||
n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO,
|
||||
@ -168,7 +168,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
|
||||
n->c = c;
|
||||
n->ca = ca;
|
||||
n->ptr_idx = ptr_idx++;
|
||||
n->submit_time_us = local_clock_us();
|
||||
n->bio.bi_iter.bi_sector = ptr->offset;
|
||||
|
||||
@ -184,7 +183,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
submit_bio(&n->bio);
|
||||
} else {
|
||||
n->have_io_ref = false;
|
||||
bcache_io_error(c, &n->bio, "device has been removed");
|
||||
n->bio.bi_status = BLK_STS_REMOVED;
|
||||
bio_endio(&n->bio);
|
||||
}
|
||||
}
|
||||
@ -201,9 +200,12 @@ static void bch2_write_done(struct closure *cl)
|
||||
if (!op->error && (op->flags & BCH_WRITE_FLUSH))
|
||||
op->error = bch2_journal_error(&op->c->journal);
|
||||
|
||||
bch2_disk_reservation_put(op->c, &op->res);
|
||||
if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION))
|
||||
bch2_disk_reservation_put(op->c, &op->res);
|
||||
percpu_ref_put(&op->c->writes);
|
||||
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
||||
op->flags &= ~(BCH_WRITE_DONE|BCH_WRITE_LOOPED);
|
||||
|
||||
closure_return(cl);
|
||||
}
|
||||
|
||||
@ -244,9 +246,37 @@ static void bch2_write_index(struct closure *cl)
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_fs *c = op->c;
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bkey_s_extent e;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bkey_i *src, *dst = keys->keys, *n;
|
||||
int ret;
|
||||
|
||||
op->flags |= BCH_WRITE_LOOPED;
|
||||
|
||||
for (src = keys->keys; src != keys->top; src = n) {
|
||||
n = bkey_next(src);
|
||||
bkey_copy(dst, src);
|
||||
|
||||
e = bkey_i_to_s_extent(dst);
|
||||
extent_for_each_ptr_backwards(e, ptr)
|
||||
if (test_bit(ptr->dev, op->failed.d))
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
|
||||
ret = bch2_extent_nr_ptrs(e.c)
|
||||
? bch2_check_mark_super(c, e.c, BCH_DATA_USER)
|
||||
: -EIO;
|
||||
if (ret) {
|
||||
keys->top = keys->keys;
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
dst = bkey_next(dst);
|
||||
}
|
||||
|
||||
keys->top = dst;
|
||||
|
||||
if (!bch2_keylist_empty(keys)) {
|
||||
u64 sectors_start = keylist_sectors(keys);
|
||||
int ret = op->index_update_fn(op);
|
||||
@ -260,7 +290,7 @@ static void bch2_write_index(struct closure *cl)
|
||||
op->error = ret;
|
||||
}
|
||||
}
|
||||
|
||||
err:
|
||||
bch2_open_bucket_put_refs(c, &op->open_buckets_nr, op->open_buckets);
|
||||
|
||||
if (!(op->flags & BCH_WRITE_DONE))
|
||||
@ -276,43 +306,6 @@ static void bch2_write_index(struct closure *cl)
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_write_io_error(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bch_fs *c = op->c;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bkey_i *k;
|
||||
int ret;
|
||||
|
||||
for_each_keylist_key(keys, k) {
|
||||
struct bkey_i *n = bkey_next(k);
|
||||
struct bkey_s_extent e = bkey_i_to_s_extent(k);
|
||||
|
||||
extent_for_each_ptr_backwards(e, ptr)
|
||||
if (test_bit(ptr->dev, op->failed.d))
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
|
||||
memmove(bkey_next(k), n, (void *) keys->top - (void *) n);
|
||||
keys->top_p -= (u64 *) n - (u64 *) bkey_next(k);
|
||||
|
||||
ret = bch2_extent_nr_ptrs(e.c)
|
||||
? bch2_check_mark_super(c, e.c, BCH_DATA_USER)
|
||||
: -EIO;
|
||||
if (ret) {
|
||||
keys->top = keys->keys;
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&op->failed, 0, sizeof(op->failed));
|
||||
|
||||
bch2_write_index(cl);
|
||||
return;
|
||||
}
|
||||
|
||||
static void bch2_write_endio(struct bio *bio)
|
||||
{
|
||||
struct closure *cl = bio->bi_private;
|
||||
@ -324,10 +317,8 @@ static void bch2_write_endio(struct bio *bio)
|
||||
|
||||
bch2_latency_acct(ca, wbio->submit_time_us, WRITE);
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_error, ca, "data write")) {
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "data write"))
|
||||
set_bit(ca->dev_idx, op->failed.d);
|
||||
set_closure_fn(cl, bch2_write_io_error, index_update_wq(op));
|
||||
}
|
||||
|
||||
if (wbio->have_io_ref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
@ -706,11 +697,6 @@ do_write:
|
||||
|
||||
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(key_to_write),
|
||||
BCH_DATA_USER);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
dst->bi_end_io = bch2_write_endio;
|
||||
dst->bi_private = &op->cl;
|
||||
bio_set_op_attrs(dst, REQ_OP_WRITE, 0);
|
||||
@ -870,7 +856,8 @@ void bch2_write(struct closure *cl)
|
||||
!percpu_ref_tryget(&c->writes)) {
|
||||
__bcache_io_error(c, "read only");
|
||||
op->error = -EROFS;
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION))
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
closure_return(cl);
|
||||
}
|
||||
|
||||
@ -916,7 +903,10 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
|
||||
rbio->promote = NULL;
|
||||
|
||||
__bch2_write_op_init(&op->write.op, c);
|
||||
bch2_write_op_init(&op->write.op, c);
|
||||
op->write.op.csum_type = bch2_data_checksum_type(c, rbio->opts.data_checksum);
|
||||
op->write.op.compression_type =
|
||||
bch2_compression_opt_to_type(rbio->opts.compression);
|
||||
|
||||
op->write.move_dev = -1;
|
||||
op->write.op.devs = c->fastest_devs;
|
||||
@ -1060,7 +1050,7 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
if (rbio->split)
|
||||
rbio = bch2_rbio_free(rbio);
|
||||
else
|
||||
rbio->bio.bi_error = 0;
|
||||
rbio->bio.bi_status = 0;
|
||||
|
||||
if (!(flags & BCH_READ_NODECODE))
|
||||
flags |= BCH_READ_MUST_CLONE;
|
||||
@ -1073,7 +1063,8 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
__bch2_read(c, rbio, iter, inode, &avoid, flags);
|
||||
}
|
||||
|
||||
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, int error)
|
||||
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
|
||||
blk_status_t error)
|
||||
{
|
||||
rbio->retry = retry;
|
||||
|
||||
@ -1081,7 +1072,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, int error)
|
||||
return;
|
||||
|
||||
if (retry == READ_ERR) {
|
||||
bch2_rbio_parent(rbio)->bio.bi_error = error;
|
||||
bch2_rbio_parent(rbio)->bio.bi_status = error;
|
||||
bch2_rbio_done(rbio);
|
||||
} else {
|
||||
bch2_rbio_punt(rbio, bch2_rbio_retry,
|
||||
@ -1236,7 +1227,7 @@ csum_err:
|
||||
*/
|
||||
if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
|
||||
rbio->flags |= BCH_READ_MUST_BOUNCE;
|
||||
bch2_rbio_error(rbio, READ_RETRY, -EIO);
|
||||
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1245,13 +1236,13 @@ csum_err:
|
||||
rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
|
||||
rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
|
||||
csum.hi, csum.lo, crc.csum_type);
|
||||
bch2_rbio_error(rbio, READ_RETRY_AVOID, -EIO);
|
||||
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
|
||||
return;
|
||||
decompression_err:
|
||||
__bcache_io_error(c, "decompression error, inode %llu offset %llu",
|
||||
rbio->pos.inode,
|
||||
(u64) rbio->bvec_iter.bi_sector);
|
||||
bch2_rbio_error(rbio, READ_ERR, -EIO);
|
||||
bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1270,8 +1261,8 @@ static void bch2_read_endio(struct bio *bio)
|
||||
if (!rbio->split)
|
||||
rbio->bio.bi_end_io = rbio->end_io;
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_error, rbio->pick.ca, "data read")) {
|
||||
bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_error);
|
||||
if (bch2_dev_io_err_on(bio->bi_status, rbio->pick.ca, "data read")) {
|
||||
bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1281,9 +1272,9 @@ static void bch2_read_endio(struct bio *bio)
|
||||
atomic_long_inc(&c->read_realloc_races);
|
||||
|
||||
if (rbio->flags & BCH_READ_RETRY_IF_STALE)
|
||||
bch2_rbio_error(rbio, READ_RETRY, -EINTR);
|
||||
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
|
||||
else
|
||||
bch2_rbio_error(rbio, READ_ERR, -EINTR);
|
||||
bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1360,7 +1351,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
|
||||
|
||||
rbio = rbio_init(bio_alloc_bioset(GFP_NOIO,
|
||||
DIV_ROUND_UP(sectors, PAGE_SECTORS),
|
||||
&c->bio_read_split));
|
||||
&c->bio_read_split),
|
||||
orig->opts);
|
||||
|
||||
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
|
||||
split = true;
|
||||
@ -1374,7 +1366,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
|
||||
* lose the error)
|
||||
*/
|
||||
rbio = rbio_init(bio_clone_fast(&orig->bio, GFP_NOIO,
|
||||
&c->bio_read_split));
|
||||
&c->bio_read_split),
|
||||
orig->opts);
|
||||
rbio->bio.bi_iter = iter;
|
||||
split = true;
|
||||
} else {
|
||||
@ -1428,6 +1421,8 @@ noclone:
|
||||
bch2_read_endio(&rbio->bio);
|
||||
|
||||
ret = rbio->retry;
|
||||
if (rbio->split)
|
||||
rbio = bch2_rbio_free(rbio);
|
||||
if (!ret)
|
||||
bch2_rbio_done(rbio);
|
||||
}
|
||||
@ -1503,7 +1498,7 @@ err:
|
||||
* possibly bigger than the memory that was
|
||||
* originally allocated)
|
||||
*/
|
||||
rbio->bio.bi_error = -EINTR;
|
||||
rbio->bio.bi_status = BLK_STS_AGAIN;
|
||||
bio_endio(&rbio->bio);
|
||||
return;
|
||||
}
|
||||
@ -1561,6 +1556,7 @@ retry:
|
||||
case READ_RETRY:
|
||||
goto retry;
|
||||
case READ_ERR:
|
||||
rbio->bio.bi_status = BLK_STS_IOERR;
|
||||
bio_endio(&rbio->bio);
|
||||
return;
|
||||
};
|
||||
@ -21,6 +21,8 @@ void bch2_latency_acct(struct bch_dev *, unsigned, int);
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *);

#define BLK_STS_REMOVED ((__force blk_status_t)128)

enum bch_write_flags {
BCH_WRITE_ALLOC_NOWAIT = (1 << 0),
BCH_WRITE_CACHED = (1 << 1),
@ -29,11 +31,12 @@ enum bch_write_flags {
BCH_WRITE_PAGES_STABLE = (1 << 4),
BCH_WRITE_PAGES_OWNED = (1 << 5),
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6),
BCH_WRITE_NOPUT_RESERVATION = (1 << 7),

/* Internal: */
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 7),
BCH_WRITE_DONE = (1 << 8),
BCH_WRITE_LOOPED = (1 << 9),
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 8),
BCH_WRITE_DONE = (1 << 9),
BCH_WRITE_LOOPED = (1 << 10),
};

static inline u64 *op_journal_seq(struct bch_write_op *op)
|
||||
@ -42,6 +45,12 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
|
||||
? op->journal_seq_p : &op->journal_seq;
|
||||
}
|
||||
|
||||
static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq)
|
||||
{
|
||||
op->journal_seq_p = journal_seq;
|
||||
op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR;
|
||||
}
|
||||
|
||||
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
{
|
||||
return op->alloc_reserve == RESERVE_MOVINGGC
|
||||
@ -51,14 +60,14 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
|
||||
int bch2_write_index_default(struct bch_write_op *);
|
||||
|
||||
static inline void __bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c)
|
||||
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c)
|
||||
{
|
||||
op->c = c;
|
||||
op->io_wq = index_update_wq(op);
|
||||
op->flags = 0;
|
||||
op->written = 0;
|
||||
op->error = 0;
|
||||
op->csum_type = bch2_data_checksum_type(c);
|
||||
op->csum_type = bch2_data_checksum_type(c, c->opts.data_checksum);
|
||||
op->compression_type =
|
||||
bch2_compression_opt_to_type(c->opts.compression);
|
||||
op->nr_replicas = 0;
|
||||
@ -75,27 +84,6 @@ static inline void __bch2_write_op_init(struct bch_write_op *op, struct bch_fs *
|
||||
op->index_update_fn = bch2_write_index_default;
|
||||
}
|
||||
|
||||
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
|
||||
struct disk_reservation res,
|
||||
struct bch_devs_mask *devs,
|
||||
struct write_point_specifier write_point,
|
||||
struct bpos pos,
|
||||
u64 *journal_seq, unsigned flags)
|
||||
{
|
||||
__bch2_write_op_init(op, c);
|
||||
op->flags = flags;
|
||||
op->nr_replicas = res.nr_replicas;
|
||||
op->pos = pos;
|
||||
op->res = res;
|
||||
op->devs = devs;
|
||||
op->write_point = write_point;
|
||||
|
||||
if (journal_seq) {
|
||||
op->journal_seq_p = journal_seq;
|
||||
op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR;
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_write(struct closure *);
|
||||
|
||||
static inline struct bch_write_bio *wbio_init(struct bio *bio)
|
||||
@ -134,25 +122,27 @@ static inline void bch2_read_extent(struct bch_fs *c,
|
||||
struct extent_pick_ptr *pick,
|
||||
unsigned flags)
|
||||
{
|
||||
rbio->_state = 0;
|
||||
__bch2_read_extent(c, rbio, rbio->bio.bi_iter, e, pick, flags);
|
||||
}
|
||||
|
||||
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
|
||||
u64 inode)
|
||||
{
|
||||
rbio->_state = 0;
|
||||
BUG_ON(rbio->_state);
|
||||
__bch2_read(c, rbio, rbio->bio.bi_iter, inode, NULL,
|
||||
BCH_READ_RETRY_IF_STALE|
|
||||
BCH_READ_MAY_PROMOTE|
|
||||
BCH_READ_USER_MAPPED);
|
||||
}
|
||||
|
||||
static inline struct bch_read_bio *rbio_init(struct bio *bio)
|
||||
static inline struct bch_read_bio *rbio_init(struct bio *bio,
|
||||
struct bch_io_opts opts)
|
||||
{
|
||||
struct bch_read_bio *rbio = to_rbio(bio);
|
||||
|
||||
rbio->_state = 0;
|
||||
rbio->_state = 0;
|
||||
rbio->promote = NULL;
|
||||
rbio->opts = opts;
|
||||
return rbio;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "buckets_types.h"
|
||||
#include "extents_types.h"
|
||||
#include "keylist_types.h"
|
||||
#include "opts.h"
|
||||
#include "super_types.h"
|
||||
|
||||
#include <linux/llist.h>
|
||||
@ -56,6 +57,8 @@ struct bch_read_bio {
|
||||
|
||||
struct promote_op *promote;
|
||||
|
||||
struct bch_io_opts opts;
|
||||
|
||||
struct work_struct work;
|
||||
|
||||
struct bio bio;
|
||||
@ -69,8 +72,7 @@ struct bch_write_bio {
|
||||
struct closure *cl;
|
||||
};
|
||||
|
||||
u8 ptr_idx;
|
||||
u8 replicas_failed;
|
||||
struct bch_devs_list failed;
|
||||
u8 order;
|
||||
|
||||
unsigned split:1,
|
||||
@ -90,8 +92,8 @@ struct bch_write_op {
|
||||
struct bch_fs *c;
|
||||
struct workqueue_struct *io_wq;
|
||||
|
||||
unsigned written; /* sectors */
|
||||
u16 flags;
|
||||
u16 written; /* sectors */
|
||||
s8 error;
|
||||
|
||||
unsigned csum_type:4;
|
||||
|
@ -338,8 +338,8 @@ struct journal_list {
|
||||
* Given a journal entry we just read, add it to the list of journal entries to
|
||||
* be replayed:
|
||||
*/
|
||||
static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
|
||||
struct jset *j)
|
||||
static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct journal_list *jlist, struct jset *j)
|
||||
{
|
||||
struct journal_replay *i, *pos;
|
||||
struct list_head *where;
|
||||
@ -347,8 +347,6 @@ static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
|
||||
__le64 last_seq;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&jlist->lock);
|
||||
|
||||
last_seq = !list_empty(jlist->head)
|
||||
? list_last_entry(jlist->head, struct journal_replay,
|
||||
list)->j.last_seq
|
||||
@ -376,9 +374,7 @@ static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
|
||||
memcmp(j, &i->j, bytes), c,
|
||||
"found duplicate but non identical journal entries (seq %llu)",
|
||||
le64_to_cpu(j->seq));
|
||||
|
||||
ret = JOURNAL_ENTRY_ADD_OK;
|
||||
goto out;
|
||||
goto found;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
|
||||
@ -395,12 +391,16 @@ add:
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(&i->j, j, bytes);
|
||||
list_add(&i->list, where);
|
||||
i->devs.nr = 0;
|
||||
memcpy(&i->j, j, bytes);
|
||||
found:
|
||||
if (!fsck_err_on(bch2_dev_list_has_dev(i->devs, ca->dev_idx),
|
||||
c, "duplicate journal entries on same device"))
|
||||
bch2_dev_list_add_dev(&i->devs, ca->dev_idx);
|
||||
ret = JOURNAL_ENTRY_ADD_OK;
|
||||
out:
|
||||
fsck_err:
|
||||
mutex_unlock(&jlist->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -496,8 +496,8 @@ fsck_err:
|
||||
#define journal_entry_err_on(cond, c, msg, ...) \
|
||||
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
|
||||
|
||||
static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||
int write)
|
||||
static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j,
|
||||
int write)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
int ret = 0;
|
||||
@ -508,7 +508,7 @@ static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||
if (journal_entry_err_on(vstruct_next(entry) >
|
||||
vstruct_last(j), c,
|
||||
"journal entry extends past end of jset")) {
|
||||
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
|
||||
j->u64s = cpu_to_le32((u64 *) entry - j->_data);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -614,7 +614,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
||||
"invalid journal entry: last_seq > seq"))
|
||||
j->last_seq = j->seq;
|
||||
|
||||
return __journal_entry_validate(c, j, write);
|
||||
return 0;
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
@ -722,7 +722,10 @@ reread: sectors_read = min_t(unsigned,
|
||||
|
||||
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
|
||||
|
||||
ret = journal_entry_add(c, jlist, j);
|
||||
mutex_lock(&jlist->lock);
|
||||
ret = journal_entry_add(c, ca, jlist, j);
|
||||
mutex_unlock(&jlist->lock);
|
||||
|
||||
switch (ret) {
|
||||
case JOURNAL_ENTRY_ADD_OK:
|
||||
*entries_found = true;
|
||||
@ -916,7 +919,9 @@ static int journal_seq_blacklist_read(struct journal *j,
|
||||
|
||||
for_each_jset_entry_type(entry, &i->j,
|
||||
JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED) {
|
||||
seq = le64_to_cpu(entry->_data[0]);
|
||||
struct jset_entry_blacklist *bl_entry =
|
||||
container_of(entry, struct jset_entry_blacklist, entry);
|
||||
seq = le64_to_cpu(bl_entry->seq);
|
||||
|
||||
bch_verbose(c, "blacklisting existing journal seq %llu", seq);
|
||||
|
||||
@ -982,6 +987,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
fsck_err_on(c->sb.clean && journal_has_keys(list), c,
|
||||
"filesystem marked clean but journal has keys to replay");
|
||||
|
||||
list_for_each_entry(i, list, list) {
|
||||
ret = journal_entry_validate_entries(c, &i->j, READ);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
i = list_last_entry(list, struct journal_replay, list);
|
||||
|
||||
unfixable_fsck_err_on(le64_to_cpu(i->j.seq) -
|
||||
@ -1002,6 +1013,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
atomic_set(&p->count, 0);
|
||||
p->devs.nr = 0;
|
||||
}
|
||||
|
||||
mutex_lock(&j->blacklist_lock);
|
||||
@ -1010,6 +1022,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
|
||||
|
||||
atomic_set(&p->count, 1);
|
||||
p->devs = i->devs;
|
||||
|
||||
if (journal_seq_blacklist_read(j, i, p)) {
|
||||
mutex_unlock(&j->blacklist_lock);
|
||||
@ -1090,7 +1103,7 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
|
||||
{
|
||||
struct journal_buf *w = journal_prev_buf(j);
|
||||
|
||||
atomic_dec_bug(&journal_seq_pin(j, w->data->seq)->count);
|
||||
atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
|
||||
|
||||
if (!need_write_just_set &&
|
||||
test_bit(JOURNAL_NEED_WRITE, &j->flags))
|
||||
@ -1122,6 +1135,7 @@ static void __journal_entry_new(struct journal *j, int count)
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
atomic_set(&p->count, count);
|
||||
p->devs.nr = 0;
|
||||
}
|
||||
|
||||
static void __bch2_journal_next_entry(struct journal *j)
|
||||
@ -1851,6 +1865,21 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
bch2_journal_error(j));
|
||||
}
|
||||
|
||||
int bch2_journal_flush_all_pins(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool flush;
|
||||
|
||||
bch2_journal_flush_pins(j, U64_MAX);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
flush = last_seq(j) != j->last_seq_ondisk ||
|
||||
c->btree_roots_dirty;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return flush ? bch2_journal_meta(j) : 0;
|
||||
}
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
{
|
||||
bool ret;
|
||||
@ -2002,7 +2031,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
|
||||
* i.e. whichever device was limiting the current journal entry size.
|
||||
*/
|
||||
extent_for_each_ptr_backwards(e, ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
|
||||
if (ca->mi.state != BCH_MEMBER_STATE_RW ||
|
||||
ca->journal.sectors_free <= sectors)
|
||||
@ -2197,7 +2226,7 @@ static void journal_write_endio(struct bio *bio)
|
||||
struct bch_dev *ca = bio->bi_private;
|
||||
struct journal *j = &ca->fs->journal;
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_error, ca, "journal write") ||
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write") ||
|
||||
bch2_meta_write_fault("journal")) {
|
||||
/* Was this a flush or an actual journal write? */
|
||||
if (ca->journal.ptr_idx != U8_MAX) {
|
||||
@ -2233,6 +2262,7 @@ static void journal_write(struct closure *cl)
|
||||
if (r->alive)
|
||||
bch2_journal_add_btree_root(w, i, &r->key, r->level);
|
||||
}
|
||||
c->btree_roots_dirty = false;
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
|
||||
journal_write_compact(jset);
|
||||
@ -2246,7 +2276,7 @@ static void journal_write(struct closure *cl)
|
||||
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
|
||||
|
||||
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||
__journal_entry_validate(c, jset, WRITE))
|
||||
journal_entry_validate_entries(c, jset, WRITE))
|
||||
goto err;
|
||||
|
||||
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||
@ -2257,7 +2287,7 @@ static void journal_write(struct closure *cl)
|
||||
journal_nonce(jset), jset);
|
||||
|
||||
if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||
__journal_entry_validate(c, jset, WRITE))
|
||||
journal_entry_validate_entries(c, jset, WRITE))
|
||||
goto err;
|
||||
|
||||
sectors = vstruct_sectors(jset, c->block_bits);
|
||||
@ -2277,6 +2307,9 @@ static void journal_write(struct closure *cl)
|
||||
BCH_DATA_JOURNAL))
|
||||
goto err;
|
||||
|
||||
journal_seq_pin(j, le64_to_cpu(jset->seq))->devs =
|
||||
bch2_extent_devs(bkey_i_to_s_c_extent(&j->key));
|
||||
|
||||
/*
|
||||
* XXX: we really should just disable the entire journal in nochanges
|
||||
* mode
|
||||
@ -2285,7 +2318,7 @@ static void journal_write(struct closure *cl)
|
||||
goto no_io;
|
||||
|
||||
extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
if (!percpu_ref_tryget(&ca->io_ref)) {
|
||||
/* XXX: fix this */
|
||||
bch_err(c, "missing device for journal write\n");
|
||||
@ -2693,6 +2726,46 @@ int bch2_journal_flush(struct journal *j)
|
||||
return bch2_journal_flush_seq(j, seq);
|
||||
}
|
||||
|
||||
int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_entry_pin_list *p;
|
||||
struct bch_devs_list devs;
|
||||
u64 seq = 0;
|
||||
unsigned iter;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
fifo_for_each_entry_ptr(p, &j->pin, iter)
|
||||
if (bch2_dev_list_has_dev(p->devs, dev_idx))
|
||||
seq = journal_pin_seq(j, p);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
bch2_journal_flush_pins(j, seq);
|
||||
|
||||
mutex_lock(&c->replicas_gc_lock);
|
||||
bch2_replicas_gc_start(c, 1 << BCH_DATA_JOURNAL);
|
||||
|
||||
seq = 0;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
while (!ret && seq < atomic64_read(&j->seq)) {
|
||||
seq = max(seq, last_seq(j));
|
||||
devs = journal_seq_pin(j, seq)->devs;
|
||||
seq++;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
ret = bch2_check_mark_super_devlist(c, &devs, BCH_DATA_JOURNAL);
|
||||
spin_lock(&j->lock);
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
bch2_replicas_gc_end(c, ret);
|
||||
mutex_unlock(&c->replicas_gc_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
@ -2862,9 +2935,7 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
* journal entries, then force a brand new empty journal entry to be
|
||||
* written:
|
||||
*/
|
||||
bch2_journal_flush_pins(j, U64_MAX);
|
||||
bch2_journal_flush_async(j, NULL);
|
||||
bch2_journal_meta(j);
|
||||
bch2_journal_flush_all_pins(j);
|
||||
|
||||
cancel_delayed_work_sync(&j->write_work);
|
||||
cancel_delayed_work_sync(&j->reclaim_work);
|
||||
|
@ -118,6 +118,8 @@
|
||||
*/
|
||||
struct journal_replay {
|
||||
struct list_head list;
|
||||
struct bch_devs_list devs;
|
||||
/* must be last: */
|
||||
struct jset j;
|
||||
};
|
||||
|
||||
@ -164,6 +166,7 @@ void bch2_journal_pin_add_if_older(struct journal *,
|
||||
struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
void bch2_journal_flush_pins(struct journal *, u64);
|
||||
int bch2_journal_flush_all_pins(struct journal *);
|
||||
|
||||
struct closure;
|
||||
struct bch_fs;
|
||||
@ -356,6 +359,7 @@ void bch2_journal_meta_async(struct journal *, struct closure *);
|
||||
int bch2_journal_flush_seq(struct journal *, u64);
|
||||
int bch2_journal_flush(struct journal *);
|
||||
int bch2_journal_meta(struct journal *);
|
||||
int bch2_journal_flush_device(struct journal *, unsigned);
|
||||
|
||||
void bch2_journal_halt(struct journal *);
|
||||
|
||||
|
@ -34,6 +34,7 @@ struct journal_entry_pin_list {
|
||||
struct list_head list;
|
||||
struct list_head flushed;
|
||||
atomic_t count;
|
||||
struct bch_devs_list devs;
|
||||
};
|
||||
|
||||
struct journal;
|
||||
|
@ -27,23 +27,9 @@ static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
|
||||
|
||||
#define MAX_DATA_OFF_ITER 10
|
||||
|
||||
/*
|
||||
* This moves only the data off, leaving the meta-data (if any) in place.
|
||||
* It walks the key space, and for any key with a valid pointer to the
|
||||
* relevant device, it copies it elsewhere, updating the key to point to
|
||||
* the copy.
|
||||
* The meta-data is moved off by bch_move_meta_data_off_device.
|
||||
*
|
||||
* Note: If the number of data replicas desired is > 1, ideally, any
|
||||
* new copies would not be made in the same device that already have a
|
||||
* copy (if there are enough devices).
|
||||
* This is _not_ currently implemented. The multiple replicas can
|
||||
* land in the same device even if there are others available.
|
||||
*/
|
||||
|
||||
int bch2_move_data_off_device(struct bch_dev *ca)
|
||||
static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
int flags)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 keys_moved, sectors_moved;
|
||||
@ -113,10 +99,6 @@ int bch2_move_data_off_device(struct bch_dev *ca)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This walks the btree, and for any node on the relevant device it moves the
|
||||
* node elsewhere.
|
||||
*/
|
||||
static int bch2_move_btree_off(struct bch_fs *c, struct bch_dev *ca,
|
||||
enum btree_id id)
|
||||
{
|
||||
@ -200,9 +182,9 @@ static int bch2_move_btree_off(struct bch_fs *c, struct bch_dev *ca,
|
||||
* is written.
|
||||
*/
|
||||
|
||||
int bch2_move_metadata_off_device(struct bch_dev *ca)
|
||||
static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
int flags)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
@ -240,37 +222,31 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flagging data bad when forcibly removing a device after failing to
|
||||
* migrate the data off the device.
|
||||
*/
|
||||
|
||||
static int bch2_flag_key_bad(struct btree_iter *iter,
|
||||
struct bch_dev *ca,
|
||||
struct bkey_s_c_extent orig)
|
||||
int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
{
|
||||
BKEY_PADDED(key) tmp;
|
||||
struct bkey_s_extent e;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bch_fs *c = ca->fs;
|
||||
return bch2_dev_usrdata_migrate(c, ca, flags) ?:
|
||||
bch2_dev_metadata_migrate(c, ca, flags);
|
||||
}
|
||||
|
||||
bkey_reassemble(&tmp.key, orig.s_c);
|
||||
e = bkey_i_to_s_extent(&tmp.key);
|
||||
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
|
||||
unsigned dev_idx, int flags, bool metadata)
|
||||
{
|
||||
struct bch_extent_ptr *ptr;
|
||||
unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
|
||||
unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
|
||||
unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
|
||||
unsigned nr_good;
|
||||
|
||||
extent_for_each_ptr_backwards(e, ptr)
|
||||
if (ptr->dev == ca->dev_idx)
|
||||
if (ptr->dev == dev_idx)
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_ERROR key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, e.s);
|
||||
nr_good = bch2_extent_nr_good_ptrs(c, e.c);
|
||||
if ((!nr_good && !(flags & lost)) ||
|
||||
(nr_good < replicas && !(flags & degraded)))
|
||||
return -EINVAL;
|
||||
|
||||
return bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC,
|
||||
BTREE_INSERT_ENTRY(iter, &tmp.key));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -284,11 +260,11 @@ static int bch2_flag_key_bad(struct btree_iter *iter,
|
||||
* that we've already tried to move the data MAX_DATA_OFF_ITER times and
|
||||
* are not likely to succeed if we try again.
|
||||
*/
|
||||
int bch2_flag_data_bad(struct bch_dev *ca)
|
||||
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_extent e;
|
||||
struct bkey_s_extent e;
|
||||
BKEY_PADDED(key) tmp;
|
||||
struct btree_iter iter;
|
||||
int ret = 0;
|
||||
|
||||
@ -303,11 +279,33 @@ int bch2_flag_data_bad(struct bch_dev *ca)
|
||||
if (!bkey_extent_is_data(k.k))
|
||||
goto advance;
|
||||
|
||||
e = bkey_s_c_to_extent(k);
|
||||
if (!bch2_extent_has_device(e, ca->dev_idx))
|
||||
if (!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx))
|
||||
goto advance;
|
||||
|
||||
ret = bch2_flag_key_bad(&iter, ca, e);
|
||||
bkey_reassemble(&tmp.key, k);
|
||||
e = bkey_i_to_s_extent(&tmp.key);
|
||||
|
||||
ret = drop_dev_ptrs(c, e, dev_idx, flags, false);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_ERROR key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, e.s);
|
||||
|
||||
if (bkey_extent_is_data(e.k) &&
|
||||
(ret = bch2_check_mark_super(c, e.c, BCH_DATA_USER)))
|
||||
break;
|
||||
|
||||
iter.pos = bkey_start_pos(&tmp.key.k);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(&iter, &tmp.key));
|
||||
|
||||
/*
|
||||
* don't want to leave ret == -EINTR, since if we raced and
|
||||
@ -319,26 +317,6 @@ int bch2_flag_data_bad(struct bch_dev *ca)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the replica we're dropping was dirty and there is an
|
||||
* additional cached replica, the cached replica will now be
|
||||
* considered dirty - upon inserting the new version of the key,
|
||||
* the bucket accounting will be updated to reflect the fact
|
||||
* that the cached data is now dirty and everything works out as
|
||||
* if by magic without us having to do anything.
|
||||
*
|
||||
* The one thing we need to be concerned with here is there's a
|
||||
* race between when we drop any stale pointers from the key
|
||||
* we're about to insert, and when the key actually gets
|
||||
* inserted and the cached data is marked as dirty - we could
|
||||
* end up trying to insert a key with a pointer that should be
|
||||
* dirty, but points to stale data.
|
||||
*
|
||||
* If that happens the insert code just bails out and doesn't do
|
||||
* the insert - however, it doesn't return an error. Hence we
|
||||
* need to always recheck the current key before advancing to
|
||||
* the next:
|
||||
*/
|
||||
continue;
|
||||
advance:
|
||||
if (bkey_extent_is_data(k.k)) {
|
||||
@ -357,3 +335,80 @@ advance:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct closure cl;
|
||||
struct btree *b;
|
||||
unsigned id;
|
||||
int ret;
|
||||
|
||||
/* don't handle this yet: */
|
||||
if (flags & BCH_FORCE_IF_METADATA_LOST)
|
||||
return -EINVAL;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
mutex_lock(&c->replicas_gc_lock);
|
||||
bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
|
||||
|
||||
for (id = 0; id < BTREE_ID_NR; id++) {
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
|
||||
struct bkey_i_extent *new_key;
|
||||
retry:
|
||||
if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key),
|
||||
dev_idx)) {
|
||||
bch2_btree_iter_set_locks_want(&iter, 0);
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key),
|
||||
BCH_DATA_BTREE);
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
new_key = bkey_i_to_extent(&tmp.k);
|
||||
|
||||
ret = drop_dev_ptrs(c, extent_i_to_s(new_key),
|
||||
dev_idx, flags, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!bch2_btree_iter_set_locks_want(&iter, U8_MAX)) {
|
||||
b = bch2_btree_iter_peek_node(&iter);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
ret = bch2_btree_node_update_key(c, &iter, b, new_key);
|
||||
if (ret == -EINTR) {
|
||||
b = bch2_btree_iter_peek_node(&iter);
|
||||
goto retry;
|
||||
}
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
/* btree root */
|
||||
mutex_lock(&c->btree_root_lock);
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
bch2_replicas_gc_end(c, ret);
|
||||
mutex_unlock(&c->replicas_gc_lock);
|
||||
|
||||
return ret;
|
||||
err:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
|
||||
bch2_dev_metadata_drop(c, dev_idx, flags);
|
||||
}
|
||||
|
@ -1,8 +1,7 @@
#ifndef _BCACHEFS_MIGRATE_H
#define _BCACHEFS_MIGRATE_H

int bch2_move_data_off_device(struct bch_dev *);
int bch2_move_metadata_off_device(struct bch_dev *);
int bch2_flag_data_bad(struct bch_dev *);
int bch2_dev_data_migrate(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_data_drop(struct bch_fs *, unsigned, int);

#endif /* _BCACHEFS_MIGRATE_H */

@ -3,6 +3,7 @@
|
||||
#include "btree_gc.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "move.h"
|
||||
#include "super-io.h"
|
||||
@ -206,7 +207,7 @@ static void move_write(struct closure *cl)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
|
||||
if (likely(!io->rbio.bio.bi_error)) {
|
||||
if (likely(!io->rbio.bio.bi_status)) {
|
||||
bch2_migrate_write_init(&io->write, &io->rbio);
|
||||
closure_call(&io->write.op.cl, bch2_write, NULL, cl);
|
||||
}
|
||||
@ -240,6 +241,7 @@ static int bch2_move_extent(struct bch_fs *c,
|
||||
struct write_point_specifier wp,
|
||||
int btree_insert_flags,
|
||||
int move_device,
|
||||
struct bch_io_opts opts,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct extent_pick_ptr pick;
|
||||
@ -276,6 +278,7 @@ static int bch2_move_extent(struct bch_fs *c,
|
||||
goto err;
|
||||
}
|
||||
|
||||
io->rbio.opts = opts;
|
||||
bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
|
||||
bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
|
||||
io->rbio.bio.bi_iter.bi_size = sectors << 9;
|
||||
@ -284,9 +287,13 @@ static int bch2_move_extent(struct bch_fs *c,
|
||||
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
|
||||
io->rbio.bio.bi_end_io = move_read_endio;
|
||||
|
||||
__bch2_write_op_init(&io->write.op, c);
|
||||
io->write.btree_insert_flags = btree_insert_flags;
|
||||
io->write.move_dev = move_device;
|
||||
|
||||
bch2_write_op_init(&io->write.op, c);
|
||||
io->write.op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
|
||||
io->write.op.compression_type =
|
||||
bch2_compression_opt_to_type(opts.compression);
|
||||
io->write.op.devs = devs;
|
||||
io->write.op.write_point = wp;
|
||||
|
||||
@ -371,9 +378,11 @@ int bch2_move_data(struct bch_fs *c,
|
||||
{
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
struct moving_context ctxt;
|
||||
struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts);
|
||||
struct btree_iter iter;
|
||||
BKEY_PADDED(k) tmp;
|
||||
struct bkey_s_c k;
|
||||
u64 cur_inum = U64_MAX;
|
||||
int ret = 0;
|
||||
|
||||
bch2_move_ctxt_init(&ctxt);
|
||||
@ -396,7 +405,7 @@ int bch2_move_data(struct bch_fs *c,
|
||||
(bch2_btree_iter_unlock(&iter),
|
||||
(ret = bch2_ratelimit_wait_freezable_stoppable(rate))))
|
||||
break;
|
||||
|
||||
peek:
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
if (!k.k)
|
||||
break;
|
||||
@ -404,8 +413,23 @@ int bch2_move_data(struct bch_fs *c,
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!bkey_extent_is_data(k.k) ||
|
||||
!pred(arg, bkey_s_c_to_extent(k)))
|
||||
if (!bkey_extent_is_data(k.k))
|
||||
goto next;
|
||||
|
||||
if (cur_inum != k.k->p.inode) {
|
||||
struct bch_inode_unpacked inode;
|
||||
|
||||
/* don't hold btree locks while looking up inode: */
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
opts = bch2_opts_to_inode_opts(c->opts);
|
||||
if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
|
||||
bch2_io_opts_apply(&opts, bch2_inode_opts_get(&inode));
|
||||
cur_inum = k.k->p.inode;
|
||||
goto peek;
|
||||
}
|
||||
|
||||
if (!pred(arg, bkey_s_c_to_extent(k)))
|
||||
goto next;
|
||||
|
||||
/* unlock before doing IO: */
|
||||
@ -415,7 +439,7 @@ int bch2_move_data(struct bch_fs *c,
|
||||
|
||||
if (bch2_move_extent(c, &ctxt, devs, wp,
|
||||
btree_insert_flags,
|
||||
move_device, k)) {
|
||||
move_device, opts, k)) {
|
||||
/* memory allocation failure, wait for some IO to finish */
|
||||
bch2_move_ctxt_wait_for_io(&ctxt);
|
||||
continue;
|
||||
|
@ -76,16 +76,27 @@ void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
|
||||
#undef BCH_OPT
|
||||
}
|
||||
|
||||
bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id)
|
||||
{
|
||||
switch (id) {
|
||||
#define BCH_OPT(_name, ...) \
|
||||
case Opt_##_name: \
|
||||
return opt_defined(*opts, _name);
|
||||
BCH_OPTS()
|
||||
#undef BCH_OPT
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id)
|
||||
{
|
||||
switch (id) {
|
||||
#define BCH_OPT(_name, ...) \
|
||||
case Opt_##_name: \
|
||||
return opts->_name; \
|
||||
|
||||
return opts->_name;
|
||||
BCH_OPTS()
|
||||
#undef BCH_OPT
|
||||
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
@ -98,10 +109,8 @@ void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
|
||||
case Opt_##_name: \
|
||||
opt_set(*opts, _name, v); \
|
||||
break;
|
||||
|
||||
BCH_OPTS()
|
||||
#undef BCH_OPT
|
||||
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
@ -118,7 +127,6 @@ struct bch_opts bch2_opts_from_sb(struct bch_sb *sb)
|
||||
#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \
|
||||
if (_sb_opt != NO_SB_OPT) \
|
||||
opt_set(opts, _name, _sb_opt(sb));
|
||||
|
||||
BCH_OPTS()
|
||||
#undef BCH_OPT
|
||||
|
||||
@ -145,7 +153,7 @@ const struct bch_option bch2_opt_table[] = {
|
||||
#undef BCH_OPT
|
||||
};
|
||||
|
||||
static int bch2_opt_lookup(const char *name)
|
||||
int bch2_opt_lookup(const char *name)
|
||||
{
|
||||
const struct bch_option *i;
|
||||
|
||||
@ -247,3 +255,52 @@ no_val:
|
||||
pr_err("Mount option %s requires a value", name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* io opts: */
|
||||
|
||||
struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src)
|
||||
{
|
||||
struct bch_io_opts ret = { 0 };
|
||||
#define BCH_INODE_OPT(_name, _bits) \
|
||||
if (opt_defined(src, _name)) \
|
||||
opt_set(ret, _name, src._name);
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts src)
|
||||
{
|
||||
struct bch_opts ret = { 0 };
|
||||
#define BCH_INODE_OPT(_name, _bits) \
|
||||
if (opt_defined(src, _name)) \
|
||||
opt_set(ret, _name, src._name);
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_io_opts_apply(struct bch_io_opts *dst, struct bch_io_opts src)
|
||||
{
|
||||
#define BCH_INODE_OPT(_name, _bits) \
|
||||
if (opt_defined(src, _name)) \
|
||||
opt_set(*dst, _name, src._name);
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
}
|
||||
|
||||
bool bch2_opt_is_inode_opt(enum bch_opt_id id)
|
||||
{
|
||||
static const enum bch_opt_id inode_opt_list[] = {
|
||||
#define BCH_INODE_OPT(_name, _bits) Opt_##_name,
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
};
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++)
|
||||
if (inode_opt_list[i] == id)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -181,10 +181,7 @@ do { \
|
||||
|
||||
static inline struct bch_opts bch2_opts_empty(void)
|
||||
{
|
||||
struct bch_opts opts;
|
||||
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
return opts;
|
||||
return (struct bch_opts) { 0 };
|
||||
}
|
||||
|
||||
void bch2_opts_apply(struct bch_opts *, struct bch_opts);
|
||||
@ -215,12 +212,35 @@ struct bch_option {
|
||||
|
||||
extern const struct bch_option bch2_opt_table[];
|
||||
|
||||
bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
|
||||
u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
|
||||
void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
|
||||
|
||||
struct bch_opts bch2_opts_from_sb(struct bch_sb *);
|
||||
|
||||
int bch2_opt_lookup(const char *);
|
||||
int bch2_opt_parse(const struct bch_option *, const char *, u64 *);
|
||||
int bch2_parse_mount_opts(struct bch_opts *, char *);
|
||||
|
||||
/* inode opts: */
|
||||
|
||||
#define BCH_INODE_OPTS() \
|
||||
BCH_INODE_OPT(data_checksum, 8) \
|
||||
BCH_INODE_OPT(compression, 8)
|
||||
|
||||
struct bch_io_opts {
|
||||
#define BCH_INODE_OPT(_name, _bits) unsigned _name##_defined:1;
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
|
||||
#define BCH_INODE_OPT(_name, _bits) u##_bits _name;
|
||||
BCH_INODE_OPTS()
|
||||
#undef BCH_INODE_OPT
|
||||
};
|
||||
|
||||
struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
|
||||
struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts);
|
||||
void bch2_io_opts_apply(struct bch_io_opts *, struct bch_io_opts);
|
||||
bool bch2_opt_is_inode_opt(enum bch_opt_id);
|
||||
|
||||
#endif /* _BCACHEFS_OPTS_H */
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include <linux/sort.h>
|
||||
|
||||
static int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
|
||||
struct bch_replicas_cpu *);
|
||||
static const char *bch2_sb_validate_replicas(struct bch_sb *);
|
||||
|
||||
static inline void __bch2_sb_layout_size_assert(void)
|
||||
@ -157,7 +159,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
|
||||
return NULL;
|
||||
|
||||
f = __bch2_sb_field_resize(sb->sb, f, u64s);
|
||||
f->type = type;
|
||||
f->type = cpu_to_le32(type);
|
||||
return f;
|
||||
}
|
||||
|
||||
@ -188,7 +190,7 @@ struct bch_sb_field *bch2_fs_sb_field_resize(struct bch_fs *c,
|
||||
}
|
||||
|
||||
f = __bch2_sb_field_resize(c->disk_sb, f, u64s);
|
||||
f->type = type;
|
||||
f->type = cpu_to_le32(type);
|
||||
return f;
|
||||
}
|
||||
|
||||
@ -354,7 +356,16 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
|
||||
|
||||
if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
|
||||
BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
|
||||
return "Invalid number of metadata replicas";
|
||||
return "Invalid number of data replicas";
|
||||
|
||||
if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
|
||||
return "Invalid metadata checksum type";
|
||||
|
||||
if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
|
||||
return "Invalid metadata checksum type";
|
||||
|
||||
if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR)
|
||||
return "Invalid compression type";
|
||||
|
||||
if (!BCH_SB_BTREE_NODE_SIZE(sb))
|
||||
return "Btree node size not set";
|
||||
@ -507,7 +518,7 @@ static void __copy_super(struct bch_sb *dst, struct bch_sb *src)
|
||||
if (src_f->type == BCH_SB_FIELD_journal)
|
||||
continue;
|
||||
|
||||
dst_f = bch2_sb_field_get(dst, src_f->type);
|
||||
dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
|
||||
dst_f = __bch2_sb_field_resize(dst, dst_f,
|
||||
le32_to_cpu(src_f->u64s));
|
||||
|
||||
@ -601,7 +612,7 @@ reread:
|
||||
|
||||
/* XXX: verify MACs */
|
||||
csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
|
||||
(struct nonce) { 0 }, sb->sb);
|
||||
null_nonce(), sb->sb);
|
||||
|
||||
if (bch2_crc_cmp(csum, sb->sb->csum))
|
||||
return "bad checksum reading superblock";
|
||||
@ -688,9 +699,9 @@ const char *bch2_read_super(const char *path,
|
||||
got_super:
|
||||
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
|
||||
le64_to_cpu(ret->sb->version),
|
||||
le64_to_cpu(ret->sb->flags),
|
||||
le64_to_cpu(ret->sb->flags[0]),
|
||||
le64_to_cpu(ret->sb->seq),
|
||||
le16_to_cpu(ret->sb->u64s));
|
||||
le32_to_cpu(ret->sb->u64s));
|
||||
|
||||
err = "Superblock block size smaller than device block size";
|
||||
if (le16_to_cpu(ret->sb->block_size) << 9 <
|
||||
@ -711,7 +722,7 @@ static void write_super_endio(struct bio *bio)
|
||||
|
||||
/* XXX: return errors directly */
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_error, ca, "superblock write"))
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write"))
|
||||
ca->sb_write_error = 1;
|
||||
|
||||
closure_put(&ca->fs->sb_write);
|
||||
@ -727,7 +738,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
|
||||
|
||||
SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
|
||||
sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
|
||||
(struct nonce) { 0 }, sb);
|
||||
null_nonce(), sb);
|
||||
|
||||
bio_reset(bio);
|
||||
bio->bi_bdev = ca->disk_sb.bdev;
|
||||
@ -830,7 +841,12 @@ out:
|
||||
bch2_sb_update(c);
|
||||
}
|
||||
|
||||
/* replica information: */
|
||||
/* Replicas tracking - in memory: */
|
||||
|
||||
#define for_each_cpu_replicas_entry(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
|
||||
_i = (void *) (_i) + (_r)->entry_size)
|
||||
|
||||
static inline struct bch_replicas_cpu_entry *
|
||||
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
|
||||
@ -838,6 +854,11 @@ cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
|
||||
return (void *) r->entries + r->entry_size * i;
|
||||
}
|
||||
|
||||
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
|
||||
{
|
||||
eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
|
||||
}
|
||||
|
||||
static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
|
||||
unsigned dev)
|
||||
{
|
||||
@ -856,6 +877,246 @@ static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
|
||||
offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
|
||||
}
|
||||
|
||||
static unsigned bkey_to_replicas(struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_replicas_cpu_entry *r,
|
||||
unsigned *max_dev)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned nr = 0;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data_type = data_type;
|
||||
|
||||
*max_dev = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached) {
|
||||
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
|
||||
replicas_set_dev(r, ptr->dev);
|
||||
nr++;
|
||||
}
|
||||
return nr;
|
||||
}
|
||||
|
||||
static struct bch_replicas_cpu *
|
||||
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
|
||||
struct bch_replicas_cpu_entry new_entry,
|
||||
unsigned max_dev)
|
||||
{
|
||||
struct bch_replicas_cpu *new;
|
||||
unsigned i, nr, entry_size;
|
||||
|
||||
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
|
||||
DIV_ROUND_UP(max_dev + 1, 8);
|
||||
entry_size = max(entry_size, old->entry_size);
|
||||
nr = old->nr + 1;
|
||||
|
||||
new = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
nr * entry_size, GFP_NOIO);
|
||||
if (!new)
|
||||
return NULL;
|
||||
|
||||
new->nr = nr;
|
||||
new->entry_size = entry_size;
|
||||
|
||||
for (i = 0; i < old->nr; i++)
|
||||
memcpy(cpu_replicas_entry(new, i),
|
||||
cpu_replicas_entry(old, i),
|
||||
min(new->entry_size, old->entry_size));
|
||||
|
||||
memcpy(cpu_replicas_entry(new, old->nr),
|
||||
&new_entry,
|
||||
new->entry_size);
|
||||
|
||||
bch2_cpu_replicas_sort(new);
|
||||
return new;
|
||||
}
|
||||
|
||||
static bool replicas_has_entry(struct bch_replicas_cpu *r,
|
||||
struct bch_replicas_cpu_entry search,
|
||||
unsigned max_dev)
|
||||
{
|
||||
return max_dev < replicas_dev_slots(r) &&
|
||||
eytzinger0_find(r->entries, r->nr,
|
||||
r->entry_size,
|
||||
memcmp, &search) < r->nr;
|
||||
}
|
||||
|
||||
noinline
|
||||
static int bch2_check_mark_super_slowpath(struct bch_fs *c,
|
||||
struct bch_replicas_cpu_entry new_entry,
|
||||
unsigned max_dev)
|
||||
{
|
||||
struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
old_gc = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
|
||||
new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
|
||||
if (!new_gc)
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_r = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
/* recheck, might have raced */
|
||||
if (replicas_has_entry(old_r, new_entry, max_dev))
|
||||
goto out;
|
||||
|
||||
new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
|
||||
if (!new_r)
|
||||
goto err;
|
||||
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, new_r);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (new_gc) {
|
||||
rcu_assign_pointer(c->replicas_gc, new_gc);
|
||||
kfree_rcu(old_gc, rcu);
|
||||
}
|
||||
|
||||
rcu_assign_pointer(c->replicas, new_r);
|
||||
kfree_rcu(old_r, rcu);
|
||||
|
||||
bch2_write_super(c);
|
||||
out:
|
||||
ret = 0;
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int __bch2_check_mark_super(struct bch_fs *c,
|
||||
struct bch_replicas_cpu_entry search,
|
||||
unsigned max_dev)
|
||||
{
|
||||
struct bch_replicas_cpu *r, *gc_r;
|
||||
bool marked;
|
||||
|
||||
rcu_read_lock();
|
||||
r = rcu_dereference(c->replicas);
|
||||
gc_r = rcu_dereference(c->replicas_gc);
|
||||
marked = replicas_has_entry(r, search, max_dev) &&
|
||||
(!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
|
||||
rcu_read_unlock();
|
||||
|
||||
return likely(marked) ? 0
|
||||
: bch2_check_mark_super_slowpath(c, search, max_dev);
|
||||
}
|
||||
|
||||
int bch2_check_mark_super(struct bch_fs *c, struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
unsigned max_dev;
|
||||
|
||||
if (!bkey_to_replicas(e, data_type, &search, &max_dev))
|
||||
return 0;
|
||||
|
||||
return __bch2_check_mark_super(c, search, max_dev);
|
||||
}
|
||||
|
||||
int bch2_check_mark_super_devlist(struct bch_fs *c,
|
||||
struct bch_devs_list *devs,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search = { .data_type = data_type };
|
||||
unsigned i, max_dev = 0;
|
||||
|
||||
if (!devs->nr)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < devs->nr; i++) {
|
||||
max_dev = max_t(unsigned, max_dev, devs->devs[i]);
|
||||
replicas_set_dev(&search, devs->devs[i]);
|
||||
}
|
||||
|
||||
return __bch2_check_mark_super(c, search, max_dev);
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *c, int err)
|
||||
{
|
||||
struct bch_replicas_cpu *new_r, *old_r;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
new_r = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
if (err) {
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(new_r, rcu);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bch2_cpu_replicas_to_sb_replicas(c, new_r)) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_r = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
rcu_assign_pointer(c->replicas, new_r);
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(old_r, rcu);
|
||||
|
||||
bch2_write_super(c);
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
{
|
||||
struct bch_replicas_cpu *dst, *src;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
BUG_ON(c->replicas_gc);
|
||||
|
||||
src = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
dst = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
src->nr * src->entry_size, GFP_NOIO);
|
||||
if (!dst) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dst->nr = 0;
|
||||
dst->entry_size = src->entry_size;
|
||||
|
||||
for_each_cpu_replicas_entry(src, e)
|
||||
if (!((1 << e->data_type) & typemask))
|
||||
memcpy(cpu_replicas_entry(dst, dst->nr++),
|
||||
e, dst->entry_size);
|
||||
|
||||
bch2_cpu_replicas_sort(dst);
|
||||
|
||||
rcu_assign_pointer(c->replicas_gc, dst);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Replicas tracking - superblock: */
|
||||
|
||||
static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
|
||||
unsigned *nr,
|
||||
unsigned *bytes,
|
||||
@ -914,10 +1175,7 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
|
||||
}
|
||||
}
|
||||
|
||||
eytzinger0_sort(cpu_r->entries,
|
||||
cpu_r->nr,
|
||||
cpu_r->entry_size,
|
||||
memcmp, NULL);
|
||||
bch2_cpu_replicas_sort(cpu_r);
|
||||
return cpu_r;
|
||||
}
|
||||
|
||||
@ -926,14 +1184,12 @@ static int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_cpu *cpu_r, *old_r;
|
||||
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb);
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
return -ENOMEM;
|
||||
|
||||
old_r = c->replicas;
|
||||
old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
|
||||
rcu_assign_pointer(c->replicas, cpu_r);
|
||||
if (old_r)
|
||||
kfree_rcu(old_r, rcu);
|
||||
@ -941,192 +1197,133 @@ static int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bkey_to_replicas(struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_replicas_cpu_entry *r,
|
||||
unsigned *max_dev)
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry *sb_e;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
size_t i, bytes;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data_type = data_type;
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
bytes += sizeof(struct bch_replicas_entry);
|
||||
for (i = 0; i < r->entry_size - 1; i++)
|
||||
bytes += hweight8(e->devs[i]);
|
||||
}
|
||||
|
||||
*max_dev = 0;
|
||||
sb_r = bch2_fs_sb_resize_replicas(c,
|
||||
DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached) {
|
||||
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
|
||||
replicas_set_dev(r, ptr->dev);
|
||||
}
|
||||
}
|
||||
memset(&sb_r->entries, 0,
|
||||
vstruct_end(&sb_r->field) -
|
||||
(void *) &sb_r->entries);
|
||||
|
||||
/*
|
||||
* for when gc of replica information is in progress:
|
||||
*/
|
||||
static int bch2_update_gc_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *gc_r,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu_entry new_e;
|
||||
struct bch_replicas_cpu *new;
|
||||
unsigned i, nr, entry_size, max_dev;
|
||||
sb_e = sb_r->entries;
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
sb_e->data_type = e->data_type;
|
||||
|
||||
bkey_to_replicas(e, data_type, &new_e, &max_dev);
|
||||
for (i = 0; i < replicas_dev_slots(r); i++)
|
||||
if (replicas_test_dev(e, i))
|
||||
sb_e->devs[sb_e->nr++] = i;
|
||||
|
||||
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
|
||||
DIV_ROUND_UP(max_dev + 1, 8);
|
||||
entry_size = max(entry_size, gc_r->entry_size);
|
||||
nr = gc_r->nr + 1;
|
||||
sb_e = replicas_entry_next(sb_e);
|
||||
|
||||
new = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
nr * entry_size, GFP_NOIO);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
|
||||
}
|
||||
|
||||
new->nr = nr;
|
||||
new->entry_size = entry_size;
|
||||
|
||||
for (i = 0; i < gc_r->nr; i++)
|
||||
memcpy(cpu_replicas_entry(new, i),
|
||||
cpu_replicas_entry(gc_r, i),
|
||||
gc_r->entry_size);
|
||||
|
||||
memcpy(cpu_replicas_entry(new, nr - 1),
|
||||
&new_e,
|
||||
new->entry_size);
|
||||
|
||||
eytzinger0_sort(new->entries,
|
||||
new->nr,
|
||||
new->entry_size,
|
||||
memcmp, NULL);
|
||||
|
||||
rcu_assign_pointer(c->replicas_gc, new);
|
||||
kfree_rcu(gc_r, rcu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool replicas_has_extent(struct bch_replicas_cpu *r,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
static const char *bch2_sb_validate_replicas(struct bch_sb *sb)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
unsigned max_dev;
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_cpu *cpu_r = NULL;
|
||||
struct bch_replicas_entry *e;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
|
||||
bkey_to_replicas(e, data_type, &search, &max_dev);
|
||||
mi = bch2_sb_get_members(sb);
|
||||
sb_r = bch2_sb_get_replicas(sb);
|
||||
if (!sb_r)
|
||||
return NULL;
|
||||
|
||||
return max_dev < replicas_dev_slots(r) &&
|
||||
eytzinger0_find(r->entries, r->nr,
|
||||
r->entry_size,
|
||||
memcmp, &search) < r->nr;
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
err = "invalid replicas entry: invalid data type";
|
||||
if (e->data_type >= BCH_DATA_NR)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: no devices";
|
||||
if (!e->nr)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: too many devices";
|
||||
if (e->nr >= BCH_REPLICAS_MAX)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: invalid device";
|
||||
for (i = 0; i < e->nr; i++)
|
||||
if (!bch2_dev_exists(sb, mi, e->devs[i]))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = "cannot allocate memory";
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
goto err;
|
||||
|
||||
sort_cmp_size(cpu_r->entries,
|
||||
cpu_r->nr,
|
||||
cpu_r->entry_size,
|
||||
memcmp, NULL);
|
||||
|
||||
for (i = 0; i + 1 < cpu_r->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *l =
|
||||
cpu_replicas_entry(cpu_r, i);
|
||||
struct bch_replicas_cpu_entry *r =
|
||||
cpu_replicas_entry(cpu_r, i + 1);
|
||||
|
||||
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
|
||||
|
||||
err = "duplicate replicas entry";
|
||||
if (!memcmp(l, r, cpu_r->entry_size))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = NULL;
|
||||
err:
|
||||
kfree(cpu_r);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Query replicas: */
|
||||
|
||||
bool bch2_sb_has_replicas(struct bch_fs *c, struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
unsigned max_dev;
|
||||
bool ret;
|
||||
|
||||
if (!bkey_to_replicas(e, data_type, &search, &max_dev))
|
||||
return true;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = replicas_has_extent(rcu_dereference(c->replicas),
|
||||
e, data_type);
|
||||
ret = replicas_has_entry(rcu_dereference(c->replicas),
|
||||
search, max_dev);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
noinline
|
||||
static int bch2_check_mark_super_slowpath(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu *gc_r;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry *new_entry;
|
||||
unsigned new_entry_bytes, new_u64s, nr, bytes, max_dev;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
gc_r = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
if (gc_r &&
|
||||
!replicas_has_extent(gc_r, e, data_type)) {
|
||||
ret = bch2_update_gc_replicas(c, gc_r, e, data_type);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* recheck, might have raced */
|
||||
if (bch2_sb_has_replicas(c, e, data_type)) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
new_entry_bytes = sizeof(struct bch_replicas_entry) +
|
||||
bch2_extent_nr_dirty_ptrs(e.s_c);
|
||||
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb);
|
||||
|
||||
bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
|
||||
|
||||
new_u64s = DIV_ROUND_UP(bytes + new_entry_bytes, sizeof(u64));
|
||||
|
||||
sb_r = bch2_fs_sb_resize_replicas(c,
|
||||
DIV_ROUND_UP(sizeof(*sb_r) + bytes + new_entry_bytes,
|
||||
sizeof(u64)));
|
||||
if (!sb_r) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
new_entry = (void *) sb_r + bytes;
|
||||
new_entry->data_type = data_type;
|
||||
new_entry->nr = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached)
|
||||
new_entry->devs[new_entry->nr++] = ptr->dev;
|
||||
|
||||
ret = bch2_sb_replicas_to_cpu_replicas(c);
|
||||
if (ret) {
|
||||
memset(new_entry, 0,
|
||||
vstruct_end(&sb_r->field) - (void *) new_entry);
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_write_super(c);
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_mark_super(struct bch_fs *c, struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type)
|
||||
{
|
||||
struct bch_replicas_cpu *gc_r;
|
||||
bool marked;
|
||||
|
||||
rcu_read_lock();
|
||||
marked = replicas_has_extent(rcu_dereference(c->replicas),
|
||||
e, data_type) &&
|
||||
(!(gc_r = rcu_dereference(c->replicas_gc)) ||
|
||||
replicas_has_extent(gc_r, e, data_type));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (marked)
|
||||
return 0;
|
||||
|
||||
return bch2_check_mark_super_slowpath(c, e, data_type);
|
||||
}
|
||||
|
||||
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
struct bch_devs_mask online_devs)
|
||||
struct bch_devs_mask online_devs)
|
||||
{
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
struct bch_replicas_cpu *r;
|
||||
unsigned i, dev, dev_slots, nr_online, nr_offline;
|
||||
@ -1137,14 +1334,15 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
|
||||
ret.replicas[i].nr_online = UINT_MAX;
|
||||
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
rcu_read_lock();
|
||||
|
||||
r = rcu_dereference(c->replicas);
|
||||
dev_slots = min_t(unsigned, replicas_dev_slots(r), c->sb.nr_devices);
|
||||
dev_slots = replicas_dev_slots(r);
|
||||
|
||||
for (i = 0; i < r->nr; i++) {
|
||||
e = cpu_replicas_entry(r, i);
|
||||
|
||||
BUG_ON(e->data_type >= ARRAY_SIZE(ret.replicas));
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
if (e->data_type >= ARRAY_SIZE(ret.replicas))
|
||||
panic("e %p data_type %u\n", e, e->data_type);
|
||||
|
||||
nr_online = nr_offline = 0;
|
||||
|
||||
@ -1152,6 +1350,8 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
if (!replicas_test_dev(e, dev))
|
||||
continue;
|
||||
|
||||
BUG_ON(!bch2_dev_exists(c->disk_sb, mi, dev));
|
||||
|
||||
if (test_bit(dev, online_devs.d))
|
||||
nr_online++;
|
||||
else
|
||||
@ -1216,7 +1416,7 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
struct bch_replicas_cpu *r;
|
||||
unsigned i, ret = 0;
|
||||
unsigned ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
r = rcu_dereference(c->replicas);
|
||||
@ -1224,191 +1424,13 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
|
||||
if (ca->dev_idx >= replicas_dev_slots(r))
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < r->nr; i++) {
|
||||
e = cpu_replicas_entry(r, i);
|
||||
|
||||
for_each_cpu_replicas_entry(r, e)
|
||||
if (replicas_test_dev(e, ca->dev_idx)) {
|
||||
ret |= 1 << e->data_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *bch2_sb_validate_replicas(struct bch_sb *sb)
|
||||
{
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_cpu *cpu_r = NULL;
|
||||
struct bch_replicas_entry *e;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
|
||||
mi = bch2_sb_get_members(sb);
|
||||
sb_r = bch2_sb_get_replicas(sb);
|
||||
if (!sb_r)
|
||||
return NULL;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
err = "invalid replicas entry: invalid data type";
|
||||
if (e->data_type >= BCH_DATA_NR)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: too many devices";
|
||||
if (e->nr >= BCH_REPLICAS_MAX)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: invalid device";
|
||||
for (i = 0; i < e->nr; i++)
|
||||
if (!bch2_dev_exists(sb, mi, e->devs[i]))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = "cannot allocate memory";
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
goto err;
|
||||
|
||||
sort_cmp_size(cpu_r->entries,
|
||||
cpu_r->nr,
|
||||
cpu_r->entry_size,
|
||||
memcmp, NULL);
|
||||
|
||||
for (i = 0; i + 1 < cpu_r->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *l =
|
||||
cpu_replicas_entry(cpu_r, i);
|
||||
struct bch_replicas_cpu_entry *r =
|
||||
cpu_replicas_entry(cpu_r, i + 1);
|
||||
|
||||
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
|
||||
|
||||
err = "duplicate replicas entry";
|
||||
if (!memcmp(l, r, cpu_r->entry_size))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = NULL;
|
||||
err:
|
||||
kfree(cpu_r);
|
||||
return err;
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *c, int err)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_cpu *r, *old_r;
|
||||
struct bch_replicas_entry *dst_e;
|
||||
size_t i, j, bytes, dev_slots;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
r = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
if (err) {
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(r, rcu);
|
||||
goto err;
|
||||
}
|
||||
|
||||
dev_slots = replicas_dev_slots(r);
|
||||
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
|
||||
for (i = 0; i < r->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *e =
|
||||
cpu_replicas_entry(r, i);
|
||||
|
||||
bytes += sizeof(struct bch_replicas_entry);
|
||||
for (j = 0; j < r->entry_size - 1; j++)
|
||||
bytes += hweight8(e->devs[j]);
|
||||
}
|
||||
|
||||
sb_r = bch2_fs_sb_resize_replicas(c,
|
||||
DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
|
||||
if (!sb_r) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
memset(&sb_r->entries, 0,
|
||||
vstruct_end(&sb_r->field) -
|
||||
(void *) &sb_r->entries);
|
||||
|
||||
dst_e = sb_r->entries;
|
||||
for (i = 0; i < r->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *src_e =
|
||||
cpu_replicas_entry(r, i);
|
||||
|
||||
dst_e->data_type = src_e->data_type;
|
||||
|
||||
for (j = 0; j < dev_slots; j++)
|
||||
if (replicas_test_dev(src_e, j))
|
||||
dst_e->devs[dst_e->nr++] = j;
|
||||
|
||||
dst_e = replicas_entry_next(dst_e);
|
||||
}
|
||||
|
||||
old_r = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
rcu_assign_pointer(c->replicas, r);
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(old_r, rcu);
|
||||
|
||||
bch2_write_super(c);
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
{
|
||||
struct bch_replicas_cpu *r, *src;
|
||||
unsigned i;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
BUG_ON(c->replicas_gc);
|
||||
|
||||
src = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
r = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
src->nr * src->entry_size, GFP_NOIO);
|
||||
if (!r) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r->entry_size = src->entry_size;
|
||||
r->nr = 0;
|
||||
|
||||
for (i = 0; i < src->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *dst_e =
|
||||
cpu_replicas_entry(r, r->nr);
|
||||
struct bch_replicas_cpu_entry *src_e =
|
||||
cpu_replicas_entry(src, i);
|
||||
|
||||
if (!(src_e->data_type & typemask)) {
|
||||
memcpy(dst_e, src_e, r->entry_size);
|
||||
r->nr++;
|
||||
}
|
||||
}
|
||||
|
||||
eytzinger0_sort(r->entries,
|
||||
r->nr,
|
||||
r->entry_size,
|
||||
memcmp, NULL);
|
||||
|
||||
rcu_assign_pointer(c->replicas_gc, r);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -125,23 +125,12 @@ void bch2_write_super(struct bch_fs *);
|
||||
|
||||
/* replicas: */
|
||||
|
||||
/* iterate over bch_sb_field_replicas: */
|
||||
|
||||
static inline struct bch_replicas_entry *
|
||||
replicas_entry_next(struct bch_replicas_entry *i)
|
||||
{
|
||||
return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
|
||||
}
|
||||
|
||||
#define for_each_replicas_entry(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
bool bch2_sb_has_replicas(struct bch_fs *, struct bkey_s_c_extent,
|
||||
enum bch_data_type);
|
||||
int bch2_check_mark_super(struct bch_fs *, struct bkey_s_c_extent,
|
||||
enum bch_data_type);
|
||||
int bch2_check_mark_super_devlist(struct bch_fs *, struct bch_devs_list *,
|
||||
enum bch_data_type);
|
||||
|
||||
struct replicas_status {
|
||||
struct {
|
||||
@ -161,4 +150,17 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
|
||||
int bch2_replicas_gc_end(struct bch_fs *, int);
|
||||
int bch2_replicas_gc_start(struct bch_fs *, unsigned);
|
||||
|
||||
/* iterate over superblock replicas - used by userspace tools: */
|
||||
|
||||
static inline struct bch_replicas_entry *
|
||||
replicas_entry_next(struct bch_replicas_entry *i)
|
||||
{
|
||||
return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
|
||||
}
|
||||
|
||||
#define for_each_replicas_entry(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
#endif /* _BCACHEFS_SUPER_IO_H */
|
||||
|
@ -140,8 +140,9 @@ struct bch_fs *bch2_uuid_to_fs(uuid_le uuid)
|
||||
return c;
|
||||
}
|
||||
|
||||
int bch2_congested(struct bch_fs *c, int bdi_bits)
|
||||
int bch2_congested(void *data, int bdi_bits)
|
||||
{
|
||||
struct bch_fs *c = data;
|
||||
struct backing_dev_info *bdi;
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
@ -178,13 +179,6 @@ int bch2_congested(struct bch_fs *c, int bdi_bits)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_congested_fn(void *data, int bdi_bits)
|
||||
{
|
||||
struct bch_fs *c = data;
|
||||
|
||||
return bch2_congested(c, bdi_bits);
|
||||
}
|
||||
|
||||
/* Filesystem RO/RW: */
|
||||
|
||||
/*
|
||||
@ -218,7 +212,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
* Flush journal before stopping allocators, because flushing journal
|
||||
* blacklist entries involves allocating new btree nodes:
|
||||
*/
|
||||
bch2_journal_flush_pins(&c->journal, U64_MAX);
|
||||
bch2_journal_flush_all_pins(&c->journal);
|
||||
|
||||
if (!bch2_journal_error(&c->journal))
|
||||
bch2_btree_verify_flushed(c);
|
||||
@ -379,8 +373,6 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
bch2_io_clock_exit(&c->io_clock[WRITE]);
|
||||
bch2_io_clock_exit(&c->io_clock[READ]);
|
||||
bch2_fs_compress_exit(c);
|
||||
if (c->bdi.bdi_list.next)
|
||||
bdi_destroy(&c->bdi);
|
||||
lg_lock_free(&c->usage_lock);
|
||||
free_percpu(c->usage_percpu);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
@ -393,7 +385,7 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
mempool_exit(&c->btree_reserve_pool);
|
||||
mempool_exit(&c->fill_iter);
|
||||
percpu_ref_exit(&c->writes);
|
||||
kfree(c->replicas);
|
||||
kfree(rcu_dereference_protected(c->replicas, 1));
|
||||
|
||||
if (c->copygc_wq)
|
||||
destroy_workqueue(c->copygc_wq);
|
||||
@ -414,7 +406,7 @@ static void bch2_fs_exit(struct bch_fs *c)
|
||||
|
||||
for (i = 0; i < c->sb.nr_devices; i++)
|
||||
if (c->devs[i])
|
||||
bch2_dev_free(c->devs[i]);
|
||||
bch2_dev_free(rcu_dereference_protected(c->devs[i], 1));
|
||||
|
||||
closure_debug_destroy(&c->cl);
|
||||
kobject_put(&c->kobj);
|
||||
@ -576,10 +568,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
sizeof(struct btree_update)) ||
|
||||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
|
||||
bioset_init(&c->btree_read_bio, 1,
|
||||
offsetof(struct btree_read_bio, bio)) ||
|
||||
bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio)) ||
|
||||
bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio)) ||
|
||||
bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio)) ||
|
||||
offsetof(struct btree_read_bio, bio),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
mempool_init_page_pool(&c->bio_bounce_pages,
|
||||
max_t(unsigned,
|
||||
c->opts.btree_node_size,
|
||||
@ -588,7 +584,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
|
||||
lg_lock_init(&c->usage_lock) ||
|
||||
mempool_init_vp_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) ||
|
||||
bdi_setup_and_register(&c->bdi, "bcachefs") ||
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ||
|
||||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
|
||||
bch2_fs_journal_init(&c->journal) ||
|
||||
@ -599,10 +594,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
bch2_fs_fsio_init(c))
|
||||
goto err;
|
||||
|
||||
c->bdi.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
|
||||
c->bdi.congested_fn = bch2_congested_fn;
|
||||
c->bdi.congested_data = c;
|
||||
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
for (i = 0; i < c->sb.nr_devices; i++)
|
||||
if (bch2_dev_exists(c->disk_sb, mi, i) &&
|
||||
@ -729,8 +720,12 @@ static const char *__bch2_fs_start(struct bch_fs *c)
|
||||
continue;
|
||||
|
||||
err = "error reading btree root";
|
||||
if (bch2_btree_root_read(c, i, k, level))
|
||||
goto err;
|
||||
if (bch2_btree_root_read(c, i, k, level)) {
|
||||
if (i != BTREE_ID_ALLOC)
|
||||
goto err;
|
||||
|
||||
mustfix_fsck_err(c, "error reading btree root");
|
||||
}
|
||||
}
|
||||
|
||||
err = "error reading allocation information";
|
||||
@ -830,7 +825,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
|
||||
closure_sync(&cl);
|
||||
|
||||
bch2_inode_init(c, &inode, 0, 0,
|
||||
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
|
||||
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
|
||||
inode.bi_inum = BCACHEFS_ROOT_INO;
|
||||
|
||||
bch2_inode_pack(&packed_inode, &inode);
|
||||
@ -877,6 +872,7 @@ out:
|
||||
bch2_journal_entries_free(&journal);
|
||||
return err;
|
||||
err:
|
||||
fsck_err:
|
||||
closure_sync(&cl);
|
||||
|
||||
switch (ret) {
|
||||
@ -995,24 +991,20 @@ static void bch2_dev_free(struct bch_dev *ca)
|
||||
kobject_put(&ca->kobj);
|
||||
}
|
||||
|
||||
static void bch2_dev_io_ref_release(struct percpu_ref *ref)
|
||||
{
|
||||
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
|
||||
|
||||
complete(&ca->offline_complete);
|
||||
}
|
||||
|
||||
static void __bch2_dev_offline(struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
if (percpu_ref_is_zero(&ca->io_ref))
|
||||
return;
|
||||
|
||||
__bch2_dev_read_only(c, ca);
|
||||
|
||||
reinit_completion(&ca->offline_complete);
|
||||
reinit_completion(&ca->io_ref_completion);
|
||||
percpu_ref_kill(&ca->io_ref);
|
||||
wait_for_completion(&ca->offline_complete);
|
||||
wait_for_completion(&ca->io_ref_completion);
|
||||
|
||||
if (ca->kobj.state_in_sysfs) {
|
||||
struct kobject *block =
|
||||
@ -1026,27 +1018,18 @@ static void __bch2_dev_offline(struct bch_dev *ca)
|
||||
bch2_dev_journal_exit(ca);
|
||||
}
|
||||
|
||||
static void bch2_dev_ref_release(struct percpu_ref *ref)
|
||||
static void bch2_dev_ref_complete(struct percpu_ref *ref)
|
||||
{
|
||||
struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
|
||||
|
||||
complete(&ca->stop_complete);
|
||||
complete(&ca->ref_completion);
|
||||
}
|
||||
|
||||
static void bch2_dev_stop(struct bch_dev *ca)
|
||||
static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
BUG_ON(rcu_access_pointer(c->devs[ca->dev_idx]) != ca);
|
||||
rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
reinit_completion(&ca->stop_complete);
|
||||
percpu_ref_kill(&ca->ref);
|
||||
wait_for_completion(&ca->stop_complete);
|
||||
complete(&ca->io_ref_completion);
|
||||
}
|
||||
|
||||
static int bch2_dev_sysfs_online(struct bch_dev *ca)
|
||||
@ -1095,8 +1078,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
return -ENOMEM;
|
||||
|
||||
kobject_init(&ca->kobj, &bch2_dev_ktype);
|
||||
init_completion(&ca->stop_complete);
|
||||
init_completion(&ca->offline_complete);
|
||||
init_completion(&ca->ref_completion);
|
||||
init_completion(&ca->io_ref_completion);
|
||||
|
||||
ca->dev_idx = dev_idx;
|
||||
__set_bit(ca->dev_idx, ca->self.d);
|
||||
@ -1132,9 +1115,9 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
DIV_ROUND_UP(BTREE_NODE_RESERVE,
|
||||
ca->mi.bucket_size / c->opts.btree_node_size);
|
||||
|
||||
if (percpu_ref_init(&ca->ref, bch2_dev_ref_release,
|
||||
if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
|
||||
0, GFP_KERNEL) ||
|
||||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_release,
|
||||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
|
||||
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
|
||||
!init_fifo(&ca->free[RESERVE_BTREE], btree_node_reserve_buckets,
|
||||
GFP_KERNEL) ||
|
||||
@ -1155,7 +1138,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) ||
|
||||
bioset_init(&ca->replica_set, 4,
|
||||
offsetof(struct bch_write_bio, bio)) ||
|
||||
offsetof(struct bch_write_bio, bio), 0) ||
|
||||
!(ca->io_done = alloc_percpu(*ca->io_done)))
|
||||
goto err;
|
||||
|
||||
@ -1180,8 +1163,6 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
struct bch_dev *ca;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
if (le64_to_cpu(sb->sb->seq) >
|
||||
le64_to_cpu(c->disk_sb->seq))
|
||||
bch2_sb_to_fs(c, sb->sb);
|
||||
@ -1189,13 +1170,15 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
|
||||
!c->devs[sb->sb->dev_idx]);
|
||||
|
||||
ca = c->devs[sb->sb->dev_idx];
|
||||
ca = bch_dev_locked(c, sb->sb->dev_idx);
|
||||
if (ca->disk_sb.bdev) {
|
||||
bch_err(c, "already have device online in slot %u",
|
||||
sb->sb->dev_idx);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
|
||||
|
||||
ret = bch2_dev_journal_init(ca, sb->sb);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1222,7 +1205,7 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
if (bch2_dev_sysfs_online(ca))
|
||||
pr_warn("error creating sysfs objects");
|
||||
|
||||
bch2_mark_dev_superblock(c, ca, 0);
|
||||
bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_RW)
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
@ -1293,6 +1276,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
|
||||
{
|
||||
struct replicas_status s;
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_dev *ca;
|
||||
unsigned i, flags = c->opts.degraded
|
||||
? BCH_FORCE_IF_DEGRADED
|
||||
: 0;
|
||||
@ -1301,14 +1285,19 @@ static bool bch2_fs_may_start(struct bch_fs *c)
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
|
||||
for (i = 0; i < c->disk_sb->nr_devices; i++)
|
||||
if (bch2_dev_exists(c->disk_sb, mi, i) &&
|
||||
!bch2_dev_is_online(c->devs[i]) &&
|
||||
(c->devs[i]->mi.state == BCH_MEMBER_STATE_RW ||
|
||||
c->devs[i]->mi.state == BCH_MEMBER_STATE_RO)) {
|
||||
for (i = 0; i < c->disk_sb->nr_devices; i++) {
|
||||
if (!bch2_dev_exists(c->disk_sb, mi, i))
|
||||
continue;
|
||||
|
||||
ca = bch_dev_locked(c, i);
|
||||
|
||||
if (!bch2_dev_is_online(ca) &&
|
||||
(ca->mi.state == BCH_MEMBER_STATE_RW ||
|
||||
ca->mi.state == BCH_MEMBER_STATE_RO)) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
|
||||
@ -1419,22 +1408,59 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
*
|
||||
* flag_data_bad() does not check btree pointers
|
||||
*/
|
||||
ret = bch2_flag_data_bad(ca);
|
||||
ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed");
|
||||
bch_err(ca, "Remove failed: error %i dropping data", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_journal_flush_device(&c->journal, ca->dev_idx);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed: error %i flushing journal", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
data = bch2_dev_has_data(c, ca);
|
||||
if (data) {
|
||||
bch_err(ca, "Remove failed, still has data (%x)", data);
|
||||
char data_has_str[100];
|
||||
bch2_scnprint_flag_list(data_has_str,
|
||||
sizeof(data_has_str),
|
||||
bch2_data_types,
|
||||
data);
|
||||
bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
|
||||
ret = -EBUSY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_journal_meta(&c->journal);
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC,
|
||||
POS(ca->dev_idx, 0),
|
||||
POS(ca->dev_idx + 1, 0),
|
||||
ZERO_VERSION,
|
||||
NULL, NULL, NULL);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed, error deleting alloc info");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* must flush all existing journal entries, they might have
|
||||
* (overwritten) keys that point to the device we're removing:
|
||||
*/
|
||||
ret = bch2_journal_flush_all_pins(&c->journal);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed, journal error");
|
||||
goto err;
|
||||
}
|
||||
|
||||
__bch2_dev_offline(ca);
|
||||
bch2_dev_stop(ca);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
percpu_ref_kill(&ca->ref);
|
||||
wait_for_completion(&ca->ref_completion);
|
||||
|
||||
bch2_dev_free(ca);
|
||||
|
||||
/*
|
||||
@ -1542,7 +1568,7 @@ have_slot:
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
ca = c->devs[dev_idx];
|
||||
ca = bch_dev_locked(c, dev_idx);
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
|
||||
err = "journal alloc failed";
|
||||
if (bch2_dev_journal_alloc(ca))
|
||||
@ -1568,7 +1594,7 @@ err:
|
||||
/* Hot add existing device to running filesystem: */
|
||||
int bch2_dev_online(struct bch_fs *c, const char *path)
|
||||
{
|
||||
struct bch_sb_handle sb = { 0 };
|
||||
struct bch_sb_handle sb = { NULL };
|
||||
struct bch_dev *ca;
|
||||
unsigned dev_idx;
|
||||
const char *err;
|
||||
@ -1593,7 +1619,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
|
||||
}
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
ca = c->devs[dev_idx];
|
||||
ca = bch_dev_locked(c, dev_idx);
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
|
||||
err = __bch2_dev_read_write(c, ca);
|
||||
if (err)
|
||||
@ -1619,7 +1645,6 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
__bch2_dev_read_only(c, ca);
|
||||
__bch2_dev_offline(ca);
|
||||
|
||||
mutex_unlock(&c->state_lock);
|
||||
@ -1629,37 +1654,31 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
int bch2_dev_evacuate(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
unsigned data;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
|
||||
bch_err(ca, "Cannot migrate data off RW device");
|
||||
mutex_unlock(&c->state_lock);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->state_lock);
|
||||
|
||||
ret = bch2_move_data_off_device(ca);
|
||||
ret = bch2_dev_data_migrate(c, ca, 0);
|
||||
if (ret) {
|
||||
bch_err(ca, "Error migrating data: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = bch2_move_metadata_off_device(ca);
|
||||
if (ret) {
|
||||
bch_err(ca, "Error migrating metadata: %i", ret);
|
||||
return ret;
|
||||
goto err;
|
||||
}
|
||||
|
||||
data = bch2_dev_has_data(c, ca);
|
||||
if (data) {
|
||||
bch_err(ca, "Migrate error: data still present (%x)", data);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
mutex_unlock(&c->state_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Filesystem open: */
|
||||
|
@ -59,6 +59,14 @@ static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
|
||||
unsigned dev)
|
||||
{
|
||||
BUG_ON(bch2_dev_list_has_dev(*devs, dev));
|
||||
BUG_ON(devs->nr >= BCH_REPLICAS_MAX);
|
||||
devs->devs[devs->nr++] = dev;
|
||||
}
|
||||
|
||||
static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter,
|
||||
struct bch_devs_mask *mask)
|
||||
{
|
||||
@ -131,6 +139,26 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
|
||||
__for_each_online_member(ca, c, iter, \
|
||||
(1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO))
|
||||
|
||||
/*
|
||||
* If a key exists that references a device, the device won't be going away and
|
||||
* we can omit rcu_read_lock():
|
||||
*/
|
||||
static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx)
|
||||
{
|
||||
EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
|
||||
|
||||
return rcu_dereference_check(c->devs[idx], 1);
|
||||
}
|
||||
|
||||
static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx)
|
||||
{
|
||||
EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
|
||||
|
||||
return rcu_dereference_protected(c->devs[idx],
|
||||
lockdep_is_held(&c->sb_lock) ||
|
||||
lockdep_is_held(&c->state_lock));
|
||||
}
|
||||
|
||||
/* XXX kill, move to struct bch_fs */
|
||||
static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
|
||||
{
|
||||
@ -146,7 +174,7 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
|
||||
|
||||
struct bch_fs *bch2_bdev_to_fs(struct block_device *);
|
||||
struct bch_fs *bch2_uuid_to_fs(uuid_le);
|
||||
int bch2_congested(struct bch_fs *, int);
|
||||
int bch2_congested(void *, int);
|
||||
|
||||
bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
|
||||
enum bch_member_state, int);
|
||||
|
@ -739,7 +739,7 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
c->open_buckets_wait.list.first ? "waiting" : "empty");
|
||||
}
|
||||
|
||||
const char * const bch2_rw[] = {
|
||||
static const char * const bch2_rw[] = {
|
||||
"read",
|
||||
"write",
|
||||
NULL
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include "clock.h"
|
||||
#include "extents.h"
|
||||
#include "io.h"
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "super-io.h"
|
||||
#include "tier.h"
|
||||
@ -28,7 +27,7 @@ static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
|
||||
return false;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (c->devs[ptr->dev]->mi.tier >= tier->idx)
|
||||
if (bch_dev_bkey_exists(c, ptr->dev)->mi.tier >= tier->idx)
|
||||
replicas++;
|
||||
|
||||
return replicas < c->opts.data_replicas;
|
||||
|
@ -34,8 +34,12 @@ struct closure;
|
||||
#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0)
|
||||
#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0)
|
||||
|
||||
#define memcpy(_dst, _src, _len) \
|
||||
#define memcpy(dst, src, len) \
|
||||
({ \
|
||||
void *_dst = (dst); \
|
||||
const void *_src = (src); \
|
||||
size_t _len = (len); \
|
||||
\
|
||||
BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) || \
|
||||
(void *) (_dst) + (_len) <= (void *) (_src))); \
|
||||
memcpy(_dst, _src, _len); \
|
||||
|
@ -9,10 +9,10 @@
|
||||
*/
|
||||
#define __vstruct_u64s(_s) \
|
||||
({ \
|
||||
( type_is((_s)->u64s, u64) ? le64_to_cpu((_s)->u64s) \
|
||||
: type_is((_s)->u64s, u32) ? le32_to_cpu((_s)->u64s) \
|
||||
: type_is((_s)->u64s, u16) ? le16_to_cpu((_s)->u64s) \
|
||||
: ((_s)->u64s)); \
|
||||
( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \
|
||||
: type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \
|
||||
: type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \
|
||||
: ((__force u8) ((_s)->u64s))); \
|
||||
})
|
||||
|
||||
#define __vstruct_bytes(_type, _u64s) \
|
||||
|
@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "compress.h"
#include "extents.h"
#include "fs.h"
#include "str_hash.h"
@ -358,6 +359,129 @@ static const struct xattr_handler bch_xattr_security_handler = {
	.flags	= BCH_XATTR_INDEX_SECURITY,
};

#ifndef NO_BCACHEFS_FS

static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
				   struct dentry *dentry, struct inode *vinode,
				   const char *name, void *buffer, size_t size)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_opts opts =
		bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode));
	const struct bch_option *opt;
	int ret, id;
	u64 v;

	id = bch2_opt_lookup(name);
	if (id < 0 || !bch2_opt_is_inode_opt(id))
		return -EINVAL;

	opt = bch2_opt_table + id;

	if (!bch2_opt_defined_by_id(&opts, id))
		return -ENODATA;

	v = bch2_opt_get_by_id(&opts, id);

	if (opt->type == BCH_OPT_STR)
		ret = snprintf(buffer, size, "%s", opt->choices[v]);
	else
		ret = snprintf(buffer, size, "%llu", v);

	return ret <= size || !buffer ? ret : -ERANGE;
}

struct inode_opt_set {
	int	id;
	u64	v;
	bool	defined;
};

static int inode_opt_set_fn(struct bch_inode_info *inode,
			    struct bch_inode_unpacked *bi,
			    void *p)
{
	struct inode_opt_set *s = p;

	if (s->defined)
		bch2_inode_opt_set(bi, s->id, s->v);
	else
		bch2_inode_opt_clear(bi, s->id);
	return 0;
}

static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
				   struct dentry *dentry, struct inode *vinode,
				   const char *name, const void *value,
				   size_t size, int flags)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	const struct bch_option *opt;
	char *buf;
	struct inode_opt_set s;
	int ret;

	s.id = bch2_opt_lookup(name);
	if (s.id < 0 || !bch2_opt_is_inode_opt(s.id))
		return -EINVAL;

	opt = bch2_opt_table + s.id;

	if (value) {
		buf = kmalloc(size + 1, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		memcpy(buf, value, size);
		buf[size] = '\0';

		ret = bch2_opt_parse(opt, buf, &s.v);
		kfree(buf);

		if (ret < 0)
			return ret;

		if (s.id == Opt_compression) {
			mutex_lock(&c->sb_lock);
			ret = bch2_check_set_has_compressed_data(c, s.v);
			mutex_unlock(&c->sb_lock);

			if (ret)
				return ret;
		}

		s.defined = true;
	} else {
		s.defined = false;
	}

	mutex_lock(&inode->ei_update_lock);
	ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s);
	mutex_unlock(&inode->ei_update_lock);

	return ret;
}

static const struct xattr_handler bch_xattr_bcachefs_handler = {
	.prefix	= "bcachefs.",
	.get	= bch2_xattr_bcachefs_get,
	.set	= bch2_xattr_bcachefs_set,
};

#endif /* NO_BCACHEFS_FS */

const struct xattr_handler *bch2_xattr_handlers[] = {
	&bch_xattr_user_handler,
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
	&bch_xattr_trusted_handler,
	&bch_xattr_security_handler,
#ifndef NO_BCACHEFS_FS
	&bch_xattr_bcachefs_handler,
#endif
	NULL
};

static const struct xattr_handler *bch_xattr_handler_map[] = {
	[BCH_XATTR_INDEX_USER]			= &bch_xattr_user_handler,
	[BCH_XATTR_INDEX_POSIX_ACL_ACCESS]	=
@ -368,15 +492,6 @@ static const struct xattr_handler *bch_xattr_handler_map[] = {
	[BCH_XATTR_INDEX_SECURITY]	= &bch_xattr_security_handler,
};

const struct xattr_handler *bch2_xattr_handlers[] = {
	&bch_xattr_user_handler,
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
	&bch_xattr_trusted_handler,
	&bch_xattr_security_handler,
	NULL
};

static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
{
	return type < ARRAY_SIZE(bch_xattr_handler_map)
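
The new "bcachefs." handler exposes per-inode options as extended attributes: the get path looks the option up with bch2_opt_lookup() and formats its current value, the set path parses the string with bch2_opt_parse() and persists it through __bch2_write_inode(). From userspace that surfaces as ordinary xattr syscalls; a hedged sketch follows (the attribute name "bcachefs.compression" and the value "lz4" are assumptions based on the handler prefix and the Opt_compression check above, and the program only does anything useful on a bcachefs mount):

/* Query and set a per-inode bcachefs option through the xattr interface. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : ".";
    char buf[64];
    ssize_t len;

    /* The kernel side parses this string with bch2_opt_parse(). */
    if (setxattr(path, "bcachefs.compression", "lz4", strlen("lz4"), 0))
        fprintf(stderr, "setxattr: %s\n", strerror(errno));

    /* Reading back an option that was never set returns ENODATA. */
    len = getxattr(path, "bcachefs.compression", buf, sizeof(buf) - 1);
    if (len < 0) {
        fprintf(stderr, "getxattr: %s\n", strerror(errno));
        return 1;
    }

    buf[len] = '\0';
    printf("compression = %s\n", buf);
    return 0;
}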
linux/bio.c
@ -19,7 +19,38 @@
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/export.h>

static const struct {
	int		err;
	const char	*name;
} blk_errors[] = {
	[BLK_STS_OK]		= { 0,		"" },
	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};

int blk_status_to_errno(blk_status_t status)
{
	int idx = (__force int) status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return -EIO;
	return blk_errors[idx].err;
}

void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			struct bio *src, struct bvec_iter *src_iter)
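
blk_status_to_errno() above maps the new blk_status_t codes back to negative errno values through a dense lookup table, with a WARN_ON_ONCE() bounds check that degrades anything unexpected to -EIO. The same table-driven pattern as a standalone program (the enum and table here are stand-ins, not the kernel's blk_status_t):

#include <errno.h>
#include <stdio.h>

enum status { STS_OK, STS_NOTSUPP, STS_TIMEOUT, STS_NOSPC, STS_IOERR, STS_MAX };

static const struct {
    int         err;
    const char  *name;
} status_table[STS_MAX] = {
    [STS_OK]      = { 0,           "" },
    [STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
    [STS_TIMEOUT] = { -ETIMEDOUT,  "timeout" },
    [STS_NOSPC]   = { -ENOSPC,     "critical space allocation" },
    [STS_IOERR]   = { -EIO,        "I/O" },
};

/* Out-of-range codes fall back to -EIO instead of indexing past the table. */
static int status_to_errno(enum status s)
{
    if ((unsigned) s >= STS_MAX)
        return -EIO;
    return status_table[s].err;
}

int main(void)
{
    printf("%d %d\n",
           status_to_errno(STS_NOSPC),
           status_to_errno((enum status) 42));
    return 0;
}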
@ -199,8 +230,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
{
	struct bio *parent = bio->bi_private;

	if (!parent->bi_error)
		parent->bi_error = bio->bi_error;
	if (!parent->bi_status)
		parent->bi_status = bio->bi_status;
	bio_put(bio);
	return parent;
}
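
__bio_chain_endio() keeps the first failure in a chain: a child's status is copied into its parent only when the parent has not recorded one yet, so later completions never mask the original cause. A minimal sketch of that first-error-wins rule with plain structs (struct request and complete_child are invented for the example, not the bio API):

#include <stdio.h>

struct request {
    int             status;    /* 0 = ok, non-zero = first recorded error */
    struct request  *parent;
};

/* Child completion: propagate status without overwriting an earlier error. */
static void complete_child(struct request *child)
{
    struct request *parent = child->parent;

    if (!parent->status)
        parent->status = child->status;
}

int main(void)
{
    struct request parent = { 0, NULL };
    struct request a = { -5, &parent }, b = { -110, &parent };

    complete_child(&a);     /* parent records -5 */
    complete_child(&b);     /* -110 ignored: parent already failed */
    printf("parent status: %d\n", parent.status);
    return 0;
}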
@ -233,27 +264,6 @@ again:
		bio->bi_end_io(bio);
}

void bio_endio_nodec(struct bio *bio)
{
	goto nodec;

	while (bio) {
		if (unlikely(!bio_remaining_done(bio)))
			break;
nodec:
		if (bio->bi_end_io == bio_chain_endio) {
			struct bio *parent = bio->bi_private;
			parent->bi_error = bio->bi_error;
			bio_put(bio);
			bio = parent;
		} else {
			if (bio->bi_end_io)
				bio->bi_end_io(bio);
			bio = NULL;
		}
	}
}

void bio_reset(struct bio *bio)
{
	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
@ -32,7 +32,7 @@ void generic_make_request(struct bio *bio)
		ret = fdatasync(bio->bi_bdev->bd_fd);
		if (ret) {
			fprintf(stderr, "fsync error: %m\n");
			bio->bi_error = -EIO;
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			return;
		}
@ -106,7 +106,7 @@ int submit_bio_wait(struct bio *bio)
	submit_bio(bio);
	wait_for_completion(&done);

	return bio->bi_error;
	return blk_status_to_errno(bio->bi_status);
}

int blkdev_issue_discard(struct block_device *bdev,
@ -235,10 +235,8 @@ static int aio_completion_thread(void *arg)
		for (ev = events; ev < events + ret; ev++) {
			struct bio *bio = (struct bio *) ev->data;

			if (ev->res < 0)
				bio->bi_error = ev->res;
			else if (ev->res != bio->bi_iter.bi_size)
				bio->bi_error = -EIO;
			if (ev->res != bio->bi_iter.bi_size)
				bio->bi_status = BLK_STS_IOERR;

			bio_endio(bio);
		}