Update bcachefs sources to 62de7539dc bcachefs: Make bkey types globally unique

Kent Overstreet 2018-11-30 12:38:54 -05:00
parent bca8b084ad
commit 0c7db4eca3
67 changed files with 2901 additions and 2920 deletions

View File

@ -1 +1 @@
da7fefde294e3c56359ee498a62a77182a4733cd
62de7539dc2586b4bd7058b138de89f334d0c6bd

View File

@ -160,8 +160,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
if (bkey_cmp(k.k->p, end) > 0)
break;
bch2_bkey_val_to_text(&PBUF(buf), c,
bkey_type(0, btree_id), k);
bch2_bkey_val_to_text(&PBUF(buf), c, k);
puts(buf);
}
bch2_btree_iter_unlock(&iter);
@ -202,8 +201,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
fputs(buf, stdout);
for_each_btree_node_key_unpack(b, k, &node_iter, &unpacked) {
bch2_bkey_val_to_text(&PBUF(buf), c,
bkey_type(0, btree_id), k);
bch2_bkey_val_to_text(&PBUF(buf), c, k);
putchar('\t');
puts(buf);
}

View File

@ -333,7 +333,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
e->k.p.inode = dst->bi_inum;
e->k.p.offset = logical + sectors;
e->k.size = sectors;
extent_ptr_append(e, (struct bch_extent_ptr) {
bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
.offset = physical,
.dev = 0,
.gen = bucket(ca, b)->mark.gen,
@ -347,8 +347,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
die("error reserving space in new filesystem: %s",
strerror(-ret));
bch2_mark_bkey_replicas(c, BCH_DATA_USER,
extent_i_to_s_c(e).s_c);
bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
&res, NULL, 0);

View File

@ -164,7 +164,7 @@ TRACE_EVENT(btree_write,
TP_ARGS(b, bytes, sectors),
TP_STRUCT__entry(
__field(enum bkey_type, type)
__field(enum btree_node_type, type)
__field(unsigned, bytes )
__field(unsigned, sectors )
),

View File

@ -185,7 +185,8 @@ struct bch_sb *bch2_format(struct format_opts opts,
if (bch2_sb_realloc(&sb, 0))
die("insufficient memory");
sb.sb->version = cpu_to_le64(BCH_SB_VERSION_MAX);
sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
sb.sb->version_min = le16_to_cpu(bcachefs_metadata_version_current);
sb.sb->magic = BCACHE_MAGIC;
sb.sb->block_size = cpu_to_le16(opts.block_size);
sb.sb->user_uuid = opts.uuid;

View File

@ -23,9 +23,9 @@ static inline int acl_to_xattr_type(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
return BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT;
default:
BUG();
}
@ -351,7 +351,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
&inode->ei_str_hash, inode->v.i_ino,
&X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
&X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0;

View File

@ -75,22 +75,15 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
if (k.k->p.inode >= c->sb.nr_devices ||
!c->devs[k.k->p.inode])
return "invalid device";
switch (k.k->type) {
case BCH_ALLOC: {
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
/* allow for unknown fields */
if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v))
return "incorrect value size";
break;
}
default:
return "invalid type";
}
/* allow for unknown fields */
if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v))
return "incorrect value size";
return NULL;
}
@ -98,14 +91,9 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_ALLOC: {
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
pr_buf(out, "gen %u", a.v->gen);
break;
}
}
pr_buf(out, "gen %u", a.v->gen);
}
static inline unsigned get_alloc_field(const u8 **p, unsigned bytes)
@ -157,7 +145,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
struct bucket *g;
const u8 *d;
if (k.k->type != BCH_ALLOC)
if (k.k->type != KEY_TYPE_alloc)
return;
a = bkey_s_c_to_alloc(k);

View File

@ -10,7 +10,7 @@
const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_alloc_ops (struct bkey_ops) { \
#define bch2_bkey_ops_alloc (struct bkey_ops) { \
.key_invalid = bch2_alloc_invalid, \
.val_to_text = bch2_alloc_to_text, \
}

View File

@ -922,7 +922,8 @@ err:
* as allocated out of @ob
*/
void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
struct bkey_i_extent *e, unsigned sectors)
struct bkey_i *k, unsigned sectors)
{
struct open_bucket *ob;
unsigned i;
@ -934,13 +935,11 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
struct bch_extent_ptr tmp = ob->ptr;
EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev));
tmp.cached = bkey_extent_is_cached(&e->k) ||
(!ca->mi.durability && wp->type == BCH_DATA_USER);
tmp.cached = !ca->mi.durability &&
wp->type == BCH_DATA_USER;
tmp.offset += ca->mi.bucket_size - ob->sectors_free;
extent_ptr_append(e, tmp);
bch2_bkey_append_ptr(k, tmp);
BUG_ON(sectors > ob->sectors_free);
ob->sectors_free -= sectors;

View File

@ -100,7 +100,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
struct closure *);
void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
struct bkey_i_extent *, unsigned);
struct bkey_i *, unsigned);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,

View File

@ -222,6 +222,8 @@
printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err(c, fmt, ...) \
printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_ratelimited(c, fmt, ...) \
printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_verbose(c, fmt, ...) \
do { \
@ -331,6 +333,7 @@ enum bch_time_stats {
struct btree;
enum gc_phase {
GC_PHASE_NOT_RUNNING,
GC_PHASE_START,
GC_PHASE_SB,
@ -535,6 +538,7 @@ struct bch_fs {
uuid_le uuid;
uuid_le user_uuid;
u16 version;
u16 encoded_extent_max;
u8 nr_devices;
@ -684,16 +688,17 @@ struct bch_fs {
/* REBALANCE */
struct bch_fs_rebalance rebalance;
/* ERASURE CODING */
struct list_head ec_new_stripe_list;
struct mutex ec_new_stripe_lock;
GENRADIX(struct ec_stripe) ec_stripes;
struct mutex ec_stripes_lock;
/* STRIPES: */
GENRADIX(struct stripe) stripes[2];
struct mutex ec_stripe_create_lock;
ec_stripes_heap ec_stripes_heap;
spinlock_t ec_stripes_heap_lock;
/* ERASURE CODING */
struct list_head ec_new_stripe_list;
struct mutex ec_new_stripe_lock;
struct bio_set ec_bioset;
struct work_struct ec_stripe_delete_work;

View File

@ -302,15 +302,6 @@ static inline void bkey_init(struct bkey *k)
#define __BKEY_PADDED(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
#define BKEY_VAL_TYPE(name, nr) \
struct bkey_i_##name { \
union { \
struct bkey k; \
struct bkey_i k_i; \
}; \
struct bch_##name v; \
}
/*
* - DELETED keys are used internally to mark keys that should be ignored but
* override keys in composition order. Their version number is ignored.
@ -325,19 +316,37 @@ struct bkey_i_##name { \
* by new writes or cluster-wide GC. Node repair can also overwrite them with
* the same or a more recent version number, but not with an older version
* number.
*
* - WHITEOUT: for hash table btrees
*/
#define KEY_TYPE_DELETED 0
#define KEY_TYPE_DISCARD 1
#define KEY_TYPE_ERROR 2
#define KEY_TYPE_COOKIE 3
#define KEY_TYPE_PERSISTENT_DISCARD 4
#define KEY_TYPE_GENERIC_NR 128
#define BCH_BKEY_TYPES() \
x(deleted, 0) \
x(discard, 1) \
x(error, 2) \
x(cookie, 3) \
x(whiteout, 4) \
x(btree_ptr, 5) \
x(extent, 6) \
x(reservation, 7) \
x(inode, 8) \
x(inode_generation, 9) \
x(dirent, 10) \
x(xattr, 11) \
x(alloc, 12) \
x(quota, 13) \
x(stripe, 14)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
BCH_BKEY_TYPES()
#undef x
KEY_TYPE_MAX,
};
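
The x(name, nr) list above is the core of this commit: one table of globally unique key types from which every per-type table is generated, including the enum here plus the bch_bkey_types[] name strings and the bch2_bkey_ops[] dispatch array in bkey_methods.c further down. A minimal standalone sketch of the same x-macro expansion, trimmed to three entries (not code from this commit):

#include <stdio.h>

/* One list drives every per-type table (same pattern as BCH_BKEY_TYPES()): */
#define EXAMPLE_BKEY_TYPES()	\
	x(deleted,	0)	\
	x(discard,	1)	\
	x(error,	2)

/* expansion 1: the globally unique enum */
enum example_bkey_type {
#define x(name, nr)	KEY_TYPE_##name = nr,
	EXAMPLE_BKEY_TYPES()
#undef x
	KEY_TYPE_MAX,
};

/* expansion 2: type number -> name string, as in bch_bkey_types[] */
static const char * const example_bkey_types[] = {
#define x(name, nr)	#name,
	EXAMPLE_BKEY_TYPES()
#undef x
};

int main(void)
{
	printf("%u -> %s\n", KEY_TYPE_error, example_bkey_types[KEY_TYPE_error]);
	return 0;
}
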
struct bch_cookie {
struct bch_val v;
__le64 cookie;
};
BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE);
/* Extents */
@ -615,21 +624,12 @@ union bch_extent_entry {
#undef x
};
enum {
BCH_EXTENT = 128,
struct bch_btree_ptr {
struct bch_val v;
/*
* This is kind of a hack, we're overloading the type for a boolean that
* really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED
* have the same value type:
*/
BCH_EXTENT_CACHED = 129,
/*
* Persistent reservation:
*/
BCH_RESERVATION = 130,
};
struct bch_extent_ptr start[0];
__u64 _data[0];
} __attribute__((packed, aligned(8)));
struct bch_extent {
struct bch_val v;
@ -637,7 +637,6 @@ struct bch_extent {
union bch_extent_entry start[0];
__u64 _data[0];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(extent, BCH_EXTENT);
struct bch_reservation {
struct bch_val v;
@ -646,7 +645,6 @@ struct bch_reservation {
__u8 nr_replicas;
__u8 pad[3];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
@ -674,12 +672,6 @@ BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
#define BCACHEFS_ROOT_INO 4096
enum bch_inode_types {
BCH_INODE_FS = 128,
BCH_INODE_BLOCKDEV = 129,
BCH_INODE_GENERATION = 130,
};
struct bch_inode {
struct bch_val v;
@ -688,7 +680,6 @@ struct bch_inode {
__le16 bi_mode;
__u8 fields[0];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
struct bch_inode_generation {
struct bch_val v;
@ -696,7 +687,6 @@ struct bch_inode_generation {
__le32 bi_generation;
__le32 pad;
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION);
#define BCH_INODE_FIELDS() \
BCH_INODE_FIELD(bi_atime, 64) \
@ -761,24 +751,6 @@ enum {
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32);
struct bch_inode_blockdev {
struct bch_val v;
__le64 i_size;
__le64 i_flags;
/* Seconds: */
__le64 i_ctime;
__le64 i_mtime;
uuid_le i_uuid;
__u8 i_label[32];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
/* Thin provisioned volume, or cache for another block device? */
LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
/* Dirents */
/*
@ -792,11 +764,6 @@ LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
* collision:
*/
enum {
BCH_DIRENT = 128,
BCH_DIRENT_WHITEOUT = 129,
};
struct bch_dirent {
struct bch_val v;
@ -811,7 +778,6 @@ struct bch_dirent {
__u8 d_name[];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(dirent, BCH_DIRENT);
#define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \
sizeof(struct bkey) - \
@ -820,16 +786,11 @@ BKEY_VAL_TYPE(dirent, BCH_DIRENT);
/* Xattrs */
enum {
BCH_XATTR = 128,
BCH_XATTR_WHITEOUT = 129,
};
#define BCH_XATTR_INDEX_USER 0
#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1
#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2
#define BCH_XATTR_INDEX_TRUSTED 3
#define BCH_XATTR_INDEX_SECURITY 4
#define KEY_TYPE_XATTR_INDEX_USER 0
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
#define KEY_TYPE_XATTR_INDEX_SECURITY 4
struct bch_xattr {
struct bch_val v;
@ -838,14 +799,9 @@ struct bch_xattr {
__le16 x_val_len;
__u8 x_name[];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(xattr, BCH_XATTR);
/* Bucket/allocation information: */
enum {
BCH_ALLOC = 128,
};
enum {
BCH_ALLOC_FIELD_READ_TIME = 0,
BCH_ALLOC_FIELD_WRITE_TIME = 1,
@ -857,14 +813,9 @@ struct bch_alloc {
__u8 gen;
__u8 data[];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(alloc, BCH_ALLOC);
/* Quotas: */
enum {
BCH_QUOTA = 128,
};
enum quota_types {
QTYP_USR = 0,
QTYP_GRP = 1,
@ -887,14 +838,9 @@ struct bch_quota {
struct bch_val v;
struct bch_quota_counter c[Q_COUNTERS];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(quota, BCH_QUOTA);
/* Erasure coding */
enum {
BCH_STRIPE = 128,
};
struct bch_stripe {
struct bch_val v;
__le16 sectors;
@ -908,7 +854,6 @@ struct bch_stripe {
struct bch_extent_ptr ptrs[0];
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(stripe, BCH_STRIPE);
/* Optional/variable size superblock sections: */
@ -1144,15 +1089,21 @@ struct bch_sb_field_clean {
/* Superblock: */
/*
* Version 8: BCH_SB_ENCODED_EXTENT_MAX_BITS
* BCH_MEMBER_DATA_ALLOWED
* Version 9: incompatible extent nonce change
* New versioning scheme:
* One common version number for all on disk data structures - superblock, btree
* nodes, journal entries
*/
#define BCH_JSET_VERSION_OLD 2
#define BCH_BSET_VERSION_OLD 3
#define BCH_SB_VERSION_MIN 7
#define BCH_SB_VERSION_EXTENT_MAX 8
#define BCH_SB_VERSION_EXTENT_NONCE_V1 9
#define BCH_SB_VERSION_MAX 9
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
bcachefs_metadata_version_new_versioning = 10,
bcachefs_metadata_version_bkey_renumber = 10,
bcachefs_metadata_version_max = 11,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
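
A small arithmetic note: with bcachefs_metadata_version_max = 11, bcachefs_metadata_version_current resolves to 10, i.e. bcachefs_metadata_version_bkey_renumber, which is what bch2_format() earlier in this diff now writes into both version and version_min. The sketch below only illustrates the kind of compatibility check these two fields enable; it is not the superblock validation code from this commit, and check_sb_version() is a made-up name:

static const char *check_sb_version(struct bch_sb *sb)
{
	u16 version     = le16_to_cpu(sb->version);
	u16 version_min = le16_to_cpu(sb->version_min);

	if (version < bcachefs_metadata_version_min)
		return "superblock version too old";

	/* even the oldest metadata in this fs is newer than we understand: */
	if (version_min > bcachefs_metadata_version_current)
		return "superblock version too new";

	return NULL;
}
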
#define BCH_SB_SECTOR 8
#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
@ -1171,6 +1122,9 @@ struct bch_sb_layout {
/*
* @offset - sector where this sb was written
* @version - on disk format version
* @version_min - Oldest metadata version this filesystem contains; so we can
* safely drop compatibility code and refuse to mount filesystems
* we'd need it for
* @magic - identifies as a bcachefs superblock (BCACHE_MAGIC)
* @seq - incremented each time superblock is written
* @uuid - used for generating various magic numbers and identifying
@ -1183,7 +1137,9 @@ struct bch_sb_layout {
*/
struct bch_sb {
struct bch_csum csum;
__le64 version;
__le16 version;
__le16 version_min;
__le16 pad[2];
uuid_le magic;
uuid_le uuid;
uuid_le user_uuid;
@ -1359,11 +1315,6 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
/* Journal */
#define BCACHE_JSET_VERSION_UUIDv1 1
#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
#define BCACHE_JSET_VERSION_JKEYS 2
#define BCACHE_JSET_VERSION 2
#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
#define BCH_JSET_ENTRY_TYPES() \
@ -1443,35 +1394,26 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
/* Btree: */
#define DEFINE_BCH_BTREE_IDS() \
DEF_BTREE_ID(EXTENTS, 0, "extents") \
DEF_BTREE_ID(INODES, 1, "inodes") \
DEF_BTREE_ID(DIRENTS, 2, "dirents") \
DEF_BTREE_ID(XATTRS, 3, "xattrs") \
DEF_BTREE_ID(ALLOC, 4, "alloc") \
DEF_BTREE_ID(QUOTAS, 5, "quotas") \
DEF_BTREE_ID(EC, 6, "erasure_coding")
#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
#define BCH_BTREE_IDS() \
x(EXTENTS, 0, "extents") \
x(INODES, 1, "inodes") \
x(DIRENTS, 2, "dirents") \
x(XATTRS, 3, "xattrs") \
x(ALLOC, 4, "alloc") \
x(QUOTAS, 5, "quotas") \
x(EC, 6, "erasure_coding")
enum btree_id {
DEFINE_BCH_BTREE_IDS()
#define x(kwd, val, name) BTREE_ID_##kwd = val,
BCH_BTREE_IDS()
#undef x
BTREE_ID_NR
};
#undef DEF_BTREE_ID
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
/* Version 1: Seed pointer into btree node checksum
*/
#define BCACHE_BSET_CSUM 1
#define BCACHE_BSET_KEY_v1 2
#define BCACHE_BSET_JOURNAL_SEQ 3
#define BCACHE_BSET_VERSION 3
/*
* Btree nodes
*

View File

@ -484,7 +484,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
pack_state_finish(&state, out);
out->u64s = f->key_u64s;
out->format = KEY_FORMAT_LOCAL_BTREE;
out->type = KEY_TYPE_DELETED;
out->type = KEY_TYPE_deleted;
#ifdef CONFIG_BCACHEFS_DEBUG
if (exact) {

View File

@ -52,10 +52,12 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
}
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED)
#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k))
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
#define bkey_whiteout(_k) \
((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD)
((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
#define bkey_packed_typecheck(_k) \
({ \
@ -430,7 +432,15 @@ static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
* bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
* functions.
*/
#define __BKEY_VAL_ACCESSORS(name, nr, _assert) \
#define BKEY_VAL_ACCESSORS(name) \
struct bkey_i_##name { \
union { \
struct bkey k; \
struct bkey_i k_i; \
}; \
struct bch_##name v; \
}; \
\
struct bkey_s_c_##name { \
union { \
struct { \
@ -455,20 +465,20 @@ struct bkey_s_##name { \
\
static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
{ \
_assert(k->k.type, nr); \
EBUG_ON(k->k.type != KEY_TYPE_##name); \
return container_of(&k->k, struct bkey_i_##name, k); \
} \
\
static inline const struct bkey_i_##name * \
bkey_i_to_##name##_c(const struct bkey_i *k) \
{ \
_assert(k->k.type, nr); \
EBUG_ON(k->k.type != KEY_TYPE_##name); \
return container_of(&k->k, struct bkey_i_##name, k); \
} \
\
static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \
{ \
_assert(k.k->type, nr); \
EBUG_ON(k.k->type != KEY_TYPE_##name); \
return (struct bkey_s_##name) { \
.k = k.k, \
.v = container_of(k.v, struct bch_##name, v), \
@ -477,7 +487,7 @@ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \
\
static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
{ \
_assert(k.k->type, nr); \
EBUG_ON(k.k->type != KEY_TYPE_##name); \
return (struct bkey_s_c_##name) { \
.k = k.k, \
.v = container_of(k.v, struct bch_##name, v), \
@ -503,7 +513,7 @@ name##_i_to_s_c(const struct bkey_i_##name *k) \
\
static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \
{ \
_assert(k->k.type, nr); \
EBUG_ON(k->k.type != KEY_TYPE_##name); \
return (struct bkey_s_##name) { \
.k = &k->k, \
.v = container_of(&k->v, struct bch_##name, v), \
@ -513,27 +523,13 @@ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \
static inline struct bkey_s_c_##name \
bkey_i_to_s_c_##name(const struct bkey_i *k) \
{ \
_assert(k->k.type, nr); \
EBUG_ON(k->k.type != KEY_TYPE_##name); \
return (struct bkey_s_c_##name) { \
.k = &k->k, \
.v = container_of(&k->v, struct bch_##name, v), \
}; \
} \
\
static inline struct bch_##name * \
bkey_p_##name##_val(const struct bkey_format *f, \
struct bkey_packed *k) \
{ \
return container_of(bkeyp_val(f, k), struct bch_##name, v); \
} \
\
static inline const struct bch_##name * \
bkey_p_c_##name##_val(const struct bkey_format *f, \
const struct bkey_packed *k) \
{ \
return container_of(bkeyp_val(f, k), struct bch_##name, v); \
} \
\
static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
{ \
struct bkey_i_##name *k = \
@ -541,45 +537,23 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
\
bkey_init(&k->k); \
memset(&k->v, 0, sizeof(k->v)); \
k->k.type = nr; \
k->k.type = KEY_TYPE_##name; \
set_bkey_val_bytes(&k->k, sizeof(k->v)); \
\
return k; \
}
#define __BKEY_VAL_ASSERT(_type, _nr) EBUG_ON(_type != _nr)
#define BKEY_VAL_ACCESSORS(name, _nr) \
static inline void __bch_##name##_assert(u8 type, u8 nr) \
{ \
EBUG_ON(type != _nr); \
} \
\
__BKEY_VAL_ACCESSORS(name, _nr, __bch_##name##_assert)
BKEY_VAL_ACCESSORS(cookie, KEY_TYPE_COOKIE);
static inline void __bch2_extent_assert(u8 type, u8 nr)
{
EBUG_ON(type != BCH_EXTENT && type != BCH_EXTENT_CACHED);
}
__BKEY_VAL_ACCESSORS(extent, BCH_EXTENT, __bch2_extent_assert);
BKEY_VAL_ACCESSORS(reservation, BCH_RESERVATION);
BKEY_VAL_ACCESSORS(inode, BCH_INODE_FS);
BKEY_VAL_ACCESSORS(inode_blockdev, BCH_INODE_BLOCKDEV);
BKEY_VAL_ACCESSORS(inode_generation, BCH_INODE_GENERATION);
BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT);
BKEY_VAL_ACCESSORS(xattr, BCH_XATTR);
BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC);
BKEY_VAL_ACCESSORS(quota, BCH_QUOTA);
BKEY_VAL_ACCESSORS(stripe, BCH_STRIPE);
BKEY_VAL_ACCESSORS(cookie);
BKEY_VAL_ACCESSORS(btree_ptr);
BKEY_VAL_ACCESSORS(extent);
BKEY_VAL_ACCESSORS(reservation);
BKEY_VAL_ACCESSORS(inode);
BKEY_VAL_ACCESSORS(inode_generation);
BKEY_VAL_ACCESSORS(dirent);
BKEY_VAL_ACCESSORS(xattr);
BKEY_VAL_ACCESSORS(alloc);
BKEY_VAL_ACCESSORS(quota);
BKEY_VAL_ACCESSORS(stripe);
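
Each BKEY_VAL_ACCESSORS(name) invocation above now emits the struct bkey_i_##name wrapper (moved here from bcachefs_format.h), the checked casts such as bkey_i_to_##name() and bkey_s_c_to_##name(), and bkey_##name##_init(), all asserting against KEY_TYPE_##name rather than the old per-btree numbers. A short sketch of the generated helpers in use, with alloc as the example (the example_* function names are made up):

void example_fill_alloc_key(struct bkey_i *k, u8 gen)
{
	/* sets k->k.type = KEY_TYPE_alloc and sizes the value: */
	struct bkey_i_alloc *a = bkey_alloc_init(k);

	a->v.gen = gen;
}

u8 example_read_alloc_gen(struct bkey_s_c k)
{
	/* checked cast: EBUG_ONs if k.k->type is not KEY_TYPE_alloc */
	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);

	return a.v->gen;
}
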
/* byte order helpers */

View File

@ -11,66 +11,84 @@
#include "quota.h"
#include "xattr.h"
const struct bkey_ops bch2_bkey_ops[] = {
[BKEY_TYPE_EXTENTS] = bch2_bkey_extent_ops,
[BKEY_TYPE_INODES] = bch2_bkey_inode_ops,
[BKEY_TYPE_DIRENTS] = bch2_bkey_dirent_ops,
[BKEY_TYPE_XATTRS] = bch2_bkey_xattr_ops,
[BKEY_TYPE_ALLOC] = bch2_bkey_alloc_ops,
[BKEY_TYPE_QUOTAS] = bch2_bkey_quota_ops,
[BKEY_TYPE_EC] = bch2_bkey_ec_ops,
[BKEY_TYPE_BTREE] = bch2_bkey_btree_ops,
const char * const bch_bkey_types[] = {
#define x(name, nr) #name,
BCH_BKEY_TYPES()
#undef x
NULL
};
const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
static const char *deleted_key_invalid(const struct bch_fs *c,
struct bkey_s_c k)
{
const struct bkey_ops *ops = &bch2_bkey_ops[type];
switch (k.k->type) {
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
return NULL;
case KEY_TYPE_ERROR:
return bkey_val_bytes(k.k) != 0
? "value size should be zero"
: NULL;
case KEY_TYPE_COOKIE:
return bkey_val_bytes(k.k) != sizeof(struct bch_cookie)
? "incorrect value size"
: NULL;
default:
if (k.k->type < KEY_TYPE_GENERIC_NR)
return "invalid type";
return ops->key_invalid(c, k);
}
return NULL;
}
const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
{
const struct bkey_ops *ops = &bch2_bkey_ops[type];
const struct bkey_ops bch2_bkey_ops_deleted = {
.key_invalid = deleted_key_invalid,
};
const struct bkey_ops bch2_bkey_ops_discard = {
.key_invalid = deleted_key_invalid,
};
static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
if (bkey_val_bytes(k.k))
return "value size should be zero";
return NULL;
}
const struct bkey_ops bch2_bkey_ops_error = {
.key_invalid = empty_val_key_invalid,
};
static const char *key_type_cookie_invalid(const struct bch_fs *c,
struct bkey_s_c k)
{
if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie))
return "incorrect value size";
return NULL;
}
const struct bkey_ops bch2_bkey_ops_cookie = {
.key_invalid = key_type_cookie_invalid,
};
const struct bkey_ops bch2_bkey_ops_whiteout = {
.key_invalid = empty_val_key_invalid,
};
static const struct bkey_ops bch2_bkey_ops[] = {
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
BCH_BKEY_TYPES()
#undef x
};
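
Because key types are now globally unique, the ops table above is indexed directly by KEY_TYPE_* and simply collects one bch2_bkey_ops_<name> definition per type, like the bch2_bkey_ops_alloc macro in the alloc header earlier in this diff. As a hedged sketch of what that convention asks of a new type, using a made-up "widget" type that is not part of this commit:

/*
 * Sketch only: wiring up a hypothetical new key type under the renumbered
 * scheme.  Adding  x(widget, 15)  to BCH_BKEY_TYPES() and
 * BKEY_VAL_ACCESSORS(widget) to bkey.h would be the other two pieces;
 * the x-macro building bch2_bkey_ops[] then picks this up by name:
 */
struct bch_widget {
	struct bch_val		v;
	__le64			frobs;
} __attribute__((packed, aligned(8)));

static const char *bch2_widget_invalid(const struct bch_fs *c,
				       struct bkey_s_c k)
{
	return bkey_val_bytes(k.k) != sizeof(struct bch_widget)
		? "incorrect value size"
		: NULL;
}

#define bch2_bkey_ops_widget (struct bkey_ops) {	\
	.key_invalid	= bch2_widget_invalid,		\
}
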
const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
{
if (k.k->type >= KEY_TYPE_MAX)
return "invalid type";
return bch2_bkey_ops[k.k->type].key_invalid(c, k);
}
const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type)
{
if (k.k->u64s < BKEY_U64s)
return "u64s too small";
if (!ops->is_extents) {
if (k.k->size)
return "nonzero size field";
} else {
if (btree_node_type_is_extents(type)) {
if ((k.k->size == 0) != bkey_deleted(k.k))
return "bad size field";
} else {
if (k.k->size)
return "nonzero size field";
}
if (ops->is_extents &&
!k.k->size &&
!bkey_deleted(k.k))
return "zero size field";
if (k.k->p.snapshot)
return "nonzero snapshot";
@ -81,11 +99,11 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
return NULL;
}
const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
enum btree_node_type type)
{
return __bch2_bkey_invalid(c, type, k) ?:
bch2_bkey_val_invalid(c, type, k);
return __bch2_bkey_invalid(c, k, type) ?:
bch2_bkey_val_invalid(c, k);
}
const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
@ -101,24 +119,22 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
{
enum bkey_type type = btree_node_type(b);
const struct bkey_ops *ops = &bch2_bkey_ops[type];
const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
const char *invalid;
BUG_ON(!k.k->u64s);
invalid = bch2_bkey_invalid(c, type, k) ?:
invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, k);
if (invalid) {
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, type, k);
bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid);
return;
}
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
ops->key_debugcheck)
if (ops->key_debugcheck)
ops->key_debugcheck(c, b, k);
}
@ -143,46 +159,90 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
}
void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k)
struct bkey_s_c k)
{
const struct bkey_ops *ops = &bch2_bkey_ops[type];
const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
switch (k.k->type) {
case KEY_TYPE_DELETED:
pr_buf(out, " deleted");
break;
case KEY_TYPE_DISCARD:
pr_buf(out, " discard");
break;
case KEY_TYPE_ERROR:
pr_buf(out, " error");
break;
case KEY_TYPE_COOKIE:
pr_buf(out, " cookie");
break;
default:
if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text)
ops->val_to_text(out, c, k);
break;
}
if (likely(ops->val_to_text))
ops->val_to_text(out, c, k);
else
pr_buf(out, " %s", bch_bkey_types[k.k->type]);
}
void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k)
struct bkey_s_c k)
{
bch2_bkey_to_text(out, k.k);
pr_buf(out, ": ");
bch2_val_to_text(out, c, type, k);
bch2_val_to_text(out, c, k);
}
void bch2_bkey_swab(enum bkey_type type,
const struct bkey_format *f,
struct bkey_packed *k)
void bch2_bkey_swab(const struct bkey_format *f,
struct bkey_packed *k)
{
const struct bkey_ops *ops = &bch2_bkey_ops[type];
const struct bkey_ops *ops = &bch2_bkey_ops[k->type];
bch2_bkey_swab_key(f, k);
if (ops->swab)
ops->swab(f, k);
}
bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
{
const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
return ops->key_normalize
? ops->key_normalize(c, k)
: false;
}
enum merge_result bch2_bkey_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r)
{
const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type];
if (!key_merging_disabled(c) &&
ops->key_merge &&
l->k.type == r->k.type &&
!bversion_cmp(l->k.version, r->k.version) &&
!bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
return ops->key_merge(c, l, r);
return BCH_MERGE_NOMERGE;
}
static const struct old_bkey_type {
u8 btree_node_type;
u8 old;
u8 new;
} bkey_renumber_table[] = {
{BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr },
{BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent },
{BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent },
{BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation },
{BKEY_TYPE_INODES, 128, KEY_TYPE_inode },
{BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation },
{BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent },
{BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout },
{BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr },
{BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout },
{BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc },
{BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota },
};
void bch2_bkey_renumber(enum btree_node_type btree_node_type,
struct bkey_packed *k,
int write)
{
const struct old_bkey_type *i;
for (i = bkey_renumber_table;
i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
i++)
if (btree_node_type == i->btree_node_type &&
k->type == (write ? i->new : i->old)) {
k->type = write ? i->old : i->new;
break;
}
}
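
The callers of bch2_bkey_renumber() are not part of this excerpt, but the table above is applied in both directions: on read (write == 0) an old per-btree on-disk type number is translated to the new global one, and on write (write != 0) a new type is translated back, so filesystems written before the renumbering stay usable. A small sketch of the expected translations (the BUG_ONs are only there to show the results):

static void example_renumber(void)
{
	struct bkey_packed k;

	k.type = 128;					/* old BCH_EXTENT */
	bch2_bkey_renumber(BKEY_TYPE_EXTENTS, &k, 0);	/* read: old -> new */
	BUG_ON(k.type != KEY_TYPE_extent);

	k.type = KEY_TYPE_whiteout;
	bch2_bkey_renumber(BKEY_TYPE_DIRENTS, &k, 1);	/* write: new -> old */
	BUG_ON(k.type != 129);				/* old BCH_DIRENT_WHITEOUT */
}
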

View File

@ -3,24 +3,12 @@
#include "bkey.h"
#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val,
enum bkey_type {
DEFINE_BCH_BTREE_IDS()
BKEY_TYPE_BTREE,
};
#undef DEF_BTREE_ID
/* Type of a key in btree @id at level @level: */
static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
{
return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}
struct bch_fs;
struct btree;
struct bkey;
enum btree_node_type;
extern const char * const bch_bkey_types[];
enum merge_result {
BCH_MERGE_NOMERGE,
@ -33,12 +21,6 @@ enum merge_result {
BCH_MERGE_MERGE,
};
typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *,
struct bkey_s);
typedef enum merge_result (*key_merge_fn)(struct bch_fs *,
struct btree *,
struct bkey_i *, struct bkey_i *);
struct bkey_ops {
/* Returns reason for being invalid if invalid, else NULL: */
const char * (*key_invalid)(const struct bch_fs *,
@ -48,29 +30,34 @@ struct bkey_ops {
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(const struct bkey_format *, struct bkey_packed *);
key_filter_fn key_normalize;
key_merge_fn key_merge;
bool is_extents;
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
enum merge_result (*key_merge)(struct bch_fs *,
struct bkey_i *, struct bkey_i *);
};
const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
enum btree_node_type);
const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
enum btree_node_type);
const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type,
void bch2_val_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
enum bkey_type, struct bkey_s_c);
struct bkey_s_c);
void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
struct bkey_packed *);
void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *);
extern const struct bkey_ops bch2_bkey_ops[];
bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_bkey_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *);
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
#endif /* _BCACHEFS_BKEY_METHODS_H */
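
The net effect on callers: dispatch is now purely on k.k->type, and the btree_node_type is only passed where the size rules differ between extent and non-extent btrees, which is why bch2_bkey_invalid() gained that argument while bch2_val_to_text()/bch2_bkey_val_to_text() lost the old bkey_type one. A sketch of the new call pattern, essentially what bch2_bkey_debugcheck() above already does (the function name is made up):

static void example_check_and_print(struct bch_fs *c, struct btree *b,
				    struct bkey_s_c k)
{
	char buf[160];
	const char *invalid = bch2_bkey_invalid(c, k, btree_node_type(b));

	if (invalid) {
		bch2_bkey_val_to_text(&PBUF(buf), c, k);
		bch_err(c, "invalid bkey %s: %s", buf, invalid);
	}
}
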

libbcachefs/bkey_sort.c (new file, 652 lines)
View File

@ -0,0 +1,652 @@
#include "bcachefs.h"
#include "bkey_sort.h"
#include "bset.h"
#include "extents.h"
/* too many iterators, need to clean this up */
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r)
static inline bool
bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
{
return !iter->used;
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
struct btree *b)
{
return bch2_btree_node_iter_large_end(iter)
? NULL
: __btree_node_offset_to_key(b, iter->data->k);
}
static void
bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
struct btree *b)
{
iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;
EBUG_ON(!iter->used);
EBUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
else
heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
struct btree *b)
{
struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);
if (ret)
bch2_btree_node_iter_large_advance(iter, b);
return ret;
}
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
struct btree *b,
const struct bkey_packed *k,
const struct bkey_packed *end)
{
if (k != end) {
struct btree_node_iter_set n =
((struct btree_node_iter_set) {
__btree_node_key_to_offset(b, k),
__btree_node_key_to_offset(b, end)
});
__heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
}
}
static void sort_key_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
i->k += __btree_node_offset_to_key(b, i->k)->u64s;
if (i->k == i->end)
*i = iter->data[--iter->used];
}
/* regular sort_iters */
typedef int (*sort_cmp_fn)(struct btree *,
struct bkey_packed *,
struct bkey_packed *);
static inline void __sort_iter_sift(struct sort_iter *iter,
unsigned from,
sort_cmp_fn cmp)
{
unsigned i;
for (i = from;
i + 1 < iter->used &&
cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
i++)
swap(iter->data[i], iter->data[i + 1]);
}
static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
{
__sort_iter_sift(iter, 0, cmp);
}
static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
{
unsigned i = iter->used;
while (i--)
__sort_iter_sift(iter, i, cmp);
}
static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
{
return iter->used ? iter->data->k : NULL;
}
static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
{
iter->data->k = bkey_next(iter->data->k);
BUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
array_remove_item(iter->data, iter->used, 0);
else
sort_iter_sift(iter, cmp);
}
static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
sort_cmp_fn cmp)
{
struct bkey_packed *ret = sort_iter_peek(iter);
if (ret)
sort_iter_advance(iter, cmp);
return ret;
}
/*
* Returns true if l > r - unless l == r, in which case returns true if l is
* older than r.
*
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
#define key_sort_cmp(h, l, r) \
({ \
bkey_cmp_packed(b, \
__btree_node_offset_to_key(b, (l).k), \
__btree_node_offset_to_key(b, (r).k)) \
\
?: (l).k - (r).k; \
})
static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
struct btree *b)
{
struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);
if (bkey_whiteout(k))
return true;
if (iter->used < 2)
return false;
if (iter->used > 2 &&
key_sort_cmp(iter, r[0], r[1]) >= 0)
r++;
/*
* key_sort_cmp() ensures that when keys compare equal the older key
* comes first; so if l->k compares equal to r->k then l->k is older and
* should be dropped.
*/
return !bkey_cmp_packed(b,
__btree_node_offset_to_key(b, l->k),
__btree_node_offset_to_key(b, r->k));
}
struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
struct btree *b,
struct btree_node_iter_large *iter)
{
struct bkey_packed *out = dst->start;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
heap_resort(iter, key_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
if (!should_drop_next_key(iter, b)) {
struct bkey_packed *k =
__btree_node_offset_to_key(b, iter->data->k);
bkey_copy(out, k);
btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
sort_key_next(iter, b, iter->data);
heap_sift_down(iter, 0, key_sort_cmp, NULL);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
/*
* If keys compare equal, compare by pointer order:
*
* Necessary for sort_fix_overlapping() - if there are multiple keys that
* compare equal in different sets, we have to process them newest to oldest.
*/
#define extent_sort_cmp(h, l, r) \
({ \
struct bkey _ul = bkey_unpack_key(b, \
__btree_node_offset_to_key(b, (l).k)); \
struct bkey _ur = bkey_unpack_key(b, \
__btree_node_offset_to_key(b, (r).k)); \
\
bkey_cmp(bkey_start_pos(&_ul), \
bkey_start_pos(&_ur)) ?: (r).k - (l).k; \
})
static inline void extent_sort_sift(struct btree_node_iter_large *iter,
struct btree *b, size_t i)
{
heap_sift_down(iter, i, extent_sort_cmp, NULL);
}
static inline void extent_sort_next(struct btree_node_iter_large *iter,
struct btree *b,
struct btree_node_iter_set *i)
{
sort_key_next(iter, b, i);
heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
}
static void extent_sort_append(struct bch_fs *c,
struct btree *b,
struct btree_nr_keys *nr,
struct bkey_packed *start,
struct bkey_packed **prev,
struct bkey_packed *k)
{
struct bkey_format *f = &b->format;
BKEY_PADDED(k) tmp;
if (bkey_whiteout(k))
return;
bch2_bkey_unpack(b, &tmp.k, k);
if (*prev &&
bch2_bkey_merge(c, (void *) *prev, &tmp.k))
return;
if (*prev) {
bch2_bkey_pack(*prev, (void *) *prev, f);
btree_keys_account_key_add(nr, 0, *prev);
*prev = bkey_next(*prev);
} else {
*prev = start;
}
bkey_copy(*prev, &tmp.k);
}
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct bset *dst,
struct btree *b,
struct btree_node_iter_large *iter)
{
struct bkey_format *f = &b->format;
struct btree_node_iter_set *_l = iter->data, *_r;
struct bkey_packed *prev = NULL, *out, *lk, *rk;
struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
heap_resort(iter, extent_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k);
if (iter->used == 1) {
extent_sort_append(c, b, &nr, dst->start, &prev, lk);
extent_sort_next(iter, b, _l);
continue;
}
_r = iter->data + 1;
if (iter->used > 2 &&
extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
_r++;
rk = __btree_node_offset_to_key(b, _r->k);
l = __bkey_disassemble(b, lk, &l_unpacked);
r = __bkey_disassemble(b, rk, &r_unpacked);
/* If current key and next key don't overlap, just append */
if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
extent_sort_append(c, b, &nr, dst->start, &prev, lk);
extent_sort_next(iter, b, _l);
continue;
}
/* Skip 0 size keys */
if (!r.k->size) {
extent_sort_next(iter, b, _r);
continue;
}
/*
* overlap: keep the newer key and trim the older key so they
* don't overlap. comparing pointers tells us which one is
* newer, since the bsets are appended one after the other.
*/
/* can't happen because of comparison func */
BUG_ON(_l->k < _r->k &&
!bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
if (_l->k > _r->k) {
/* l wins, trim r */
if (bkey_cmp(l.k->p, r.k->p) >= 0) {
sort_key_next(iter, b, _r);
} else {
__bch2_cut_front(l.k->p, r);
extent_save(b, rk, r.k);
}
extent_sort_sift(iter, b, _r - iter->data);
} else if (bkey_cmp(l.k->p, r.k->p) > 0) {
BKEY_PADDED(k) tmp;
/*
* r wins, but it overlaps in the middle of l - split l:
*/
bkey_reassemble(&tmp.k, l.s_c);
bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
__bch2_cut_front(r.k->p, l);
extent_save(b, lk, l.k);
extent_sort_sift(iter, b, 0);
extent_sort_append(c, b, &nr, dst->start, &prev,
bkey_to_packed(&tmp.k));
} else {
bch2_cut_back(bkey_start_pos(r.k), l.k);
extent_save(b, lk, l.k);
}
}
if (prev) {
bch2_bkey_pack(prev, (void *) prev, f);
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = dst->start;
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
/* Sort + repack in a new format: */
struct btree_nr_keys
bch2_sort_repack(struct bset *dst, struct btree *src,
struct btree_node_iter *src_iter,
struct bkey_format *out_f,
bool filter_whiteouts)
{
struct bkey_format *in_f = &src->format;
struct bkey_packed *in, *out = vstruct_last(dst);
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
if (filter_whiteouts && bkey_whiteout(in))
continue;
if (bch2_bkey_transform(out_f, out, bkey_packed(in)
? in_f : &bch2_bkey_format_current, in))
out->format = KEY_FORMAT_LOCAL_BTREE;
else
bch2_bkey_unpack(src, (void *) out, in);
btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
/* Sort, repack, and merge: */
struct btree_nr_keys
bch2_sort_repack_merge(struct bch_fs *c,
struct bset *dst, struct btree *src,
struct btree_node_iter *iter,
struct bkey_format *out_f,
bool filter_whiteouts)
{
struct bkey_packed *k, *prev = NULL, *out;
struct btree_nr_keys nr;
BKEY_PADDED(k) tmp;
memset(&nr, 0, sizeof(nr));
while ((k = bch2_btree_node_iter_next_all(iter, src))) {
if (filter_whiteouts && bkey_whiteout(k))
continue;
/*
* The filter might modify pointers, so we have to unpack the
* key and values to &tmp.k:
*/
bch2_bkey_unpack(src, &tmp.k, k);
if (filter_whiteouts &&
bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k)))
continue;
/* prev is always unpacked, for key merging: */
if (prev &&
bch2_bkey_merge(c, (void *) prev, &tmp.k) ==
BCH_MERGE_MERGE)
continue;
/*
* the current key becomes the new prev: advance prev, then
* copy the current key - but first pack prev (in place):
*/
if (prev) {
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
prev = vstruct_last(dst);
}
bkey_copy(prev, &tmp.k);
}
if (prev) {
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = vstruct_last(dst);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
static inline int sort_keys_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r) ?:
(int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
(int) l->needs_whiteout - (int) r->needs_whiteout;
}
unsigned bch2_sort_keys(struct bkey_packed *dst,
struct sort_iter *iter,
bool filter_whiteouts)
{
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *next, *out = dst;
sort_iter_sort(iter, sort_keys_cmp);
while ((in = sort_iter_next(iter, sort_keys_cmp))) {
if (bkey_whiteout(in) &&
(filter_whiteouts || !in->needs_whiteout))
continue;
if (bkey_whiteout(in) &&
(next = sort_iter_peek(iter)) &&
!bkey_cmp_packed(iter->b, in, next)) {
BUG_ON(in->needs_whiteout &&
next->needs_whiteout);
/*
* XXX racy, called with read lock from write path
*
* leads to spurious BUG_ON() in bkey_unpack_key() in
* debug mode
*/
next->needs_whiteout |= in->needs_whiteout;
continue;
}
if (bkey_whiteout(in)) {
memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
set_bkeyp_val_u64s(f, out, 0);
} else {
bkey_copy(out, in);
}
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_extents_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r) ?:
(int) bkey_deleted(l) - (int) bkey_deleted(r);
}
unsigned bch2_sort_extents(struct bkey_packed *dst,
struct sort_iter *iter,
bool filter_whiteouts)
{
struct bkey_packed *in, *out = dst;
sort_iter_sort(iter, sort_extents_cmp);
while ((in = sort_iter_next(iter, sort_extents_cmp))) {
if (bkey_deleted(in))
continue;
if (bkey_whiteout(in) &&
(filter_whiteouts || !in->needs_whiteout))
continue;
bkey_copy(out, in);
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_key_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r);
}
unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst,
struct sort_iter *iter)
{
struct bkey_packed *in, *out = dst;
sort_iter_sort(iter, sort_key_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) {
bkey_copy(out, in);
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_extent_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
struct bkey ul = bkey_unpack_key(b, l);
struct bkey ur = bkey_unpack_key(b, r);
return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
}
unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
struct sort_iter *iter)
{
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *out = dst;
struct bkey_i l, r;
bool prev = false, l_packed = false;
u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE);
u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET);
u64 new_size;
max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
if (bkey_deleted(in))
continue;
EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_discard);
r.k = bkey_unpack_key(iter->b, in);
if (prev &&
bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
if (bkey_cmp(l.k.p, r.k.p) >= 0)
continue;
new_size = l_packed
? min(max_packed_size, max_packed_offset -
bkey_start_offset(&l.k))
: KEY_SIZE_MAX;
new_size = min(new_size, r.k.p.offset -
bkey_start_offset(&l.k));
BUG_ON(new_size < l.k.size);
bch2_key_resize(&l.k, new_size);
if (bkey_cmp(l.k.p, r.k.p) >= 0)
continue;
bch2_cut_front(l.k.p, &r);
}
if (prev) {
if (!bch2_bkey_pack(out, &l, f)) {
BUG_ON(l_packed);
bkey_copy(out, &l);
}
out = bkey_next(out);
}
l = r;
prev = true;
l_packed = bkey_packed(in);
}
if (prev) {
if (!bch2_bkey_pack(out, &l, f)) {
BUG_ON(l_packed);
bkey_copy(out, &l);
}
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}

libbcachefs/bkey_sort.h (new file, 68 lines)
View File

@ -0,0 +1,68 @@
#ifndef _BCACHEFS_BKEY_SORT_H
#define _BCACHEFS_BKEY_SORT_H
struct btree_node_iter_large {
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *,
struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
struct sort_iter {
struct btree *b;
unsigned used;
struct sort_iter_set {
struct bkey_packed *k, *end;
} data[MAX_BSETS + 1];
};
static inline void sort_iter_init(struct sort_iter *iter, struct btree *b)
{
memset(iter, 0, sizeof(*iter));
iter->b = b;
}
static inline void sort_iter_add(struct sort_iter *iter,
struct bkey_packed *k,
struct bkey_packed *end)
{
BUG_ON(iter->used >= ARRAY_SIZE(iter->data));
if (k != end)
iter->data[iter->used++] = (struct sort_iter_set) { k, end };
}
struct btree_nr_keys
bch2_key_sort_fix_overlapping(struct bset *, struct btree *,
struct btree_node_iter_large *);
struct btree_nr_keys
bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *,
struct btree *,
struct btree_node_iter_large *);
struct btree_nr_keys
bch2_sort_repack(struct bset *, struct btree *,
struct btree_node_iter *,
struct bkey_format *, bool);
struct btree_nr_keys
bch2_sort_repack_merge(struct bch_fs *,
struct bset *, struct btree *,
struct btree_node_iter *,
struct bkey_format *, bool);
unsigned bch2_sort_keys(struct bkey_packed *,
struct sort_iter *, bool);
unsigned bch2_sort_extents(struct bkey_packed *,
struct sort_iter *, bool);
unsigned bch2_sort_key_whiteouts(struct bkey_packed *,
struct sort_iter *);
unsigned bch2_sort_extent_whiteouts(struct bkey_packed *,
struct sort_iter *);
#endif /* _BCACHEFS_BKEY_SORT_H */
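
The new bkey_sort.h collects the sort iterators that btree_io.c previously kept private: a typical write or compaction pass builds a sort_iter over a node's bsets and then merges them with bch2_sort_keys(). The sketch below assumes the usual bset.h helpers (for_each_bset(), btree_bkey_first(), btree_bkey_last()) and is not the actual caller, which lives in btree_io.c outside this excerpt:

static unsigned example_compact_keys(struct btree *b, struct bkey_packed *dst)
{
	struct sort_iter sort_iter;
	struct bset_tree *t;

	sort_iter_init(&sort_iter, b);

	for_each_bset(b, t)
		sort_iter_add(&sort_iter,
			      btree_bkey_first(b, t),
			      btree_bkey_last(b, t));

	/* merge the per-bset runs, dropping whiteouts; returns u64s written */
	return bch2_sort_keys(dst, &sort_iter, true);
}
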

View File

@ -381,7 +381,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
static inline struct bkey_packed *
bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1);
return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1);
}
enum bch_extent_overlap {
@ -513,7 +513,7 @@ bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
static inline struct bkey_packed *
bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
{
return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1);
return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1);
}
static inline struct bkey_packed *
@ -539,7 +539,7 @@ bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b)
static inline struct bkey_packed *
bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
{
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1);
}
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,

View File

@ -5,20 +5,17 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "debug.h"
#include "extents.h"
#include <linux/prefetch.h>
#include <trace/events/bcachefs.h>
#define DEF_BTREE_ID(kwd, val, name) name,
const char * const bch2_btree_ids[] = {
DEFINE_BCH_BTREE_IDS()
#define x(kwd, val, name) name,
BCH_BTREE_IDS()
#undef x
NULL
};
#undef DEF_BTREE_ID
void bch2_recalc_btree_reserve(struct bch_fs *c)
{
unsigned i, reserve = 16;
@ -99,7 +96,7 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
if (!b)
return NULL;
bkey_extent_init(&b->key);
bkey_btree_ptr_init(&b->key);
six_lock_init(&b->lock);
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
@ -115,7 +112,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
/* Cause future lookups for this node to fail: */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
PTR_HASH(&b->key) = 0;
}
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
@ -602,7 +599,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
/* raced with another fill: */
/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
PTR_HASH(&b->key) = 0;
mutex_lock(&bc->lock);
list_add(&b->list, &bc->freeable);
@ -904,8 +901,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
b->data->min_key.offset,
b->data->max_key.inode,
b->data->max_key.offset);
bch2_val_to_text(out, c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
pr_buf(out, "\n"
" format: u64s %u fields %u %u %u %u %u\n"
" unpack fn len: %u\n"

View File

@ -3,7 +3,6 @@
#include "bcachefs.h"
#include "btree_types.h"
#include "extents.h"
struct btree_iter;
@ -36,12 +35,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
void bch2_fs_btree_cache_init_early(struct btree_cache *);
#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])
#define PTR_HASH(_k) *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v)
/* is btree node in hash table? */
static inline bool btree_node_hashed(struct btree *b)
{
return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key);
return b->key.k.type == KEY_TYPE_btree_ptr &&
PTR_HASH(&b->key);
}
#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \

View File

@ -109,152 +109,11 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
/* marking of btree keys/nodes: */
static bool bkey_type_needs_gc(enum bkey_type type)
{
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
case BKEY_TYPE_EC:
return true;
default:
return false;
}
}
static void ptr_gen_recalc_oldest(struct bch_fs *c,
const struct bch_extent_ptr *ptr,
u8 *max_stale)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t b = PTR_BUCKET_NR(ca, ptr);
if (gen_after(ca->oldest_gens[b], ptr->gen))
ca->oldest_gens[b] = ptr->gen;
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
static u8 ptr_gens_recalc_oldest(struct bch_fs *c,
enum bkey_type type,
struct bkey_s_c k)
static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
u8 *max_stale, bool initial)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
u8 max_stale = 0;
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
extent_for_each_ptr(e, ptr)
ptr_gen_recalc_oldest(c, ptr, &max_stale);
break;
}
}
break;
case BKEY_TYPE_EC:
switch (k.k->type) {
case BCH_STRIPE: {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
for (ptr = s.v->ptrs;
ptr < s.v->ptrs + s.v->nr_blocks;
ptr++)
ptr_gen_recalc_oldest(c, ptr, &max_stale);
}
}
default:
break;
}
return max_stale;
}
static int ptr_gen_check(struct bch_fs *c,
enum bkey_type type,
const struct bch_extent_ptr *ptr)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t b = PTR_BUCKET_NR(ca, ptr);
struct bucket *g = PTR_BUCKET(ca, ptr);
int ret = 0;
if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
"found ptr with missing gen in alloc btree,\n"
"type %u gen %u",
type, ptr->gen)) {
g->_mark.gen = ptr->gen;
g->_mark.gen_valid = 1;
set_bit(b, ca->buckets_dirty);
}
if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
"%u ptr gen in the future: %u > %u",
type, ptr->gen, g->mark.gen)) {
g->_mark.gen = ptr->gen;
g->_mark.gen_valid = 1;
set_bit(b, ca->buckets_dirty);
set_bit(BCH_FS_FIXED_GENS, &c->flags);
}
fsck_err:
return ret;
}
static int ptr_gens_check(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
{
const struct bch_extent_ptr *ptr;
int ret = 0;
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
extent_for_each_ptr(e, ptr) {
ret = ptr_gen_check(c, type, ptr);
if (ret)
return ret;
}
break;
}
}
break;
case BKEY_TYPE_EC:
switch (k.k->type) {
case BCH_STRIPE: {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
for (ptr = s.v->ptrs;
ptr < s.v->ptrs + s.v->nr_blocks;
ptr++) {
ret = ptr_gen_check(c, type, ptr);
if (ret)
return ret;
}
}
}
break;
default:
break;
}
return ret;
}
/*
* For runtime mark and sweep:
*/
static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k, bool initial)
{
struct gc_pos pos = { 0 };
unsigned flags =
BCH_BUCKET_MARK_GC|
@ -269,52 +128,77 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
atomic64_set(&c->key_version, k.k->version.lo);
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_bkey_replicas_marked(c, type, k,
false), c,
fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c,
"superblock not marked as containing replicas (type %u)",
type)) {
ret = bch2_mark_bkey_replicas(c, type, k);
k.k->type)) {
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
return ret;
}
ret = ptr_gens_check(c, type, k);
if (ret)
return ret;
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t b = PTR_BUCKET_NR(ca, ptr);
struct bucket *g = PTR_BUCKET(ca, ptr);
if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
"found ptr with missing gen in alloc btree,\n"
"type %u gen %u",
k.k->type, ptr->gen)) {
g->_mark.gen = ptr->gen;
g->_mark.gen_valid = 1;
set_bit(b, ca->buckets_dirty);
}
if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
"%u ptr gen in the future: %u > %u",
k.k->type, ptr->gen, g->mark.gen)) {
g->_mark.gen = ptr->gen;
g->_mark.gen_valid = 1;
set_bit(b, ca->buckets_dirty);
set_bit(BCH_FS_FIXED_GENS, &c->flags);
}
}
}
bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags);
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t b = PTR_BUCKET_NR(ca, ptr);
ret = ptr_gens_recalc_oldest(c, type, k);
if (gen_after(ca->oldest_gens[b], ptr->gen))
ca->oldest_gens[b] = ptr->gen;
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags);
fsck_err:
return ret;
}
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
bool initial)
u8 *max_stale, bool initial)
{
enum bkey_type type = btree_node_type(b);
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
u8 stale = 0;
int ret;
int ret = 0;
if (!bkey_type_needs_gc(type))
*max_stale = 0;
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
for_each_btree_node_key_unpack(b, k, &iter,
&unpacked) {
bch2_bkey_debugcheck(c, b, k);
ret = bch2_gc_mark_key(c, type, k, initial);
if (ret < 0)
return ret;
stale = max_t(u8, stale, ret);
ret = bch2_gc_mark_key(c, k, max_stale, initial);
if (ret)
break;
}
return stale;
return ret;
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
@ -323,15 +207,12 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
struct btree_iter iter;
struct btree *b;
struct range_checks r;
unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
unsigned max_stale;
unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1;
u8 max_stale;
int ret = 0;
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
if (!c->btree_roots[btree_id].b)
return 0;
/*
* if expensive_debug_checks is on, run range_checks on all leaf nodes:
*
@ -349,7 +230,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bch2_verify_btree_nr_keys(b);
max_stale = btree_gc_mark_node(c, b, initial);
ret = btree_gc_mark_node(c, b, &max_stale, initial);
if (ret)
break;
gc_pos_set(c, gc_pos_btree_node(b));
@ -370,7 +253,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bch2_btree_iter_cond_resched(&iter);
}
ret = bch2_btree_iter_unlock(&iter);
ret = bch2_btree_iter_unlock(&iter) ?: ret;
if (ret)
return ret;
@ -378,8 +261,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key), initial);
bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
&max_stale, initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
@ -396,6 +279,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
bool initial)
{
enum btree_id ids[BTREE_ID_NR];
u8 max_stale;
unsigned i;
for (i = 0; i < BTREE_ID_NR; i++)
@ -404,13 +288,13 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
for (i = 0; i < BTREE_ID_NR; i++) {
enum btree_id id = ids[i];
enum bkey_type type = bkey_type(0, id);
enum btree_node_type type = __btree_node_type(0, id);
int ret = bch2_gc_btree(c, id, initial);
if (ret)
return ret;
if (journal && bkey_type_needs_gc(type)) {
if (journal && btree_node_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
@ -418,10 +302,11 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
list_for_each_entry(r, journal, list)
for_each_jset_key(k, n, j, &r->j) {
if (type == bkey_type(j->level, j->btree_id)) {
ret = bch2_gc_mark_key(c, type,
bkey_i_to_s_c(k), initial);
if (ret < 0)
if (type == __btree_node_type(j->level, j->btree_id)) {
ret = bch2_gc_mark_key(c,
bkey_i_to_s_c(k),
&max_stale, initial);
if (ret)
return ret;
}
}
@ -519,8 +404,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&d->key),
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
true, 0,
pos, NULL, 0,
BCH_BUCKET_MARK_GC);
@ -579,6 +463,8 @@ static void bch2_gc_free(struct bch_fs *c)
struct bch_dev *ca;
unsigned i;
genradix_free(&c->stripes[1]);
for_each_member_device(ca, c, i) {
kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
sizeof(struct bucket_array) +
@ -599,6 +485,25 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
unsigned i;
int cpu;
{
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
c->ec_stripes_heap.used = 0;
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
*dst = *src;
if (dst->alive)
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]);
}
}
for_each_member_device(ca, c, i) {
struct bucket_array *src = __bucket_array(ca, 1);
@ -646,13 +551,21 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_field(_f, _msg, ...) \
if (dst._f != src._f) { \
pr_info(_msg ": got %llu, should be %llu, fixing" \
bch_err(c, _msg ": got %llu, should be %llu, fixing"\
, ##__VA_ARGS__, dst._f, src._f); \
dst._f = src._f; \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
bch_err_ratelimited(c, "stripe %zu has wrong "_msg \
": got %u, should be %u, fixing", \
dst_iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
pr_info("dev %u bucket %zu has wrong " #_f \
bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
": got %u, should be %u, fixing", \
i, b, dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
@ -669,6 +582,36 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
goto out;
}
{
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
unsigned i;
c->ec_stripes_heap.used = 0;
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
copy_stripe_field(alive, "alive");
copy_stripe_field(sectors, "sectors");
copy_stripe_field(algorithm, "algorithm");
copy_stripe_field(nr_blocks, "nr_blocks");
copy_stripe_field(nr_redundant, "nr_redundant");
copy_stripe_field(blocks_nonempty.counter,
"blocks_nonempty");
for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
copy_stripe_field(block_sectors[i].counter,
"block_sectors[%u]", i);
if (dst->alive)
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
genradix_iter_advance(&dst_iter, &c->stripes[0]);
genradix_iter_advance(&src_iter, &c->stripes[1]);
}
}
for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 0);
struct bucket_array *src = __bucket_array(ca, 1);
@ -753,10 +696,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
out:
percpu_up_write(&c->usage_lock);
#undef copy_field
#undef copy_fs_field
#undef copy_dev_field
#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
}
static int bch2_gc_start(struct bch_fs *c)
@ -764,6 +708,12 @@ static int bch2_gc_start(struct bch_fs *c)
struct bch_dev *ca;
unsigned i;
/*
* indicate to stripe code that we need to allocate for the gc stripes
* radix tree, too
*/
gc_pos_set(c, gc_phase(GC_PHASE_START));
BUG_ON(c->usage[1]);
c->usage[1] = alloc_percpu(struct bch_fs_usage);
@ -805,7 +755,7 @@ static int bch2_gc_start(struct bch_fs *c)
percpu_up_write(&c->usage_lock);
return 0;
return bch2_ec_mem_alloc(c, true);
}
/**
@ -870,7 +820,7 @@ out:
bch2_gc_done(c, initial);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_START));
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
bch2_gc_free(c);
up_write(&c->gc_lock);
@ -1110,7 +1060,6 @@ next:
/* Free the old nodes and update our sliding window */
for (i = 0; i < nr_old_nodes; i++) {
bch2_btree_node_free_inmem(c, old_nodes[i], iter);
six_unlock_intent(&old_nodes[i]->lock);
/*
* the index update might have triggered a split, in which case

View File

@ -3,8 +3,6 @@
#include "btree_types.h"
enum bkey_type;
void bch2_coalesce(struct bch_fs *);
int bch2_gc(struct bch_fs *, struct list_head *, bool);
void bch2_gc_thread_stop(struct bch_fs *);
@ -57,9 +55,9 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
{
switch (id) {
#define DEF_BTREE_ID(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
DEFINE_BCH_BTREE_IDS()
#undef DEF_BTREE_ID
#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
BCH_BTREE_IDS()
#undef x
default:
BUG();
}

View File

@ -1,6 +1,7 @@
#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
@ -19,40 +20,6 @@
#include <trace/events/bcachefs.h>
/* btree_node_iter_large: */
#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r)
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
struct btree *b,
const struct bkey_packed *k,
const struct bkey_packed *end)
{
if (k != end) {
struct btree_node_iter_set n =
((struct btree_node_iter_set) {
__btree_node_key_to_offset(b, k),
__btree_node_key_to_offset(b, end)
});
__heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
}
}
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
struct btree *b)
{
iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;
EBUG_ON(!iter->used);
EBUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
else
heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}
static void verify_no_dups(struct btree *b,
struct bkey_packed *start,
struct bkey_packed *end)
@ -113,193 +80,6 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
}
typedef int (*sort_cmp_fn)(struct btree *,
struct bkey_packed *,
struct bkey_packed *);
struct sort_iter {
struct btree *b;
unsigned used;
struct sort_iter_set {
struct bkey_packed *k, *end;
} data[MAX_BSETS + 1];
};
static void sort_iter_init(struct sort_iter *iter, struct btree *b)
{
memset(iter, 0, sizeof(*iter));
iter->b = b;
}
static inline void __sort_iter_sift(struct sort_iter *iter,
unsigned from,
sort_cmp_fn cmp)
{
unsigned i;
for (i = from;
i + 1 < iter->used &&
cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
i++)
swap(iter->data[i], iter->data[i + 1]);
}
static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
{
__sort_iter_sift(iter, 0, cmp);
}
static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
{
unsigned i = iter->used;
while (i--)
__sort_iter_sift(iter, i, cmp);
}
static void sort_iter_add(struct sort_iter *iter,
struct bkey_packed *k,
struct bkey_packed *end)
{
BUG_ON(iter->used >= ARRAY_SIZE(iter->data));
if (k != end)
iter->data[iter->used++] = (struct sort_iter_set) { k, end };
}
static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
{
return iter->used ? iter->data->k : NULL;
}
static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
{
iter->data->k = bkey_next(iter->data->k);
BUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
array_remove_item(iter->data, iter->used, 0);
else
sort_iter_sift(iter, cmp);
}
static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
sort_cmp_fn cmp)
{
struct bkey_packed *ret = sort_iter_peek(iter);
if (ret)
sort_iter_advance(iter, cmp);
return ret;
}
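The sort_iter being dropped here (it moves to bkey_sort.c in this commit) is a small k-way merge used when compacting and writing btree nodes: each bset contributes a (k, end) range, the ranges are kept ordered by their current front key with an insertion sift, and next() hands out the smallest front. A standalone sketch of the same pattern — not part of the diff, using integer ranges and stand-in names in place of bkey_packed:

#include <stdio.h>
#include <stddef.h>

/* stand-in for struct sort_iter_set: a range of ints instead of bkey_packed */
struct int_range { const int *k, *end; };

struct int_sort_iter {
	unsigned		used;
	struct int_range	data[4];
};

static void int_sort_iter_add(struct int_sort_iter *iter,
			      const int *k, const int *end)
{
	if (k != end)
		iter->data[iter->used++] = (struct int_range) { k, end };
}

/* mirrors __sort_iter_sift(): bubble the range at @from down to its position */
static void int_sort_iter_sift(struct int_sort_iter *iter, unsigned from)
{
	unsigned i;

	for (i = from;
	     i + 1 < iter->used && *iter->data[i].k > *iter->data[i + 1].k;
	     i++) {
		struct int_range tmp = iter->data[i];

		iter->data[i]		= iter->data[i + 1];
		iter->data[i + 1]	= tmp;
	}
}

static void int_sort_iter_sort(struct int_sort_iter *iter)
{
	unsigned i = iter->used;

	while (i--)
		int_sort_iter_sift(iter, i);
}

/* mirrors sort_iter_next(): take the smallest front, then advance that range */
static const int *int_sort_iter_next(struct int_sort_iter *iter)
{
	const int *ret;

	if (!iter->used)
		return NULL;

	ret = iter->data[0].k++;

	if (iter->data[0].k == iter->data[0].end)
		iter->data[0] = iter->data[--iter->used];

	int_sort_iter_sort(iter);
	return ret;
}

int main(void)
{
	const int a[] = { 1, 4, 9 }, b[] = { 2, 3, 10 }, c[] = { 0, 8 };
	struct int_sort_iter iter = { 0 };
	const int *p;

	int_sort_iter_add(&iter, a, a + 3);
	int_sort_iter_add(&iter, b, b + 3);
	int_sort_iter_add(&iter, c, c + 2);
	int_sort_iter_sort(&iter);

	while ((p = int_sort_iter_next(&iter)))
		printf("%d ", *p);	/* prints: 0 1 2 3 4 8 9 10 */
	printf("\n");

	return 0;
}

The real iterator removes an exhausted set with array_remove_item() and only re-sifts the front slot; the sketch re-sorts for brevity.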
static inline int sort_key_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r);
}
static unsigned sort_key_whiteouts(struct bkey_packed *dst,
struct sort_iter *iter)
{
struct bkey_packed *in, *out = dst;
sort_iter_sort(iter, sort_key_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) {
bkey_copy(out, in);
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_extent_whiteouts_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
struct bkey ul = bkey_unpack_key(b, l);
struct bkey ur = bkey_unpack_key(b, r);
return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
}
static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
struct sort_iter *iter)
{
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *out = dst;
struct bkey_i l, r;
bool prev = false, l_packed = false;
u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE);
u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET);
u64 new_size;
max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
if (bkey_deleted(in))
continue;
EBUG_ON(bkeyp_val_u64s(f, in));
EBUG_ON(in->type != KEY_TYPE_DISCARD);
r.k = bkey_unpack_key(iter->b, in);
if (prev &&
bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
if (bkey_cmp(l.k.p, r.k.p) >= 0)
continue;
new_size = l_packed
? min(max_packed_size, max_packed_offset -
bkey_start_offset(&l.k))
: KEY_SIZE_MAX;
new_size = min(new_size, r.k.p.offset -
bkey_start_offset(&l.k));
BUG_ON(new_size < l.k.size);
bch2_key_resize(&l.k, new_size);
if (bkey_cmp(l.k.p, r.k.p) >= 0)
continue;
bch2_cut_front(l.k.p, &r);
}
if (prev) {
if (!bch2_bkey_pack(out, &l, f)) {
BUG_ON(l_packed);
bkey_copy(out, &l);
}
out = bkey_next(out);
}
l = r;
prev = true;
l_packed = bkey_packed(in);
}
if (prev) {
if (!bch2_bkey_pack(out, &l, f)) {
BUG_ON(l_packed);
bkey_copy(out, &l);
}
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
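As a worked reading of sort_extent_whiteouts() above: given whiteouts covering sectors [0,8) and [4,12) (already sorted by start position), the second pass sees l.k.p = 8 >= bkey_start_pos(&r.k) = 4 and 8 < r.k.p = 12, so l is resized to new_size = min(KEY_SIZE_MAX, 12 - 0) = 12; now l.k.p >= r.k.p, r is dropped, and a single whiteout covering [0,12) is emitted. If instead l were packed and its format capped the resize at, say, 12 while r ran to 16 (hypothetical numbers), l would be emitted as [0,12) and r cut down with bch2_cut_front() to [12,16), carried forward as the new prev.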
static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
bool compacting,
enum compact_mode mode)
@ -420,11 +200,10 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
BUG_ON((void *) unwritten_whiteouts_start(c, b) <
(void *) btree_bkey_last(b, bset_tree_last(b)));
u64s = btree_node_is_extents(b)
? sort_extent_whiteouts(unwritten_whiteouts_start(c, b),
&sort_iter)
: sort_key_whiteouts(unwritten_whiteouts_start(c, b),
&sort_iter);
u64s = (btree_node_is_extents(b)
? bch2_sort_extent_whiteouts
: bch2_sort_key_whiteouts)(unwritten_whiteouts_start(c, b),
&sort_iter);
BUG_ON(u64s > b->whiteout_u64s);
BUG_ON(u64s != b->whiteout_u64s && !btree_node_is_extents(b));
@ -499,87 +278,6 @@ static bool bch2_drop_whiteouts(struct btree *b)
return ret;
}
static inline int sort_keys_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r) ?:
(int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
(int) l->needs_whiteout - (int) r->needs_whiteout;
}
static unsigned sort_keys(struct bkey_packed *dst,
struct sort_iter *iter,
bool filter_whiteouts)
{
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *next, *out = dst;
sort_iter_sort(iter, sort_keys_cmp);
while ((in = sort_iter_next(iter, sort_keys_cmp))) {
if (bkey_whiteout(in) &&
(filter_whiteouts || !in->needs_whiteout))
continue;
if (bkey_whiteout(in) &&
(next = sort_iter_peek(iter)) &&
!bkey_cmp_packed(iter->b, in, next)) {
BUG_ON(in->needs_whiteout &&
next->needs_whiteout);
/*
* XXX racy, called with read lock from write path
*
* leads to spurious BUG_ON() in bkey_unpack_key() in
* debug mode
*/
next->needs_whiteout |= in->needs_whiteout;
continue;
}
if (bkey_whiteout(in)) {
memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
set_bkeyp_val_u64s(f, out, 0);
} else {
bkey_copy(out, in);
}
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static inline int sort_extents_cmp(struct btree *b,
struct bkey_packed *l,
struct bkey_packed *r)
{
return bkey_cmp_packed(b, l, r) ?:
(int) bkey_deleted(l) - (int) bkey_deleted(r);
}
static unsigned sort_extents(struct bkey_packed *dst,
struct sort_iter *iter,
bool filter_whiteouts)
{
struct bkey_packed *in, *out = dst;
sort_iter_sort(iter, sort_extents_cmp);
while ((in = sort_iter_next(iter, sort_extents_cmp))) {
if (bkey_deleted(in))
continue;
if (bkey_whiteout(in) &&
(filter_whiteouts || !in->needs_whiteout))
continue;
bkey_copy(out, in);
out = bkey_next(out);
}
return (u64 *) out - (u64 *) dst;
}
static void btree_node_sort(struct bch_fs *c, struct btree *b,
struct btree_iter *iter,
unsigned start_idx,
@ -618,9 +316,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
if (btree_node_is_extents(b))
filter_whiteouts = bset_written(b, start_bset);
u64s = btree_node_is_extents(b)
? sort_extents(out->keys.start, &sort_iter, filter_whiteouts)
: sort_keys(out->keys.start, &sort_iter, filter_whiteouts);
u64s = (btree_node_is_extents(b)
? bch2_sort_extents
: bch2_sort_keys)(out->keys.start,
&sort_iter,
filter_whiteouts);
out->keys.u64s = cpu_to_le16(u64s);
@ -678,101 +378,6 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
bch2_verify_btree_nr_keys(b);
}
/* Sort + repack in a new format: */
static struct btree_nr_keys sort_repack(struct bset *dst,
struct btree *src,
struct btree_node_iter *src_iter,
struct bkey_format *out_f,
bool filter_whiteouts)
{
struct bkey_format *in_f = &src->format;
struct bkey_packed *in, *out = vstruct_last(dst);
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
if (filter_whiteouts && bkey_whiteout(in))
continue;
if (bch2_bkey_transform(out_f, out, bkey_packed(in)
? in_f : &bch2_bkey_format_current, in))
out->format = KEY_FORMAT_LOCAL_BTREE;
else
bch2_bkey_unpack(src, (void *) out, in);
btree_keys_account_key_add(&nr, 0, out);
out = bkey_next(out);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
/* Sort, repack, and merge: */
static struct btree_nr_keys sort_repack_merge(struct bch_fs *c,
struct bset *dst,
struct btree *src,
struct btree_node_iter *iter,
struct bkey_format *out_f,
bool filter_whiteouts,
key_filter_fn filter,
key_merge_fn merge)
{
struct bkey_packed *k, *prev = NULL, *out;
struct btree_nr_keys nr;
BKEY_PADDED(k) tmp;
memset(&nr, 0, sizeof(nr));
while ((k = bch2_btree_node_iter_next_all(iter, src))) {
if (filter_whiteouts && bkey_whiteout(k))
continue;
/*
* The filter might modify pointers, so we have to unpack the
* key and values to &tmp.k:
*/
bch2_bkey_unpack(src, &tmp.k, k);
if (filter && filter(c, src, bkey_i_to_s(&tmp.k)))
continue;
/* prev is always unpacked, for key merging: */
if (prev &&
merge &&
merge(c, src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE)
continue;
/*
* the current key becomes the new prev: advance prev, then
* copy the current key - but first pack prev (in place):
*/
if (prev) {
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
prev = vstruct_last(dst);
}
bkey_copy(prev, &tmp.k);
}
if (prev) {
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = vstruct_last(dst);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
return nr;
}
void bch2_btree_sort_into(struct bch_fs *c,
struct btree *dst,
struct btree *src)
@ -787,16 +392,13 @@ void bch2_btree_sort_into(struct bch_fs *c,
bch2_btree_node_iter_init_from_start(&src_iter, src);
if (btree_node_ops(src)->key_normalize ||
btree_node_ops(src)->key_merge)
nr = sort_repack_merge(c, btree_bset_first(dst),
if (btree_node_is_extents(src))
nr = bch2_sort_repack_merge(c, btree_bset_first(dst),
src, &src_iter,
&dst->format,
true,
btree_node_ops(src)->key_normalize,
btree_node_ops(src)->key_merge);
true);
else
nr = sort_repack(btree_bset_first(dst),
nr = bch2_sort_repack(btree_bset_first(dst),
src, &src_iter,
&dst->format,
true);
@ -1000,8 +602,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
{
struct bkey_packed *k, *prev = NULL;
struct bpos prev_pos = POS_MIN;
enum bkey_type type = btree_node_type(b);
bool seen_non_whiteout = false;
unsigned version;
const char *err;
int ret = 0;
@ -1047,13 +649,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
"invalid bkey format: %s", err);
}
if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION,
BTREE_ERR_FIXABLE, c, b, i,
"unsupported bset version")) {
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
i->u64s = 0;
return 0;
}
version = le16_to_cpu(i->version);
btree_err_on((version != BCH_BSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max,
BTREE_ERR_FATAL, c, b, i,
"unsupported bset version");
if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
BTREE_ERR_FIXABLE, c, b, i,
@ -1102,17 +703,21 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
}
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
bch2_bkey_swab(type, &b->format, k);
bch2_bkey_swab(&b->format, k);
if (!write &&
version < bcachefs_metadata_version_bkey_renumber)
bch2_bkey_renumber(btree_node_type(b), k, write);
u = bkey_disassemble(b, k, &tmp);
invalid = __bch2_bkey_invalid(c, type, u) ?:
invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, u) ?:
(write ? bch2_bkey_val_invalid(c, type, u) : NULL);
(write ? bch2_bkey_val_invalid(c, u) : NULL);
if (invalid) {
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, type, u);
bch2_bkey_val_to_text(&PBUF(buf), c, u);
btree_err(BTREE_ERR_FIXABLE, c, b, i,
"invalid bkey:\n%s\n%s", invalid, buf);
@ -1122,6 +727,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
continue;
}
if (write &&
version < bcachefs_metadata_version_bkey_renumber)
bch2_bkey_renumber(btree_node_type(b), k, write);
/*
* with the separate whiteouts thing (used for extents), the
* second set of keys actually can have whiteouts too, so we
@ -1287,17 +896,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
i = &b->data->keys;
for (k = i->start; k != vstruct_last(i);) {
enum bkey_type type = btree_node_type(b);
struct bkey tmp;
struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
const char *invalid = bch2_bkey_val_invalid(c, type, u);
const char *invalid = bch2_bkey_val_invalid(c, u);
if (invalid ||
(inject_invalid_keys(c) &&
!bversion_cmp(u.k->version, MAX_VERSION))) {
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, type, u);
bch2_bkey_val_to_text(&PBUF(buf), c, u);
btree_err(BTREE_ERR_FIXABLE, c, b, i,
"invalid bkey %s: %s", buf, invalid);
@ -1367,7 +975,9 @@ start:
bch2_mark_io_failure(&failed, &rb->pick);
can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
can_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
&failed, &rb->pick) > 0;
if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry))
@ -1410,7 +1020,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
trace_btree_read(c, b);
ret = bch2_btree_pick_ptr(c, b, NULL, &pick);
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
NULL, &pick);
if (bch2_fs_fatal_err_on(ret <= 0, c,
"btree node read error: no device to read from")) {
set_btree_node_read_error(b);
@ -1537,8 +1148,8 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
{
struct btree *b = wbio->wbio.bio.bi_private;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct bkey_i_extent *new_key;
struct bkey_s_extent e;
struct bkey_i_btree_ptr *new_key;
struct bkey_s_btree_ptr bp;
struct bch_extent_ptr *ptr;
struct btree_iter iter;
int ret;
@ -1562,13 +1173,13 @@ retry:
bkey_copy(&tmp.k, &b->key);
new_key = bkey_i_to_extent(&tmp.k);
e = extent_i_to_s(new_key);
new_key = bkey_i_to_btree_ptr(&tmp.k);
bp = btree_ptr_i_to_s(new_key);
bch2_extent_drop_ptrs(e, ptr,
bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_extent_nr_ptrs(e.c))
if (!bch2_bkey_nr_ptrs(bp.s_c))
goto err;
ret = bch2_btree_node_update_key(c, &iter, b, new_key);
@ -1671,12 +1282,11 @@ static void btree_node_write_endio(struct bio *bio)
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
const struct bch_extent_ptr *ptr;
unsigned whiteout_u64s = 0;
int ret;
extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
break;
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
return -1;
ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
if (ret)
@ -1694,7 +1304,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
BKEY_PADDED(key) k;
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct sort_iter sort_iter;
struct nonce nonce;
@ -1702,6 +1311,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
u64 seq = 0;
bool used_mempool;
unsigned long old, new;
bool validate_before_checksum = false;
void *data;
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
@ -1815,8 +1425,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
b->whiteout_u64s = 0;
u64s = btree_node_is_extents(b)
? sort_extents(vstruct_last(i), &sort_iter, false)
: sort_keys(i->start, &sort_iter, false);
? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
: bch2_sort_keys(i->start, &sort_iter, false);
le16_add_cpu(&i->u64s, u64s);
clear_needs_whiteout(i);
@ -1835,11 +1445,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
BUG_ON(i->seq != b->data->keys.seq);
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
i->version = c->sb.version < bcachefs_metadata_version_new_versioning
? cpu_to_le16(BCH_BSET_VERSION_OLD)
: cpu_to_le16(c->sb.version);
SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
validate_before_checksum = true;
/* validate_bset will be modifying: */
if (le16_to_cpu(i->version) <
bcachefs_metadata_version_bkey_renumber)
validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
if (validate_before_checksum &&
validate_bset_for_write(c, b, i, sectors_to_write))
goto err;
@ -1853,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
/* if we're not encrypting, check metadata after checksumming: */
if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
if (!validate_before_checksum &&
validate_bset_for_write(c, b, i, sectors_to_write))
goto err;
@ -1907,9 +1527,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
*/
bkey_copy(&k.key, &b->key);
e = bkey_i_to_s_extent(&k.key);
extent_for_each_ptr(e, ptr)
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr)
ptr->offset += b->written;
b->written += sectors_to_write;

View File

@ -142,46 +142,4 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_btree_verify_flushed(struct bch_fs *);
ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
/* Sorting */
struct btree_node_iter_large {
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree *);
void bch2_btree_node_iter_large_push(struct btree_node_iter_large *,
struct btree *,
const struct bkey_packed *,
const struct bkey_packed *);
static inline bool bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
{
return !iter->used;
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
struct btree *b)
{
return bch2_btree_node_iter_large_end(iter)
? NULL
: __btree_node_offset_to_key(b, iter->data->k);
}
static inline struct bkey_packed *
bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
struct btree *b)
{
struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);
if (ret)
bch2_btree_node_iter_large_advance(iter, b);
return ret;
}
#endif /* _BCACHEFS_BTREE_IO_H */

View File

@ -263,10 +263,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
/* Btree iterator locking: */
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_btree_iter_verify_locks(struct btree_iter *iter)
void __bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;
BUG_ON((iter->flags & BTREE_ITER_NOUNLOCK) &&
!btree_node_locked(iter, 0));
for (l = 0; btree_iter_node(iter, l); l++) {
if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
!btree_node_locked(iter, l))
@ -276,6 +279,15 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter)
btree_node_locked_type(iter, l));
}
}
void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
struct btree_iter *linked;
for_each_btree_iter(iter, linked)
__bch2_btree_iter_verify_locks(linked);
}
#endif
__flatten
@ -381,9 +393,9 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter,
break;
}
}
bch2_btree_iter_verify_locks(linked);
}
bch2_btree_iter_verify_locks(iter);
}
int bch2_btree_iter_unlock(struct btree_iter *iter)
@ -420,7 +432,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
* whiteouts)
*/
k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard)
: bch2_btree_node_iter_prev_all(&tmp, b);
if (k && btree_iter_pos_cmp(iter, b, k) > 0) {
char buf[100];
@ -609,7 +621,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
* signal to bch2_btree_iter_peek_slot() that we're currently at
* a hole
*/
u->type = KEY_TYPE_DELETED;
u->type = KEY_TYPE_deleted;
return bkey_s_c_null;
}
@ -775,9 +787,17 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
struct btree_iter *linked;
unsigned level = b->level;
/* caller now responsible for unlocking @b */
BUG_ON(iter->l[level].b != b);
BUG_ON(!btree_node_intent_locked(iter, level));
iter->l[level].b = BTREE_ITER_NOT_END;
mark_btree_node_unlocked(iter, level);
for_each_btree_iter(iter, linked)
if (linked->l[level].b == b) {
btree_node_unlock(linked, level);
__btree_node_unlock(linked, level);
linked->l[level].b = BTREE_ITER_NOT_END;
}
}

View File

@ -94,7 +94,7 @@ btree_lock_want(struct btree_iter *iter, int level)
return BTREE_NODE_UNLOCKED;
}
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
{
int lock_type = btree_node_locked_type(iter, level);
@ -105,6 +105,13 @@ static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
mark_btree_node_unlocked(iter, level);
}
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
{
BUG_ON(!level && iter->flags & BTREE_ITER_NOUNLOCK);
__btree_node_unlock(iter, level);
}
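The split above exists so that, while a BTREE_INSERT_NOUNLOCK transaction is committing, nothing accidentally drops a leaf lock: do_btree_insert_at() sets BTREE_ITER_NOUNLOCK on the traversed iterators (further down in this diff), the plain btree_node_unlock() asserts on it, and internal callers that are allowed to drop the lock use __btree_node_unlock(). A stripped-down sketch of the same guard pattern, not part of the diff, with stand-in types in place of the real iterator:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define ITER_NOUNLOCK	(1 << 0)	/* stand-in for BTREE_ITER_NOUNLOCK */

struct sketch_iter {
	unsigned	flags;
	bool		locked[4];	/* one lock per btree level */
};

/* trusted internal path: may always drop a lock (cf. __btree_node_unlock) */
static void sketch_node_unlock_raw(struct sketch_iter *iter, unsigned level)
{
	iter->locked[level] = false;
}

/* normal path: dropping the leaf lock is forbidden mid "no unlock" commit */
static void sketch_node_unlock(struct sketch_iter *iter, unsigned level)
{
	assert(!(level == 0 && (iter->flags & ITER_NOUNLOCK)));
	sketch_node_unlock_raw(iter, level);
}

int main(void)
{
	struct sketch_iter iter = { .locked = { true, true } };

	iter.flags |= ITER_NOUNLOCK;	/* commit in progress */
	sketch_node_unlock(&iter, 1);	/* fine: interior node */

	iter.flags &= ~ITER_NOUNLOCK;	/* commit done */
	sketch_node_unlock(&iter, 0);	/* now the leaf may be unlocked too */

	printf("leaf locked: %d\n", iter.locked[0]);
	return 0;
}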
static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
{
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);

View File

@ -191,6 +191,7 @@ enum btree_iter_type {
*/
#define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_ERROR (1 << 5)
#define BTREE_ITER_NOUNLOCK (1 << 6)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@ -403,20 +404,45 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
return i - (void *) b->data;
}
/* Type of keys @b contains: */
static inline enum bkey_type btree_node_type(struct btree *b)
enum btree_node_type {
#define x(kwd, val, name) BKEY_TYPE_##kwd = val,
BCH_BTREE_IDS()
#undef x
BKEY_TYPE_BTREE,
};
/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
return b->level ? BKEY_TYPE_BTREE : b->btree_id;
return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id;
}
static inline const struct bkey_ops *btree_node_ops(struct btree *b)
/* Type of keys @b contains: */
static inline enum btree_node_type btree_node_type(struct btree *b)
{
return &bch2_bkey_ops[btree_node_type(b)];
return __btree_node_type(b->level, b->btree_id);
}
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
return type == BKEY_TYPE_EXTENTS;
}
static inline bool btree_node_is_extents(struct btree *b)
{
return btree_node_type(b) == BKEY_TYPE_EXTENTS;
return btree_node_type_is_extents(btree_node_type(b));
}
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
switch (type) {
case BKEY_TYPE_BTREE:
case BKEY_TYPE_EXTENTS:
case BKEY_TYPE_EC:
return true;
default:
return false;
}
}
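With bkey types now globally unique, the per-node key type collapses into btree_node_type: leaves carry keys typed after their btree id, interior nodes always carry BKEY_TYPE_BTREE, and only the pointer-bearing types (btree, extents, erasure-coded stripes) are walked by gc. A standalone sketch of that mapping — the ids below are simplified stand-ins rather than the real BCH_BTREE_IDS() expansion:

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-ins; the real enums come from BCH_BTREE_IDS() */
enum sketch_btree_id  { ID_EXTENTS, ID_INODES, ID_DIRENTS, ID_EC };
enum sketch_node_type { TYPE_EXTENTS, TYPE_INODES, TYPE_DIRENTS, TYPE_EC, TYPE_BTREE };

/* mirrors __btree_node_type(): interior nodes hold btree pointers */
static enum sketch_node_type sketch_node_type(unsigned level, enum sketch_btree_id id)
{
	return level ? TYPE_BTREE : (enum sketch_node_type) id;
}

/* mirrors btree_node_type_needs_gc(): only keys carrying pointers to buckets */
static bool sketch_type_needs_gc(enum sketch_node_type type)
{
	switch (type) {
	case TYPE_BTREE:
	case TYPE_EXTENTS:
	case TYPE_EC:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	printf("extents leaf:  %d\n", sketch_type_needs_gc(sketch_node_type(0, ID_EXTENTS)));	/* 1 */
	printf("dirents leaf:  %d\n", sketch_type_needs_gc(sketch_node_type(0, ID_DIRENTS)));	/* 0 */
	printf("interior node: %d\n", sketch_type_needs_gc(sketch_node_type(1, ID_DIRENTS)));	/* 1 */
	return 0;
}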
struct btree_root {

View File

@ -119,7 +119,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
__le64, unsigned);
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
struct btree *, struct bkey_i_extent *);
struct btree *, struct bkey_i_btree_ptr *);
/* new transactional interface: */

View File

@ -131,13 +131,15 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
/* Btree node freeing/allocation: */
static bool btree_key_matches(struct bch_fs *c,
struct bkey_s_c_extent l,
struct bkey_s_c_extent r)
struct bkey_s_c l,
struct bkey_s_c r)
{
struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l);
struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r);
const struct bch_extent_ptr *ptr1, *ptr2;
extent_for_each_ptr(l, ptr1)
extent_for_each_ptr(r, ptr2)
bkey_for_each_ptr(ptrs1, ptr1)
bkey_for_each_ptr(ptrs2, ptr2)
if (ptr1->dev == ptr2->dev &&
ptr1->gen == ptr2->gen &&
ptr1->offset == ptr2->offset)
@ -159,17 +161,11 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
{
struct bch_fs *c = as->c;
struct pending_btree_node_free *d;
/*
* btree_update lock is only needed here to avoid racing with
* gc:
*/
mutex_lock(&c->btree_interior_update_lock);
struct gc_pos pos = { 0 };
for (d = as->pending; d < as->pending + as->nr_pending; d++)
if (!bkey_cmp(k.k->p, d->key.k.p) &&
btree_key_matches(c, bkey_s_c_to_extent(k),
bkey_i_to_s_c_extent(&d->key)))
btree_key_matches(c, k, bkey_i_to_s_c(&d->key)))
goto found;
BUG();
found:
@ -200,20 +196,11 @@ found:
if (gc_pos_cmp(c->gc_pos, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct gc_pos pos = { 0 };
bch2_mark_key(c, BKEY_TYPE_BTREE,
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
bch2_mark_key_locked(c,
bkey_i_to_s_c(&d->key),
false, 0, pos,
NULL, 0, BCH_BUCKET_MARK_GC);
/*
* Don't apply tmp - pending deletes aren't tracked in
* bch_alloc_stats:
*/
}
mutex_unlock(&c->btree_interior_update_lock);
}
static void __btree_node_free(struct bch_fs *c, struct btree *b)
@ -256,6 +243,11 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
{
struct btree_iter *linked;
for_each_btree_iter(iter, linked)
BUG_ON(linked->l[b->level].b == b);
/*
* Is this a node that isn't reachable on disk yet?
*
@ -267,11 +259,10 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
*/
btree_update_drop_new_node(c, b);
__bch2_btree_node_lock_write(b, iter);
six_lock_write(&b->lock);
__btree_node_free(c, b);
six_unlock_write(&b->lock);
bch2_btree_iter_node_drop(iter, b);
six_unlock_intent(&b->lock);
}
static void bch2_btree_node_free_ondisk(struct bch_fs *c,
@ -279,8 +270,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
{
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&pending->key),
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0,
gc_phase(GC_PHASE_PENDING_DELETE),
NULL, 0, 0);
@ -294,7 +284,6 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
struct write_point *wp;
struct btree *b;
BKEY_PADDED(k) tmp;
struct bkey_i_extent *e;
struct open_buckets ob = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
unsigned nr_reserve;
@ -345,8 +334,8 @@ retry:
goto retry;
}
e = bkey_extent_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size);
bkey_btree_ptr_init(&tmp.k);
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
bch2_open_bucket_get(c, wp, &ob);
bch2_alloc_sectors_done(c, wp);
@ -384,7 +373,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b->data->flags = 0;
SET_BTREE_NODE_ID(b->data, as->btree_id);
SET_BTREE_NODE_LEVEL(b->data, level);
b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr;
b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
bch2_btree_build_aux_trees(b);
@ -537,8 +526,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
goto err_free;
}
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
if (ret)
goto err_free;
@ -1078,8 +1066,10 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
__bch2_btree_set_root_inmem(c, b);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key),
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->usage_lock);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
@ -1090,6 +1080,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
&stats);
bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
gc_pos_btree_root(b->btree_id));
percpu_up_read_preempt_enable(&c->usage_lock);
mutex_unlock(&c->btree_interior_update_lock);
}
static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw)
@ -1166,11 +1159,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
if (bkey_extent_is_data(&insert->k))
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(insert),
true, 0,
gc_pos_btree_node(b), &stats, 0, 0);
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->usage_lock);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0,
gc_pos_btree_node(b), &stats, 0, 0);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@ -1188,6 +1182,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
gc_pos_btree_node(b));
percpu_up_read_preempt_enable(&c->usage_lock);
mutex_unlock(&c->btree_interior_update_lock);
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
set_btree_node_dirty(b);
set_btree_node_need_write(b);
@ -1420,25 +1417,19 @@ static void btree_split(struct btree_update *as, struct btree *b,
if (n3)
bch2_open_buckets_put(c, &n3->ob);
/*
* Note - at this point other linked iterators could still have @b read
* locked; we're depending on the bch2_btree_iter_node_replace() calls
* below removing all references to @b so we don't return with other
* iterators pointing to a node they have locked that's been freed.
*
* We have to free the node first because the bch2_iter_node_replace()
* calls will drop _our_ iterator's reference - and intent lock - to @b.
*/
bch2_btree_node_free_inmem(c, b, iter);
/* Successful split, update the iterator to point to the new nodes: */
bch2_btree_iter_node_drop(iter, b);
if (n3)
bch2_btree_iter_node_replace(iter, n3);
if (n2)
bch2_btree_iter_node_replace(iter, n2);
bch2_btree_iter_node_replace(iter, n1);
bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_iter_verify_locks(iter);
bch2_time_stats_update(&c->times[BCH_TIME_btree_split], start_time);
}
@ -1734,17 +1725,21 @@ retry:
bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
bch2_open_buckets_put(c, &n->ob);
bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_node_free_inmem(c, m, iter);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_replace(iter, n);
bch2_btree_iter_verify(iter, n);
bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_node_free_inmem(c, m, iter);
bch2_btree_update_done(as);
six_unlock_intent(&m->lock);
up_read(&c->gc_lock);
out:
bch2_btree_iter_verify_locks(iter);
/*
* Don't downgrade locks here: we're called after successful insert,
* and the caller will downgrade locks after a successful insert
@ -1827,9 +1822,9 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
bch2_open_buckets_put(c, &n->ob);
bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_replace(iter, n);
bch2_btree_node_free_inmem(c, b, iter);
bch2_btree_update_done(as);
return 0;
@ -1892,7 +1887,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
struct btree_update *as,
struct btree_iter *iter,
struct btree *b, struct btree *new_hash,
struct bkey_i_extent *new_key)
struct bkey_i_btree_ptr *new_key)
{
struct btree *parent;
int ret;
@ -1955,8 +1950,10 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_lock_write(b, iter);
bch2_mark_key(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&new_key->k_i),
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->usage_lock);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
@ -1966,6 +1963,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
gc_pos_btree_root(b->btree_id));
percpu_up_read_preempt_enable(&c->usage_lock);
mutex_unlock(&c->btree_interior_update_lock);
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, b);
@ -1986,7 +1986,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
}
int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
struct btree *b, struct bkey_i_extent *new_key)
struct btree *b,
struct bkey_i_btree_ptr *new_key)
{
struct btree *parent = btree_node_parent(iter, b);
struct btree_update *as = NULL;
@ -2052,8 +2053,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
goto err;
}
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
extent_i_to_s_c(new_key).s_c);
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i));
if (ret)
goto err_free_update;
@ -2111,9 +2111,9 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
b->level = 0;
b->btree_id = id;
bkey_extent_init(&b->key);
bkey_btree_ptr_init(&b->key);
b->key.k.p = POS_MAX;
bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id;
PTR_HASH(&b->key) = U64_MAX - id;
bch2_bset_init_first(b, &b->data->keys);
bch2_btree_build_aux_trees(b);

View File

@ -70,7 +70,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
goto overwrite;
}
k->type = KEY_TYPE_DELETED;
k->type = KEY_TYPE_deleted;
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
@ -186,7 +186,6 @@ bch2_insert_fixup_key(struct btree_insert *trans,
insert->k))
bch2_btree_journal_key(trans, iter, insert->k);
trans->did_work = true;
return BTREE_INSERT_OK;
}
@ -312,7 +311,6 @@ btree_key_can_insert(struct btree_insert *trans,
return BTREE_INSERT_BTREE_NODE_FULL;
if (!bch2_bkey_replicas_marked(c,
insert->iter->btree_id,
bkey_i_to_s_c(insert->k),
true))
return BTREE_INSERT_NEED_MARK_REPLICAS;
@ -337,6 +335,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct btree_iter *linked;
unsigned u64s;
int ret;
@ -414,12 +413,25 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
i->k->k.version = MAX_VERSION;
}
if (trans->flags & BTREE_INSERT_NOUNLOCK) {
/*
* linked iterators that weren't being updated may or may not
* have been traversed/locked, depending on what the caller was
* doing:
*/
for_each_btree_iter(trans->entries[0].iter, linked)
if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
linked->flags |= BTREE_ITER_NOUNLOCK;
}
trans->did_work = true;
trans_for_each_entry(trans, i) {
switch (btree_insert_key_leaf(trans, i)) {
case BTREE_INSERT_OK:
break;
case BTREE_INSERT_NEED_TRAVERSE:
BUG_ON((trans->flags & BTREE_INSERT_ATOMIC));
BUG_ON((trans->flags &
(BTREE_INSERT_ATOMIC|BTREE_INSERT_NOUNLOCK)));
ret = -EINTR;
goto out;
default:
@ -440,8 +452,8 @@ static inline void btree_insert_entry_checks(struct bch_fs *c,
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
BUG_ON(debug_check_bkeys(c) &&
!bkey_deleted(&i->k->k) &&
bch2_bkey_invalid(c, i->iter->btree_id,
bkey_i_to_s_c(i->k)));
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
i->iter->btree_id));
}
/**
@ -465,8 +477,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
BUG_ON(!trans->nr);
for_each_btree_iter(trans->entries[0].iter, linked)
bch2_btree_iter_verify_locks(linked);
bch2_btree_iter_verify_locks(trans->entries[0].iter);
/* for the sake of sanity: */
BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
@ -508,15 +519,11 @@ retry:
out:
percpu_ref_put(&c->writes);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
/* make sure we didn't drop or screw up locks: */
for_each_btree_iter(trans->entries[0].iter, linked) {
bch2_btree_iter_verify_locks(linked);
BUG_ON((trans->flags & BTREE_INSERT_NOUNLOCK) &&
trans->did_work &&
!btree_node_locked(linked, 0));
}
}
/* make sure we didn't drop or screw up locks: */
bch2_btree_iter_verify_locks(trans->entries[0].iter);
for_each_btree_iter(trans->entries[0].iter, linked)
linked->flags &= ~BTREE_ITER_NOUNLOCK;
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
@ -581,8 +588,7 @@ err:
}
bch2_btree_iter_unlock(trans->entries[0].iter);
ret = bch2_mark_bkey_replicas(c, i->iter->btree_id,
bkey_i_to_s_c(i->k))
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k))
?: -EINTR;
break;
default:

View File

@ -302,7 +302,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
static inline enum bch_data_type bucket_type(struct bucket_mark m)
{
return m.cached_sectors && !m.dirty_sectors
? BCH_DATA_CACHED
? BCH_DATA_CACHED
: m.data_type;
}
@ -322,6 +322,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
s64 added = sum.data + sum.reserved;
s64 should_not_have_added;
percpu_rwsem_assert_held(&c->usage_lock);
/*
* Not allowed to reduce sectors_available except by getting a
* reservation:
@ -338,7 +340,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
stats->online_reserved -= added;
}
percpu_down_read_preempt_disable(&c->usage_lock);
/* online_reserved not subject to gc: */
this_cpu_ptr(c->usage[0])->online_reserved +=
stats->online_reserved;
@ -350,7 +351,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
bch2_usage_add(this_cpu_ptr(c->usage[1]), stats);
bch2_fs_stats_verify(c);
percpu_up_read_preempt_enable(&c->usage_lock);
memset(stats, 0, sizeof(*stats));
}
@ -372,14 +372,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
dev_usage = this_cpu_ptr(ca->usage[gc]);
if (bucket_type(old) != bucket_type(new)) {
if (bucket_type(old)) {
fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
dev_usage->buckets[bucket_type(old)]--;
} else {
fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
dev_usage->buckets[bucket_type(new)]++;
}
if (bucket_type(old)) {
fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
dev_usage->buckets[bucket_type(old)]--;
}
if (bucket_type(new)) {
fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
dev_usage->buckets[bucket_type(new)]++;
}
dev_usage->buckets_alloc +=
@ -402,11 +402,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_dev_stats_verify(ca);
}
#define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \
void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
{
struct bucket_mark old = { .v.counter = 0 };
struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
percpu_down_read_preempt_disable(&c->usage_lock);
fs_usage = this_cpu_ptr(c->usage[0]);
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
if (g->mark.data_type)
bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
percpu_up_read_preempt_enable(&c->usage_lock);
}
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\
bch2_dev_usage_update(c, ca, stats, _old, new, gc); \
bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc); \
_old; \
})
@ -486,12 +503,12 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
{
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
struct bucket_mark new;
BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL);
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));
@ -542,7 +559,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
crc.uncompressed_size));
}
static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
static s64 ptr_disk_sectors(const struct bkey *k,
struct extent_ptr_decoded p,
s64 sectors)
{
@ -554,8 +571,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
old_sectors = 0;
new_sectors = sectors;
} else {
old_sectors = e.k->size;
new_sectors = e.k->size + sectors;
old_sectors = k->size;
new_sectors = k->size + sectors;
}
sectors = -__disk_sectors(p.crc, old_sectors)
@ -571,7 +588,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
* that with the gc pos seqlock held.
*/
static void bch2_mark_pointer(struct bch_fs *c,
struct bkey_s_c_extent e,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
@ -630,23 +646,25 @@ static void bch2_mark_pointer(struct bch_fs *c,
BUG_ON(!gc && bucket_became_unavailable(old, new));
}
static void bch2_mark_stripe_ptr(struct bch_fs *c,
struct bch_extent_stripe_ptr p,
s64 sectors, unsigned flags,
s64 *adjusted_disk_sectors,
unsigned *redundancy)
static int bch2_mark_stripe_ptr(struct bch_fs *c,
struct bch_extent_stripe_ptr p,
s64 sectors, unsigned flags,
s64 *adjusted_disk_sectors,
unsigned *redundancy,
bool gc)
{
struct ec_stripe *m;
struct stripe *m;
unsigned old, new, nr_data;
int blocks_nonempty_delta;
s64 parity_sectors;
m = genradix_ptr(&c->ec_stripes, p.idx);
if (WARN_ON(!m))
return;
m = genradix_ptr(&c->stripes[gc], p.idx);
if (WARN_ON(!m->alive))
return;
if (!m || !m->alive) {
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
return -1;
}
nr_data = m->nr_blocks - m->nr_redundant;
@ -664,81 +682,74 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c,
blocks_nonempty_delta = (int) !!new - (int) !!old;
if (!blocks_nonempty_delta)
return;
return 0;
atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
bch2_stripes_heap_update(c, m, p.idx);
if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
return 0;
}
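To make the delta arithmetic above concrete (old and new being that stripe block's accounted sectors before and after the update, computed just above this hunk): if a block previously held 0 sectors and this mark adds 8, blocks_nonempty_delta = !!8 - !!0 = 1, so the stripe's blocks_nonempty count goes up and, outside of gc, its position in the stripes heap is refreshed; adding more sectors to an already non-empty block gives !!new - !!old = 0 and the function returns early without touching the heap.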
static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags,
bool gc)
static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags,
bool gc)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
s64 cached_sectors = 0;
s64 dirty_sectors = 0;
s64 ec_sectors = 0;
unsigned replicas = 0;
unsigned ec_redundancy = 0;
unsigned i;
int ret;
BUG_ON(!sectors);
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
s64 cached_sectors = 0;
s64 dirty_sectors = 0;
s64 ec_sectors = 0;
unsigned replicas = 0;
unsigned ec_redundancy = 0;
unsigned i;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors);
s64 adjusted_disk_sectors = disk_sectors;
extent_for_each_ptr_decode(e, p, entry) {
s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
s64 adjusted_disk_sectors = disk_sectors;
bch2_mark_pointer(c, p, disk_sectors, data_type,
stats, journal_seq, flags, gc);
bch2_mark_pointer(c, e, p, disk_sectors, data_type,
stats, journal_seq, flags, gc);
if (!p.ptr.cached)
for (i = 0; i < p.ec_nr; i++) {
ret = bch2_mark_stripe_ptr(c, p.ec[i],
disk_sectors, flags,
&adjusted_disk_sectors,
&ec_redundancy, gc);
if (ret)
return ret;
}
if (!p.ptr.cached)
replicas++;
if (!p.ptr.cached)
for (i = 0; i < p.ec_nr; i++)
bch2_mark_stripe_ptr(c, p.ec[i],
disk_sectors, flags,
&adjusted_disk_sectors,
&ec_redundancy);
if (!p.ptr.cached)
replicas++;
if (p.ptr.cached)
cached_sectors += adjusted_disk_sectors;
else if (!p.ec_nr)
dirty_sectors += adjusted_disk_sectors;
else
ec_sectors += adjusted_disk_sectors;
}
replicas = clamp_t(unsigned, replicas,
1, ARRAY_SIZE(stats->replicas));
ec_redundancy = clamp_t(unsigned, ec_redundancy,
1, ARRAY_SIZE(stats->replicas));
stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors;
stats->replicas[replicas - 1].data[data_type] += dirty_sectors;
stats->replicas[ec_redundancy - 1].ec_data += ec_sectors;
break;
if (p.ptr.cached)
cached_sectors += adjusted_disk_sectors;
else if (!p.ec_nr)
dirty_sectors += adjusted_disk_sectors;
else
ec_sectors += adjusted_disk_sectors;
}
case BCH_RESERVATION: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
sectors *= replicas;
replicas = clamp_t(unsigned, replicas,
1, ARRAY_SIZE(stats->replicas));
replicas = clamp_t(unsigned, replicas,
1, ARRAY_SIZE(stats->replicas));
ec_redundancy = clamp_t(unsigned, ec_redundancy,
1, ARRAY_SIZE(stats->replicas));
stats->replicas[replicas - 1].persistent_reserved += sectors;
break;
}
}
stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors;
stats->replicas[replicas - 1].data[data_type] += dirty_sectors;
stats->replicas[ec_redundancy - 1].ec_data += ec_sectors;
return 0;
}
static void bucket_set_stripe(struct bch_fs *c,
@ -759,7 +770,7 @@ static void bucket_set_stripe(struct bch_fs *c,
BUG_ON(ptr_stale(ca, ptr));
old = bucket_cmpxchg(g, new, ({
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
@ -768,103 +779,143 @@ static void bucket_set_stripe(struct bch_fs *c,
}));
BUG_ON(old.stripe == enabled);
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
}
}
static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags,
bool gc)
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
bool inserting,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags,
bool gc)
{
switch (k.k->type) {
case BCH_STRIPE: {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
size_t idx = s.k->p.offset;
struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx);
unsigned i;
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
size_t idx = s.k->p.offset;
struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
unsigned i;
BUG_ON(!m);
BUG_ON(m->alive == inserting);
if (!m || (!inserting && !m->alive)) {
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
return -1;
}
BUG_ON(atomic_read(&m->blocks_nonempty));
if (inserting && m->alive) {
bch_err_ratelimited(c, "error marking stripe %zu: already exists",
idx);
return -1;
}
for (i = 0; i < EC_STRIPE_MAX; i++)
BUG_ON(atomic_read(&m->block_sectors[i]));
BUG_ON(atomic_read(&m->blocks_nonempty));
if (inserting) {
m->sectors = le16_to_cpu(s.v->sectors);
m->algorithm = s.v->algorithm;
m->nr_blocks = s.v->nr_blocks;
m->nr_redundant = s.v->nr_redundant;
}
for (i = 0; i < EC_STRIPE_MAX; i++)
BUG_ON(atomic_read(&m->block_sectors[i]));
if (inserting) {
m->sectors = le16_to_cpu(s.v->sectors);
m->algorithm = s.v->algorithm;
m->nr_blocks = s.v->nr_blocks;
m->nr_redundant = s.v->nr_redundant;
}
if (!gc) {
if (inserting)
bch2_stripes_heap_insert(c, m, idx);
else
bch2_stripes_heap_del(c, m, idx);
} else {
m->alive = inserting;
}
bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
break;
}
}
bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
return 0;
}
static void __bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k,
bool inserting, s64 sectors,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags,
bool gc)
static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
bool inserting, s64 sectors,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags,
bool gc)
{
switch (type) {
case BKEY_TYPE_BTREE:
bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
stats, journal_seq, flags, gc);
int ret = 0;
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
stats, journal_seq, flags, gc);
break;
case BKEY_TYPE_EXTENTS:
bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
stats, journal_seq, flags, gc);
case KEY_TYPE_extent:
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
stats, journal_seq, flags, gc);
break;
case BKEY_TYPE_EC:
bch2_mark_stripe(c, k, inserting,
stats, journal_seq, flags, gc);
case KEY_TYPE_stripe:
ret = bch2_mark_stripe(c, k, inserting,
stats, journal_seq, flags, gc);
break;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
sectors *= replicas;
replicas = clamp_t(unsigned, replicas,
1, ARRAY_SIZE(stats->replicas));
stats->replicas[replicas - 1].persistent_reserved += sectors;
break;
}
default:
break;
}
return ret;
}
void bch2_mark_key(struct bch_fs *c,
enum bkey_type type, struct bkey_s_c k,
int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
percpu_down_read_preempt_disable(&c->usage_lock);
int ret;
if (!(flags & BCH_BUCKET_MARK_GC)) {
if (!stats)
stats = this_cpu_ptr(c->usage[0]);
__bch2_mark_key(c, type, k, inserting, sectors,
stats, journal_seq, flags, false);
ret = __bch2_mark_key(c, k, inserting, sectors,
stats, journal_seq, flags, false);
if (ret)
return ret;
}
if ((flags & BCH_BUCKET_MARK_GC) ||
gc_visited(c, pos)) {
__bch2_mark_key(c, type, k, inserting, sectors,
this_cpu_ptr(c->usage[1]),
journal_seq, flags, true);
ret = __bch2_mark_key(c, k, inserting, sectors,
this_cpu_ptr(c->usage[1]),
journal_seq, flags, true);
if (ret)
return ret;
}
return 0;
}
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
int ret;
percpu_down_read_preempt_disable(&c->usage_lock);
ret = bch2_mark_key_locked(c, k, inserting, sectors,
pos, stats, journal_seq, flags);
percpu_up_read_preempt_enable(&c->usage_lock);
return ret;
}
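The bch2_mark_key()/bch2_mark_key_locked() split introduced here follows the usual foo()/foo_locked() convention: callers that already hold c->usage_lock (the interior-node update paths earlier in this diff, which also take btree_interior_update_lock) call the _locked variant directly, while everyone else goes through the wrapper that takes the lock around it. A generic stand-in sketch of the pattern, not part of the diff, using a plain pthread mutex where the real code uses a percpu rwsem taken for read:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t usage_lock = PTHREAD_MUTEX_INITIALIZER;
static long usage;	/* stand-in for the usage counters being updated */

/* caller must already hold usage_lock (cf. bch2_mark_key_locked) */
static int mark_key_locked(long sectors)
{
	usage += sectors;
	return 0;
}

/* convenience wrapper that takes the lock itself (cf. bch2_mark_key) */
static int mark_key(long sectors)
{
	int ret;

	pthread_mutex_lock(&usage_lock);
	ret = mark_key_locked(sectors);
	pthread_mutex_unlock(&usage_lock);
	return ret;
}

int main(void)
{
	/* callers that batch several updates take the lock once: */
	pthread_mutex_lock(&usage_lock);
	mark_key_locked(8);
	mark_key_locked(16);
	pthread_mutex_unlock(&usage_lock);

	mark_key(4);			/* simple callers use the wrapper */

	printf("usage = %ld\n", usage);	/* 28 */
	return 0;
}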
void bch2_mark_update(struct btree_insert *trans,
@ -878,15 +929,19 @@ void bch2_mark_update(struct btree_insert *trans,
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
if (!btree_node_type_needs_gc(iter->btree_id))
return;
percpu_down_read_preempt_disable(&c->usage_lock);
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k),
true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),
pos, &stats, trans->journal_res.seq, 0);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),
pos, &stats, trans->journal_res.seq, 0);
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_DISCARD))) {
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
s64 sectors = 0;
@ -915,9 +970,8 @@ void bch2_mark_update(struct btree_insert *trans,
sectors = k.k->p.offset - insert->k->k.p.offset;
BUG_ON(sectors <= 0);
bch2_mark_key(c, btree_node_type(b), k,
true, sectors,
pos, &stats, trans->journal_res.seq, 0);
bch2_mark_key_locked(c, k, true, sectors,
pos, &stats, trans->journal_res.seq, 0);
sectors = bkey_start_offset(&insert->k->k) -
k.k->p.offset;
@ -927,14 +981,15 @@ void bch2_mark_update(struct btree_insert *trans,
BUG_ON(sectors >= 0);
}
bch2_mark_key(c, btree_node_type(b), k,
false, sectors,
pos, &stats, trans->journal_res.seq, 0);
bch2_mark_key_locked(c, k, false, sectors,
pos, &stats, trans->journal_res.seq, 0);
bch2_btree_node_iter_advance(&node_iter, b);
}
bch2_fs_usage_apply(c, &stats, trans->disk_res, pos);
percpu_up_read_preempt_enable(&c->usage_lock);
}
/* Disk reservations: */


@ -219,9 +219,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_NOATOMIC (1 << 0)
#define BCH_BUCKET_MARK_GC (1 << 1)
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);


@ -55,7 +55,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
v->btree_id = b->btree_id;
bch2_btree_keys_init(v, &c->expensive_debug_checks);
if (bch2_btree_pick_ptr(c, b, NULL, &pick) <= 0)
if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
NULL, &pick) <= 0)
return;
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
@ -222,8 +223,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
k = bch2_btree_iter_peek(&iter);
while (k.k && !(err = btree_iter_err(k))) {
bch2_bkey_val_to_text(&PBUF(i->buf), i->c,
bkey_type(0, i->id), k);
bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
i->bytes = strlen(i->buf);
BUG_ON(i->bytes >= PAGE_SIZE);
i->buf[i->bytes] = '\n';


@ -64,8 +64,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
const struct bch_hash_desc bch2_dirent_hash_desc = {
.btree_id = BTREE_ID_DIRENTS,
.key_type = BCH_DIRENT,
.whiteout_type = BCH_DIRENT_WHITEOUT,
.key_type = KEY_TYPE_dirent,
.hash_key = dirent_hash_key,
.hash_bkey = dirent_hash_bkey,
.cmp_key = dirent_cmp_key,
@ -74,58 +73,37 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_dirent d;
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
unsigned len;
switch (k.k->type) {
case BCH_DIRENT:
if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
return "value too small";
if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
return "value too small";
d = bkey_s_c_to_dirent(k);
len = bch2_dirent_name_bytes(d);
len = bch2_dirent_name_bytes(d);
if (!len)
return "empty name";
if (!len)
return "empty name";
/*
* older versions of bcachefs were buggy and creating dirent
* keys that were bigger than necessary:
*/
if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
return "value too big";
/*
* older versions of bcachefs were buggy and creating dirent
* keys that were bigger than necessary:
*/
if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
return "value too big";
if (len > BCH_NAME_MAX)
return "dirent name too big";
if (len > BCH_NAME_MAX)
return "dirent name too big";
return NULL;
case BCH_DIRENT_WHITEOUT:
return bkey_val_bytes(k.k) != 0
? "value size should be zero"
: NULL;
default:
return "invalid type";
}
return NULL;
}
void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_dirent d;
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
switch (k.k->type) {
case BCH_DIRENT:
d = bkey_s_c_to_dirent(k);
bch_scnmemcpy(out, d.v->d_name,
bch2_dirent_name_bytes(d));
pr_buf(out, " -> %llu", d.v->d_inum);
break;
case BCH_DIRENT_WHITEOUT:
pr_buf(out, "whiteout");
break;
}
bch_scnmemcpy(out, d.v->d_name,
bch2_dirent_name_bytes(d));
pr_buf(out, " -> %llu", d.v->d_inum);
}
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
@ -286,7 +264,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
* overwrite old_dst - just make sure to use a
* whiteout when deleting src:
*/
new_src->k.type = BCH_DIRENT_WHITEOUT;
new_src->k.type = KEY_TYPE_whiteout;
}
} else {
/* Check if we need a whiteout to delete src: */
@ -297,7 +275,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
return ret;
if (ret)
new_src->k.type = BCH_DIRENT_WHITEOUT;
new_src->k.type = KEY_TYPE_whiteout;
}
}
@ -360,7 +338,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
if (k.k->p.inode > dir_inum)
break;
if (k.k->type == BCH_DIRENT) {
if (k.k->type == KEY_TYPE_dirent) {
ret = -ENOTEMPTY;
break;
}
@ -384,7 +362,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
POS(inode->v.i_ino, ctx->pos), 0, k) {
if (k.k->type != BCH_DIRENT)
if (k.k->type != KEY_TYPE_dirent)
continue;
dirent = bkey_s_c_to_dirent(k);


@ -8,7 +8,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc;
const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_dirent_ops (struct bkey_ops) { \
#define bch2_bkey_ops_dirent (struct bkey_ops) { \
.key_invalid = bch2_dirent_invalid, \
.val_to_text = bch2_dirent_to_text, \
}
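
Because key types are now globally unique, each type can declare its operations with a bch2_bkey_ops_<name> macro like the one above, and generic code can dispatch through a single table indexed by KEY_TYPE_*. A hedged sketch of that pattern with made-up types and callbacks; the real bcachefs dispatch table may be built differently:

#include <stdio.h>

enum key_type {
	KEY_TYPE_deleted,
	KEY_TYPE_dirent,
	KEY_TYPE_inode,
	KEY_TYPE_MAX,
};

struct bkey_ops_sketch {
	const char *(*key_invalid)(const void *key);
};

static const char *dirent_invalid(const void *key) { (void) key; return NULL; }
static const char *inode_invalid(const void *key)  { (void) key; return NULL; }

static const struct bkey_ops_sketch bkey_ops_dirent = { .key_invalid = dirent_invalid };
static const struct bkey_ops_sketch bkey_ops_inode  = { .key_invalid = inode_invalid };

/* one table covers every key type, regardless of which btree the key lives in */
static const struct bkey_ops_sketch *bkey_ops[KEY_TYPE_MAX] = {
	[KEY_TYPE_dirent]	= &bkey_ops_dirent,
	[KEY_TYPE_inode]	= &bkey_ops_inode,
};

static const char *key_invalid(enum key_type type, const void *key)
{
	if (!bkey_ops[type])
		return "unknown key type";
	return bkey_ops[type]->key_invalid(key);
}

int main(void)
{
	const char *err = key_invalid(KEY_TYPE_dirent, "x");

	printf("dirent: %s\n", err ? err : "ok");
	printf("deleted: %s\n", key_invalid(KEY_TYPE_deleted, "x"));
	return 0;
}
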


@ -122,49 +122,39 @@ static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx)
return csums + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
}
const char *bch2_ec_key_invalid(const struct bch_fs *c, struct bkey_s_c k)
const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
if (k.k->p.inode)
return "invalid stripe key";
switch (k.k->type) {
case BCH_STRIPE: {
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
if (bkey_val_bytes(k.k) < sizeof(*s))
return "incorrect value size";
if (bkey_val_bytes(k.k) < sizeof(*s))
return "incorrect value size";
if (bkey_val_u64s(k.k) != stripe_val_u64s(s))
return "incorrect value size";
if (bkey_val_u64s(k.k) != stripe_val_u64s(s))
return "incorrect value size";
return NULL;
}
default:
return "invalid type";
}
return NULL;
}
void bch2_ec_key_to_text(struct printbuf *out, struct bch_fs *c,
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_STRIPE: {
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned i;
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned i;
pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
s->algorithm,
le16_to_cpu(s->sectors),
s->nr_blocks - s->nr_redundant,
s->nr_redundant,
s->csum_type,
1U << s->csum_granularity_bits);
pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
s->algorithm,
le16_to_cpu(s->sectors),
s->nr_blocks - s->nr_redundant,
s->nr_redundant,
s->csum_type,
1U << s->csum_granularity_bits);
for (i = 0; i < s->nr_blocks; i++)
pr_buf(out, " %u:%llu", s->ptrs[i].dev,
(u64) s->ptrs[i].offset);
}
}
for (i = 0; i < s->nr_blocks; i++)
pr_buf(out, " %u:%llu", s->ptrs[i].dev,
(u64) s->ptrs[i].offset);
}
static int ptr_matches_stripe(struct bch_fs *c,
@ -453,7 +443,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
POS(0, stripe_idx),
BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(&iter);
if (btree_iter_err(k) || k.k->type != BCH_STRIPE) {
if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) {
__bcache_io_error(c,
"error doing reconstruct read: stripe not found");
kfree(buf);
@ -529,7 +519,7 @@ err:
return ret;
}
/* ec_stripe bucket accounting: */
/* stripe bucket accounting: */
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
@ -550,7 +540,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
free_heap(&n);
}
if (!genradix_ptr_alloc(&c->ec_stripes, idx, gfp))
if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp))
return -ENOMEM;
if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
!genradix_ptr_alloc(&c->stripes[1], idx, gfp))
return -ENOMEM;
return 0;
@ -591,27 +585,26 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
{
struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
genradix_ptr(&c->ec_stripes, h->data[i].idx)->heap_idx = i;
genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i;
}
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx);
struct stripe *m = genradix_ptr(&c->stripes[0], idx);
BUG_ON(!m->alive);
BUG_ON(m->heap_idx >= h->used);
BUG_ON(h->data[m->heap_idx].idx != idx);
}
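
heap_verify_backpointer() checks the invariant that every live stripe records its current slot in the stripes heap, which is what lets the heap code find and re-sift an arbitrary element when its priority changes. A toy min-heap that maintains the same kind of backpointer, purely for illustration and unrelated to the real ec_stripes_heap layout:

#include <assert.h>
#include <stdio.h>

struct item { int priority; size_t heap_idx; };
struct heap_entry { size_t item_idx; };

static struct item items[8];
static struct heap_entry heap[8];
static size_t heap_used;

static void heap_set(size_t i, struct heap_entry e)
{
	heap[i] = e;
	items[e.item_idx].heap_idx = i;	/* maintain the backpointer */
}

static void heap_swap(size_t i, size_t j)
{
	struct heap_entry tmp = heap[i];

	heap_set(i, heap[j]);
	heap_set(j, tmp);
}

static int heap_cmp(size_t i, size_t j)
{
	return items[heap[i].item_idx].priority <
	       items[heap[j].item_idx].priority;
}

static void heap_sift_up(size_t i)
{
	while (i && heap_cmp(i, (i - 1) / 2)) {
		heap_swap(i, (i - 1) / 2);
		i = (i - 1) / 2;
	}
}

static void heap_insert(size_t item_idx)
{
	heap_set(heap_used, (struct heap_entry) { item_idx });
	heap_sift_up(heap_used++);
}

/* the same check heap_verify_backpointer() does for stripes */
static void verify_backpointer(size_t item_idx)
{
	size_t h = items[item_idx].heap_idx;

	assert(h < heap_used);
	assert(heap[h].item_idx == item_idx);
}

int main(void)
{
	size_t i;

	items[0].priority = 5;
	items[1].priority = 1;
	items[2].priority = 3;

	for (i = 0; i < 3; i++)
		heap_insert(i);
	for (i = 0; i < 3; i++)
		verify_backpointer(i);

	printf("min priority: %d\n", items[heap[0].item_idx].priority);
	return 0;
}
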
static inline unsigned stripe_entry_blocks(struct ec_stripe *m)
static inline unsigned stripe_entry_blocks(struct stripe *m)
{
return atomic_read(&m->pin)
? UINT_MAX : atomic_read(&m->blocks_nonempty);
return atomic_read(&m->blocks_nonempty);
}
void bch2_stripes_heap_update(struct bch_fs *c,
struct ec_stripe *m, size_t idx)
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
bool queue_delete;
@ -645,7 +638,7 @@ void bch2_stripes_heap_update(struct bch_fs *c,
}
void bch2_stripes_heap_del(struct bch_fs *c,
struct ec_stripe *m, size_t idx)
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);
@ -658,7 +651,7 @@ void bch2_stripes_heap_del(struct bch_fs *c,
}
void bch2_stripes_heap_insert(struct bch_fs *c,
struct ec_stripe *m, size_t idx)
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);
@ -677,7 +670,9 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
}
static void ec_stripe_delete(struct bch_fs *c, unsigned idx)
/* stripe deletion */
static void ec_stripe_delete(struct bch_fs *c, size_t idx)
{
struct btree_iter iter;
struct bch_stripe *v = NULL;
@ -689,7 +684,7 @@ static void ec_stripe_delete(struct bch_fs *c, unsigned idx)
POS(0, idx),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
if (btree_iter_err(k) || k.k->type != BCH_STRIPE)
if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe)
goto out;
v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL);
@ -716,6 +711,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
ssize_t idx;
down_read(&c->gc_lock);
mutex_lock(&c->ec_stripe_create_lock);
while (1) {
spin_lock(&c->ec_stripes_heap_lock);
@ -728,13 +724,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
ec_stripe_delete(c, idx);
}
mutex_unlock(&c->ec_stripe_create_lock);
up_read(&c->gc_lock);
}
/* stripe creation: */
static int ec_stripe_bkey_insert(struct bch_fs *c,
struct bkey_i_stripe *stripe)
{
struct ec_stripe *m;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
@ -754,18 +752,13 @@ retry:
return bch2_btree_iter_unlock(&iter) ?: -ENOSPC;
found_slot:
mutex_lock(&c->ec_stripes_lock);
ret = ec_stripe_mem_alloc(c, &iter);
mutex_unlock(&c->ec_stripes_lock);
if (ret == -EINTR)
goto retry;
if (ret)
return ret;
m = genradix_ptr(&c->ec_stripes, iter.pos.offset);
atomic_inc(&m->pin);
stripe->k.p = iter.pos;
ret = bch2_btree_insert_at(c, NULL, NULL,
@ -774,14 +767,9 @@ found_slot:
BTREE_INSERT_ENTRY(&iter, &stripe->k_i));
bch2_btree_iter_unlock(&iter);
if (ret)
atomic_dec(&m->pin);
return ret;
}
/* stripe creation: */
static void extent_stripe_ptr_add(struct bkey_s_extent e,
struct ec_stripe_buf *s,
struct bch_extent_ptr *ptr,
@ -857,7 +845,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
*/
static void ec_stripe_create(struct ec_stripe_new *s)
{
struct ec_stripe *ec_stripe;
struct bch_fs *c = s->c;
struct open_bucket *ob;
struct bkey_i *k;
@ -897,10 +884,12 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err_put_writes;
}
mutex_lock(&c->ec_stripe_create_lock);
ret = ec_stripe_bkey_insert(c, &s->stripe.key);
if (ret) {
bch_err(c, "error creating stripe: error creating stripe key");
goto err_put_writes;
goto err_unlock;
}
for_each_keylist_key(&s->keys, k) {
@ -909,12 +898,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
break;
}
ec_stripe = genradix_ptr(&c->ec_stripes, s->stripe.key.k.p.offset);
atomic_dec(&ec_stripe->pin);
bch2_stripes_heap_update(c, ec_stripe,
s->stripe.key.k.p.offset);
err_unlock:
mutex_unlock(&c->ec_stripe_create_lock);
err_put_writes:
percpu_ref_put(&c->writes);
err:
@ -1221,7 +1206,7 @@ unlock:
mutex_unlock(&c->ec_new_stripe_lock);
}
int bch2_fs_ec_start(struct bch_fs *c)
int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{
struct btree_iter iter;
struct bkey_s_c k;
@ -1237,19 +1222,25 @@ int bch2_fs_ec_start(struct bch_fs *c)
if (ret)
return ret;
if (!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
if (!gc &&
!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
GFP_KERNEL))
return -ENOMEM;
#if 0
ret = genradix_prealloc(&c->ec_stripes, idx, GFP_KERNEL);
ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL);
#else
for (i = 0; i < idx; i++)
if (!genradix_ptr_alloc(&c->ec_stripes, i, GFP_KERNEL))
if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL))
return -ENOMEM;
#endif
return 0;
}
int bch2_fs_ec_start(struct bch_fs *c)
{
return bch2_ec_mem_alloc(c, false);
}
void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;
@ -1270,7 +1261,7 @@ void bch2_fs_ec_exit(struct bch_fs *c)
}
free_heap(&c->ec_stripes_heap);
genradix_free(&c->ec_stripes);
genradix_free(&c->stripes[0]);
bioset_exit(&c->ec_bioset);
}


@ -4,13 +4,13 @@
#include "ec_types.h"
#include "keylist_types.h"
const char *bch2_ec_key_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_ec_key_to_text(struct printbuf *, struct bch_fs *,
const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
#define bch2_bkey_ec_ops (struct bkey_ops) { \
.key_invalid = bch2_ec_key_invalid, \
.val_to_text = bch2_ec_key_to_text, \
#define bch2_bkey_ops_stripe (struct bkey_ops) { \
.key_invalid = bch2_stripe_invalid, \
.val_to_text = bch2_stripe_to_text, \
}
struct bch_read_bio;
@ -92,14 +92,16 @@ void bch2_ec_stripe_head_put(struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
unsigned, unsigned);
void bch2_stripes_heap_update(struct bch_fs *, struct ec_stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct ec_stripe *, size_t);
void bch2_stripes_heap_insert(struct bch_fs *, struct ec_stripe *, size_t);
void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_ec_flush_new_stripes(struct bch_fs *);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
int bch2_fs_ec_start(struct bch_fs *);
void bch2_fs_ec_exit(struct bch_fs *);


@ -5,7 +5,7 @@
#define EC_STRIPE_MAX 16
struct ec_stripe {
struct stripe {
size_t heap_idx;
u16 sectors;
@ -15,7 +15,6 @@ struct ec_stripe {
u8 nr_redundant;
u8 alive;
atomic_t pin;
atomic_t blocks_nonempty;
atomic_t block_sectors[EC_STRIPE_MAX];
};

(File diff suppressed because it is too large)


@ -6,141 +6,37 @@
#include "extents_types.h"
struct bch_fs;
struct journal_res;
struct btree_node_iter;
struct btree_node_iter_large;
struct btree_insert;
struct btree_insert_entry;
struct bch_devs_mask;
union bch_extent_crc;
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
/* extent entries: */
#define bch2_bkey_btree_ops (struct bkey_ops) { \
.key_invalid = bch2_btree_ptr_invalid, \
.key_debugcheck = bch2_btree_ptr_debugcheck, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
}
#define extent_entry_last(_e) bkey_val_end(_e)
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
struct bkey_i *, struct bkey_i *);
#define entry_to_ptr(_entry) \
({ \
EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \
\
__builtin_choose_expr( \
type_is_exact(_entry, const union bch_extent_entry *), \
(const struct bch_extent_ptr *) (_entry), \
(struct bch_extent_ptr *) (_entry)); \
})
#define bch2_bkey_extent_ops (struct bkey_ops) { \
.key_invalid = bch2_extent_invalid, \
.key_debugcheck = bch2_extent_debugcheck, \
.val_to_text = bch2_extent_to_text, \
.swab = bch2_ptr_swab, \
.key_normalize = bch2_ptr_normalize, \
.key_merge = bch2_extent_merge, \
.is_extents = true, \
}
struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *,
struct btree *,
struct btree_node_iter_large *);
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct bset *,
struct btree *,
struct btree_node_iter_large *);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *);
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
static inline bool bch2_extent_is_atomic(struct bkey *k,
struct btree_iter *iter)
{
struct btree *b = iter->l[0].b;
return bkey_cmp(k->p, b->key.k.p) <= 0 &&
bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
}
enum btree_insert_ret
bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
unsigned *);
enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
unsigned, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
void bch2_extent_drop_device(struct bkey_s_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned);
unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent);
unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c);
unsigned bch2_extent_is_compressed(struct bkey_s_c);
unsigned bch2_extent_durability(struct bch_fs *, struct bkey_s_c_extent);
bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
struct bch_extent_ptr, u64);
static inline bool bkey_extent_is_data(const struct bkey *k)
{
switch (k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
return true;
default:
return false;
}
}
static inline bool bkey_extent_is_allocation(const struct bkey *k)
{
switch (k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
case BCH_RESERVATION:
return true;
default:
return false;
}
}
static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k)
{
return bkey_extent_is_allocation(k.k) &&
!bch2_extent_is_compressed(k);
}
static inline bool bkey_extent_is_cached(const struct bkey *k)
{
return k->type == BCH_EXTENT_CACHED;
}
static inline void bkey_extent_set_cached(struct bkey *k, bool cached)
{
EBUG_ON(k->type != BCH_EXTENT &&
k->type != BCH_EXTENT_CACHED);
k->type = cached ? BCH_EXTENT_CACHED : BCH_EXTENT;
}
/* downcast, preserves const */
#define to_entry(_entry) \
({ \
BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
!type_is(_entry, struct bch_extent_ptr *) && \
!type_is(_entry, struct bch_extent_stripe_ptr *)); \
\
__builtin_choose_expr( \
(type_is_exact(_entry, const union bch_extent_crc *) || \
type_is_exact(_entry, const struct bch_extent_ptr *) ||\
type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
(const union bch_extent_entry *) (_entry), \
(union bch_extent_entry *) (_entry)); \
})
static inline unsigned
__extent_entry_type(const union bch_extent_entry *e)
@ -205,21 +101,6 @@ union bch_extent_crc {
struct bch_extent_crc128 crc128;
};
/* downcast, preserves const */
#define to_entry(_entry) \
({ \
BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
!type_is(_entry, struct bch_extent_ptr *) && \
!type_is(_entry, struct bch_extent_stripe_ptr *)); \
\
__builtin_choose_expr( \
(type_is_exact(_entry, const union bch_extent_crc *) || \
type_is_exact(_entry, const struct bch_extent_ptr *) ||\
type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
(const union bch_extent_entry *) (_entry), \
(union bch_extent_entry *) (_entry)); \
})
#define __entry_to_crc(_entry) \
__builtin_choose_expr( \
type_is_exact(_entry, const union bch_extent_entry *), \
@ -233,18 +114,6 @@ union bch_extent_crc {
__entry_to_crc(_entry); \
})
#define entry_to_ptr(_entry) \
({ \
EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \
\
__builtin_choose_expr( \
type_is_exact(_entry, const union bch_extent_entry *), \
(const struct bch_extent_ptr *) (_entry), \
(struct bch_extent_ptr *) (_entry)); \
})
/* checksum entries: */
static inline struct bch_extent_crc_unpacked
bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
{
@ -302,43 +171,317 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
#undef common_fields
}
/* Extent entry iteration: */
/* bkey_ptrs: generically over any key type that has ptrs */
struct bkey_ptrs_c {
const union bch_extent_entry *start;
const union bch_extent_entry *end;
};
struct bkey_ptrs {
union bch_extent_entry *start;
union bch_extent_entry *end;
};
/* iterate over bkey ptrs */
#define extent_entry_next(_entry) \
((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
#define extent_entry_last(_e) \
vstruct_idx((_e).v, bkey_val_u64s((_e).k))
#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \
for ((_entry) = (_start); \
(_entry) < (_end); \
(_entry) = extent_entry_next(_entry))
/* Iterate over all entries: */
#define __bkey_ptr_next(_ptr, _end) \
({ \
typeof(_end) _entry; \
\
__bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \
if (extent_entry_is_ptr(_entry)) \
break; \
\
_entry < (_end) ? entry_to_ptr(_entry) : NULL; \
})
#define bkey_extent_entry_for_each_from(_p, _entry, _start) \
__bkey_extent_entry_for_each_from(_start, (_p).end, _entry)
#define bkey_extent_entry_for_each(_p, _entry) \
bkey_extent_entry_for_each_from(_p, _entry, _p.start)
#define __bkey_for_each_ptr(_start, _end, _ptr) \
for ((_ptr) = (_start); \
((_ptr) = __bkey_ptr_next(_ptr, _end)); \
(_ptr)++)
#define bkey_ptr_next(_p, _ptr) \
__bkey_ptr_next(_ptr, (_p).end)
#define bkey_for_each_ptr(_p, _ptr) \
__bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr)
#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \
({ \
__label__ out; \
\
(_ptr).idx = 0; \
(_ptr).ec_nr = 0; \
\
__bkey_extent_entry_for_each_from(_entry, _end, _entry) \
switch (extent_entry_type(_entry)) { \
case BCH_EXTENT_ENTRY_ptr: \
(_ptr).ptr = _entry->ptr; \
goto out; \
case BCH_EXTENT_ENTRY_crc32: \
case BCH_EXTENT_ENTRY_crc64: \
case BCH_EXTENT_ENTRY_crc128: \
(_ptr).crc = bch2_extent_crc_unpack(_k, \
entry_to_crc(_entry)); \
break; \
case BCH_EXTENT_ENTRY_stripe_ptr: \
(_ptr).ec[(_ptr).ec_nr++] = _entry->stripe_ptr; \
break; \
} \
out: \
_entry < (_end); \
})
#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \
for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \
(_entry) = _start; \
__bkey_ptr_next_decode(_k, _end, _ptr, _entry); \
(_entry) = extent_entry_next(_entry))
#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \
__bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \
_ptr, _entry)
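
__bkey_ptr_next_decode() walks the entry list while remembering the most recent checksum entry (and any stripe pointers), yielding once per data pointer so callers see each pointer already paired with its checksum. A simplified standalone version of that state-carrying walk, with toy entry types and no stripe pointers:

#include <stdio.h>

enum entry_type { ENTRY_CRC, ENTRY_PTR };

struct entry {
	enum entry_type type;
	unsigned val;		/* checksum type, or device index */
};

struct ptr_decoded { unsigned dev; unsigned crc; };

/*
 * Walk a flat list of entries, remembering the most recent checksum entry
 * and yielding one decoded pointer per ENTRY_PTR, tagged with that checksum.
 */
static int next_decode(const struct entry *entries, unsigned nr,
		       unsigned *i, struct ptr_decoded *p)
{
	for (; *i < nr; (*i)++) {
		switch (entries[*i].type) {
		case ENTRY_CRC:
			p->crc = entries[*i].val;
			break;
		case ENTRY_PTR:
			p->dev = entries[*i].val;
			(*i)++;
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	const struct entry entries[] = {
		{ ENTRY_CRC, 32 },
		{ ENTRY_PTR, 0 },
		{ ENTRY_PTR, 1 },
		{ ENTRY_CRC, 64 },
		{ ENTRY_PTR, 2 },
	};
	struct ptr_decoded p = { 0, 0 };
	unsigned i = 0;

	while (next_decode(entries, 5, &i, &p))
		printf("dev %u crc %u\n", p.dev, p.crc);
	return 0;
}
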
/* utility code common to all keys with pointers: */
static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_btree_ptr: {
struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
return (struct bkey_ptrs_c) {
to_entry(&e.v->start[0]),
to_entry(bkey_val_end(e))
};
}
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
return (struct bkey_ptrs_c) {
e.v->start,
extent_entry_last(e)
};
}
case KEY_TYPE_stripe: {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
return (struct bkey_ptrs_c) {
to_entry(&s.v->ptrs[0]),
to_entry(&s.v->ptrs[s.v->nr_blocks]),
};
}
default:
return (struct bkey_ptrs_c) { NULL, NULL };
}
}
static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
{
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c);
return (struct bkey_ptrs) {
(void *) p.start,
(void *) p.end
};
}
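
bch2_bkey_ptrs_c() above reduces every pointer-bearing key type to one [start, end) range of extent entries, which is what lets the helpers that follow (bch2_bkey_devs() and friends) work on btree pointers, extents and stripes alike. A simplified standalone sketch of the same idea, using toy key and pointer types rather than the real bkey machinery:

#include <stdio.h>

enum toy_key_type { TOY_EXTENT, TOY_STRIPE, TOY_OTHER };

struct toy_ptr { unsigned dev; };

struct toy_key {
	enum toy_key_type type;
	unsigned nr_ptrs;
	struct toy_ptr ptrs[4];
};

struct toy_ptrs {
	const struct toy_ptr *start;
	const struct toy_ptr *end;
};

/* one helper turns any pointer-bearing key into a [start, end) range */
static struct toy_ptrs toy_key_ptrs(const struct toy_key *k)
{
	switch (k->type) {
	case TOY_EXTENT:
	case TOY_STRIPE:
		return (struct toy_ptrs) { k->ptrs, k->ptrs + k->nr_ptrs };
	default:
		return (struct toy_ptrs) { NULL, NULL };
	}
}

#define toy_for_each_ptr(_p, _ptr) \
	for ((_ptr) = (_p).start; (_ptr) < (_p).end; (_ptr)++)

/* generic helper: works for every key type that has pointers */
static unsigned toy_key_nr_devs(const struct toy_key *k)
{
	struct toy_ptrs p = toy_key_ptrs(k);
	const struct toy_ptr *ptr;
	unsigned nr = 0;

	toy_for_each_ptr(p, ptr)
		nr++;
	return nr;
}

int main(void)
{
	struct toy_key e = { TOY_EXTENT, 2, { { .dev = 0 }, { .dev = 3 } } };
	struct toy_key o = { TOY_OTHER, 0, { { 0 } } };

	printf("extent devs: %u, other devs: %u\n",
	       toy_key_nr_devs(&e), toy_key_nr_devs(&o));
	return 0;
}
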
static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(p, ptr)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(p, ptr)
if (!ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(p, ptr)
if (ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(p, ptr)
if (ptr->dev == dev)
return ptr;
return NULL;
}
unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
void bch2_mark_io_failure(struct bch_io_failures *,
struct extent_ptr_decoded *);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *);
/* bch_btree_ptr: */
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \
.key_invalid = bch2_btree_ptr_invalid, \
.key_debugcheck = bch2_btree_ptr_debugcheck, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
}
/* bch_extent: */
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *);
#define bch2_bkey_ops_extent (struct bkey_ops) { \
.key_invalid = bch2_extent_invalid, \
.key_debugcheck = bch2_extent_debugcheck, \
.val_to_text = bch2_extent_to_text, \
.swab = bch2_ptr_swab, \
.key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
}
/* bch_reservation: */
const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
enum merge_result bch2_reservation_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *);
#define bch2_bkey_ops_reservation (struct bkey_ops) { \
.key_invalid = bch2_reservation_invalid, \
.val_to_text = bch2_reservation_to_text, \
.key_merge = bch2_reservation_merge, \
}
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
static inline bool bch2_extent_is_atomic(struct bkey *k,
struct btree_iter *iter)
{
struct btree *b = iter->l[0].b;
return bkey_cmp(k->p, b->key.k.p) <= 0 &&
bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
}
enum btree_insert_ret
bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
unsigned *);
enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
unsigned, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned);
unsigned bch2_extent_is_compressed(struct bkey_s_c);
bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
struct bch_extent_ptr, u64);
static inline bool bkey_extent_is_data(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_extent:
return true;
default:
return false;
}
}
static inline bool bkey_extent_is_allocation(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_extent:
case KEY_TYPE_reservation:
return true;
default:
return false;
}
}
static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k)
{
return bkey_extent_is_allocation(k.k) &&
!bch2_extent_is_compressed(k);
}
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_bkey_drop_device(struct bkey_s, unsigned);
/* Extent entry iteration: */
#define extent_for_each_entry_from(_e, _entry, _start) \
for ((_entry) = _start; \
(_entry) < extent_entry_last(_e); \
(_entry) = extent_entry_next(_entry))
__bkey_extent_entry_for_each_from(_start, \
extent_entry_last(_e),_entry)
#define extent_for_each_entry(_e, _entry) \
extent_for_each_entry_from(_e, _entry, (_e).v->start)
/* Iterate over pointers only: */
#define extent_ptr_next(_e, _ptr) \
({ \
typeof(&(_e).v->start[0]) _entry; \
\
extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \
if (extent_entry_is_ptr(_entry)) \
break; \
\
_entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \
})
__bkey_ptr_next(_ptr, extent_entry_last(_e))
#define extent_for_each_ptr(_e, _ptr) \
for ((_ptr) = &(_e).v->start->ptr; \
((_ptr) = extent_ptr_next(_e, _ptr)); \
(_ptr)++)
/* Iterate over crcs only: */
__bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
#define extent_crc_next(_e, _crc, _iter) \
({ \
@ -357,41 +500,9 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
extent_crc_next(_e, _crc, _iter); \
(_iter) = extent_entry_next(_iter))
/* Iterate over pointers, with crcs: */
#define __extent_ptr_next_decode(_e, _ptr, _entry) \
({ \
__label__ out; \
\
(_ptr).idx = 0; \
(_ptr).ec_nr = 0; \
\
extent_for_each_entry_from(_e, _entry, _entry) \
switch (extent_entry_type(_entry)) { \
case BCH_EXTENT_ENTRY_ptr: \
(_ptr).ptr = _entry->ptr; \
goto out; \
case BCH_EXTENT_ENTRY_crc32: \
case BCH_EXTENT_ENTRY_crc64: \
case BCH_EXTENT_ENTRY_crc128: \
(_ptr).crc = bch2_extent_crc_unpack((_e).k, \
entry_to_crc(_entry)); \
break; \
case BCH_EXTENT_ENTRY_stripe_ptr: \
(_ptr).ec[(_ptr).ec_nr++] = _entry->stripe_ptr; \
break; \
} \
out: \
_entry < extent_entry_last(_e); \
})
#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
for ((_ptr).crc = bch2_extent_crc_unpack((_e).k, NULL), \
(_entry) = (_e).v->start; \
__extent_ptr_next_decode(_e, _ptr, _entry); \
(_entry) = extent_entry_next(_entry))
/* Iterate over pointers backwards: */
__bkey_for_each_ptr_decode((_e).k, (_e).v->start, \
extent_entry_last(_e), _ptr, _entry)
void bch2_extent_crc_append(struct bkey_i_extent *,
struct bch_extent_crc_unpacked);
@ -420,96 +531,23 @@ static inline void __extent_entry_push(struct bkey_i_extent *e)
e->k.u64s += extent_entry_u64s(entry);
}
static inline void extent_ptr_append(struct bkey_i_extent *e,
struct bch_extent_ptr ptr)
{
ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
extent_entry_last(extent_i_to_s(e))->ptr = ptr;
__extent_entry_push(e);
}
static inline struct bch_devs_list bch2_extent_devs(struct bkey_s_c_extent e)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent e)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
if (!ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e)
{
struct bch_devs_list ret = (struct bch_devs_list) { 0 };
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
if (ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
return ret;
}
static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
return bch2_extent_devs(bkey_s_c_to_extent(k));
default:
return (struct bch_devs_list) { .nr = 0 };
}
}
static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
return bch2_extent_dirty_devs(bkey_s_c_to_extent(k));
default:
return (struct bch_devs_list) { .nr = 0 };
}
}
static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
return bch2_extent_cached_devs(bkey_s_c_to_extent(k));
default:
return (struct bch_devs_list) { .nr = 0 };
}
}
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
struct bch_extent_crc_unpacked);
bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent ,
struct bch_extent_ptr *);
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
struct bch_extent_ptr *);
#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \
#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
do { \
_ptr = &(_e).v->start->ptr; \
struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
\
while ((_ptr = extent_ptr_next(e, _ptr))) { \
_ptr = &_ptrs.start->ptr; \
\
while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
if (_cond) { \
_ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
_ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
_ptrs = bch2_bkey_ptrs(_k); \
continue; \
} \
\
@ -517,10 +555,34 @@ do { \
} \
} while (0)
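
bch2_bkey_drop_ptrs() re-fetches the pointer range after every drop because bch2_bkey_drop_ptr() shifts the remaining entries down and shrinks the value, invalidating the old end. A small standalone sketch of that drop-while-iterating pattern over a plain array (illustrative only; the real macro walks variable-size extent entries):

#include <stdio.h>

struct ptr { unsigned dev; int cached; };
struct ptr_range { struct ptr *start; struct ptr *end; };

static unsigned nr_ptrs = 4;
static struct ptr ptrs[4] = {
	{ 0, 0 }, { 1, 1 }, { 2, 0 }, { 3, 1 },
};

static struct ptr_range get_ptrs(void)
{
	return (struct ptr_range) { ptrs, ptrs + nr_ptrs };
}

/* drop *ptr by shifting the tail down; callers must re-fetch the range */
static struct ptr *drop_ptr(struct ptr *ptr)
{
	struct ptr *end = ptrs + nr_ptrs;
	struct ptr *p;

	for (p = ptr; p + 1 < end; p++)
		p[0] = p[1];
	nr_ptrs--;
	return ptr;
}

#define drop_ptrs_if(_ptr, _cond)					\
do {									\
	struct ptr_range _r = get_ptrs();				\
	_ptr = _r.start;						\
	while (_ptr < _r.end) {						\
		if (_cond) {						\
			_ptr = drop_ptr(_ptr);				\
			_r = get_ptrs(); /* the end has moved */	\
			continue;					\
		}							\
		(_ptr)++;						\
	}								\
} while (0)

int main(void)
{
	struct ptr *ptr;
	unsigned i;

	drop_ptrs_if(ptr, ptr->cached);	/* drop all cached pointers */

	for (i = 0; i < nr_ptrs; i++)
		printf("kept dev %u\n", ptrs[i].dev);
	return 0;
}
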
bool bch2_cut_front(struct bpos, struct bkey_i *);
bool __bch2_cut_front(struct bpos, struct bkey_s);
static inline bool bch2_cut_front(struct bpos where, struct bkey_i *k)
{
return __bch2_cut_front(where, bkey_i_to_s(k));
}
bool bch2_cut_back(struct bpos, struct bkey *);
void bch2_key_resize(struct bkey *, unsigned);
/*
* In extent_sort_fix_overlapping(), insert_fixup_extent(),
* extent_merge_inline() - we're modifying keys in place that are packed. To do
* that we have to unpack the key, modify the unpacked key - then this
* copies/repacks the unpacked to the original as necessary.
*/
static inline void extent_save(struct btree *b, struct bkey_packed *dst,
struct bkey *src)
{
struct bkey_format *f = &b->format;
struct bkey_i *dst_unpacked;
if ((dst_unpacked = packed_to_bkey(dst)))
dst_unpacked->k = *src;
else
BUG_ON(!bch2_bkey_pack_key(dst, src, f));
}
int bch2_check_range_allocated(struct bch_fs *, struct bpos, u64);
#endif /* _BCACHEFS_EXTENTS_H */


@ -121,7 +121,7 @@ static void bch2_quota_reservation_put(struct bch_fs *c,
BUG_ON(res->sectors > inode->ei_quota_reserved);
bch2_quota_acct(c, inode->ei_qid, Q_SPC,
-((s64) res->sectors), BCH_QUOTA_PREALLOC);
-((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC);
inode->ei_quota_reserved -= res->sectors;
mutex_unlock(&inode->ei_quota_lock);
@ -138,7 +138,7 @@ static int bch2_quota_reservation_add(struct bch_fs *c,
mutex_lock(&inode->ei_quota_lock);
ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK);
check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
if (likely(!ret)) {
inode->ei_quota_reserved += sectors;
res->sectors += sectors;
@ -220,7 +220,7 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
quota_res->sectors -= sectors;
inode->ei_quota_reserved -= sectors;
} else {
bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
}
#endif
inode->v.i_blocks += sectors;
@ -242,9 +242,15 @@ static s64 sum_sector_overwrites(struct bkey_i *new, struct btree_iter *_iter,
bch2_btree_iter_link(_iter, &iter);
bch2_btree_iter_copy(&iter, _iter);
for_each_btree_key_continue(&iter, BTREE_ITER_SLOTS, old) {
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
break;
old = bch2_btree_iter_peek_slot(&iter);
while (1) {
/*
* should not be possible to get an error here, since we're
* carefully not advancing past @new and thus whatever leaf node
* @_iter currently points to:
*/
BUG_ON(btree_iter_err(old));
if (allocating &&
!bch2_extent_is_fully_allocated(old))
@ -256,6 +262,11 @@ static s64 sum_sector_overwrites(struct bkey_i *new, struct btree_iter *_iter,
bkey_start_offset(old.k))) *
(bkey_extent_is_allocation(&new->k) -
bkey_extent_is_allocation(old.k));
if (bkey_cmp(old.k->p, new->k.p) >= 0)
break;
old = bch2_btree_iter_next_slot(&iter);
}
bch2_btree_iter_unlink(&iter);
@ -848,7 +859,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
struct bvec_iter iter;
struct bio_vec bv;
unsigned nr_ptrs = !bch2_extent_is_compressed(k)
? bch2_extent_nr_dirty_ptrs(k)
? bch2_bkey_nr_dirty_ptrs(k)
: 0;
bio_for_each_segment(bv, bio, iter) {
@ -2397,7 +2408,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
@ -2504,7 +2515,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
goto btree_iter_err;
/* already reserved */
if (k.k->type == BCH_RESERVATION &&
if (k.k->type == KEY_TYPE_reservation &&
bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
bch2_btree_iter_next_slot(iter);
continue;
@ -2517,7 +2528,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
}
bkey_reservation_init(&reservation.k_i);
reservation.k.type = BCH_RESERVATION;
reservation.k.type = KEY_TYPE_reservation;
reservation.k.p = k.k->p;
reservation.k.size = k.k->size;
@ -2525,7 +2536,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
bch2_cut_back(end_pos, &reservation.k);
sectors = reservation.k.size;
reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k);
reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k);
if (!bkey_extent_is_allocation(k.k)) {
ret = bch2_quota_reservation_add(c, inode,


@ -281,7 +281,7 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
if (tmpfile)
inode_u.bi_flags |= BCH_INODE_UNLINKED;
ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC);
ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC);
if (ret)
return ERR_PTR(ret);
@ -394,7 +394,7 @@ err_trans:
make_bad_inode(&inode->v);
iput(&inode->v);
err:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN);
inode = ERR_PTR(ret);
goto out;
}
@ -999,7 +999,7 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
}
return 0;
} else if (k->k.type == BCH_RESERVATION) {
} else if (k->k.type == KEY_TYPE_reservation) {
return fiemap_fill_next_extent(info,
bkey_start_offset(&k->k) << 9,
0, k->k.size << 9,
@ -1028,7 +1028,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(ei->v.i_ino, start >> 9), 0, k)
if (bkey_extent_is_data(k.k) ||
k.k->type == BCH_RESERVATION) {
k.k->type == KEY_TYPE_reservation) {
if (bkey_cmp(bkey_start_pos(k.k),
POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
break;
@ -1329,9 +1329,9 @@ static void bch2_evict_inode(struct inode *vinode)
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
BCH_QUOTA_WARN);
KEY_TYPE_QUOTA_WARN);
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
BCH_QUOTA_WARN);
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode->v.i_ino);
WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0,


@ -234,7 +234,6 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
!desc.cmp_bkey(k, k2), c,
"duplicate hash table keys:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
bkey_type(0, desc.btree_id),
k), buf))) {
ret = fsck_hash_delete_at(desc, &h->info, k_iter);
if (ret)
@ -254,7 +253,7 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
{
u64 hash;
if (k.k->type != desc.whiteout_type &&
if (k.k->type != KEY_TYPE_whiteout &&
k.k->type != desc.key_type)
return true;
@ -279,7 +278,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
u64 hashed;
int ret = 0;
if (k.k->type != desc.whiteout_type &&
if (k.k->type != KEY_TYPE_whiteout &&
k.k->type != desc.key_type)
return 0;
@ -299,7 +298,6 @@ static int hash_check_key(const struct bch_hash_desc desc,
desc.btree_id, k.k->p.offset,
hashed, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
bkey_type(0, desc.btree_id),
k), buf))) {
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
if (ret) {
@ -369,7 +367,7 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
*k = bch2_btree_iter_peek(iter);
BUG_ON(k->k->type != BCH_DIRENT);
BUG_ON(k->k->type != KEY_TYPE_dirent);
}
err:
fsck_err:
@ -384,7 +382,6 @@ err_redo:
buf, strlen(buf), BTREE_ID_DIRENTS,
k->k->p.offset, hash, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
bkey_type(0, BTREE_ID_DIRENTS),
*k), buf))) {
ret = hash_redo_key(bch2_dirent_hash_desc,
h, c, iter, *k, hash);
@ -470,7 +467,7 @@ static int check_extents(struct bch_fs *c)
if (fsck_err_on(w.have_inode &&
!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
k.k->type != BCH_RESERVATION &&
k.k->type != KEY_TYPE_reservation &&
k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
@ -528,13 +525,11 @@ static int check_dirents(struct bch_fs *c)
if (fsck_err_on(!w.have_inode, c,
"dirent in nonexisting directory:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
BTREE_ID_DIRENTS,
k), buf)) ||
fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
"dirent in non directory inode type %u:\n%s",
mode_to_type(w.inode.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c,
BTREE_ID_DIRENTS,
k), buf))) {
ret = bch2_btree_delete_at(iter, 0);
if (ret)
@ -556,7 +551,7 @@ static int check_dirents(struct bch_fs *c)
if (ret)
goto fsck_err;
if (k.k->type != BCH_DIRENT)
if (k.k->type != KEY_TYPE_dirent)
continue;
d = bkey_s_c_to_dirent(k);
@ -585,7 +580,6 @@ static int check_dirents(struct bch_fs *c)
if (fsck_err_on(d_inum == d.k->p.inode, c,
"dirent points to own directory:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
BTREE_ID_DIRENTS,
k), buf))) {
ret = remove_dirent(c, iter, d);
if (ret)
@ -603,7 +597,6 @@ static int check_dirents(struct bch_fs *c)
if (fsck_err_on(!have_target, c,
"dirent points to missing inode:\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c,
BTREE_ID_DIRENTS,
k), buf))) {
ret = remove_dirent(c, iter, d);
if (ret)
@ -617,7 +610,6 @@ static int check_dirents(struct bch_fs *c)
"incorrect d_type: should be %u:\n%s",
mode_to_type(target.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c,
BTREE_ID_DIRENTS,
k), buf))) {
struct bkey_i_dirent *n;
@ -898,7 +890,7 @@ next:
e->offset = k.k->p.offset;
if (k.k->type != BCH_DIRENT)
if (k.k->type != KEY_TYPE_dirent)
continue;
dirent = bkey_s_c_to_dirent(k);
@ -941,7 +933,7 @@ up:
}
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
if (k.k->type != BCH_INODE_FS)
if (k.k->type != KEY_TYPE_inode)
continue;
if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
@ -1029,7 +1021,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
switch (k.k->type) {
case BCH_DIRENT:
case KEY_TYPE_dirent:
d = bkey_s_c_to_dirent(k);
d_inum = le64_to_cpu(d.v->d_inum);
@ -1309,7 +1301,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
if (iter.pos.inode < nlinks_pos || !link)
link = &zero_links;
if (k.k && k.k->type == BCH_INODE_FS) {
if (k.k && k.k->type == KEY_TYPE_inode) {
/*
* Avoid potential deadlocks with iter for
* truncate/rm/etc.:
@ -1391,7 +1383,7 @@ static int check_inodes_fast(struct bch_fs *c)
int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
if (k.k->type != BCH_INODE_FS)
if (k.k->type != KEY_TYPE_inode)
continue;
inode = bkey_s_c_to_inode(k);


@ -177,76 +177,69 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
if (k.k->p.offset)
return "nonzero offset";
switch (k.k->type) {
case BCH_INODE_FS: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
struct bch_inode_unpacked unpacked;
if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
return "incorrect value size";
if (k.k->p.offset)
return "nonzero offset";
if (k.k->p.inode < BLOCKDEV_INODE_MAX)
return "fs inode in blockdev range";
if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
return "incorrect value size";
if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
return "invalid str hash type";
if (k.k->p.inode < BLOCKDEV_INODE_MAX)
return "fs inode in blockdev range";
if (bch2_inode_unpack(inode, &unpacked))
return "invalid variable length fields";
if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
return "invalid str hash type";
if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
return "invalid data checksum type";
if (bch2_inode_unpack(inode, &unpacked))
return "invalid variable length fields";
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
return "invalid data checksum type";
if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
return "invalid data checksum type";
if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
unpacked.bi_nlink != 0)
return "flagged as unlinked but bi_nlink != 0";
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
return "invalid data checksum type";
return NULL;
}
case BCH_INODE_BLOCKDEV:
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
return "incorrect value size";
if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
unpacked.bi_nlink != 0)
return "flagged as unlinked but bi_nlink != 0";
if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
return "blockdev inode in fs range";
return NULL;
case BCH_INODE_GENERATION:
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
return "incorrect value size";
return NULL;
default:
return "invalid type";
}
return NULL;
}
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_inode inode;
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
struct bch_inode_unpacked unpacked;
switch (k.k->type) {
case BCH_INODE_FS:
inode = bkey_s_c_to_inode(k);
if (bch2_inode_unpack(inode, &unpacked)) {
pr_buf(out, "(unpack error)");
break;
}
if (bch2_inode_unpack(inode, &unpacked)) {
pr_buf(out, "(unpack error)");
return;
}
#define BCH_INODE_FIELD(_name, _bits) \
pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
BCH_INODE_FIELDS()
pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
BCH_INODE_FIELDS()
#undef BCH_INODE_FIELD
break;
}
}
const char *bch2_inode_generation_invalid(const struct bch_fs *c,
struct bkey_s_c k)
{
if (k.k->p.offset)
return "nonzero offset";
if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
return "incorrect value size";
return NULL;
}
void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
}
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
@ -282,10 +275,9 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
static inline u32 bkey_generation(struct bkey_s_c k)
{
switch (k.k->type) {
case BCH_INODE_BLOCKDEV:
case BCH_INODE_FS:
case KEY_TYPE_inode:
BUG();
case BCH_INODE_GENERATION:
case KEY_TYPE_inode_generation:
return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
default:
return 0;
@ -331,8 +323,7 @@ again:
return ret;
switch (k.k->type) {
case BCH_INODE_BLOCKDEV:
case BCH_INODE_FS:
case KEY_TYPE_inode:
/* slot used */
if (iter->pos.inode >= max)
goto out;
@ -406,19 +397,19 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
return ret;
}
bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c,
bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
"inode %llu not found when deleting",
inode_nr);
switch (k.k->type) {
case BCH_INODE_FS: {
case KEY_TYPE_inode: {
struct bch_inode_unpacked inode_u;
if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
bi_generation = inode_u.bi_generation + 1;
break;
}
case BCH_INODE_GENERATION: {
case KEY_TYPE_inode_generation: {
struct bkey_s_c_inode_generation g =
bkey_s_c_to_inode_generation(k);
bi_generation = le32_to_cpu(g.v->bi_generation);
@ -456,7 +447,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
POS(inode_nr, 0),
BTREE_ITER_SLOTS, k) {
switch (k.k->type) {
case BCH_INODE_FS:
case KEY_TYPE_inode:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
break;
default:
@ -465,7 +456,6 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
}
break;
}
return bch2_btree_iter_unlock(&iter) ?: ret;


@ -8,11 +8,21 @@
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_inode_ops (struct bkey_ops) { \
#define bch2_bkey_ops_inode (struct bkey_ops) { \
.key_invalid = bch2_inode_invalid, \
.val_to_text = bch2_inode_to_text, \
}
const char *bch2_inode_generation_invalid(const struct bch_fs *,
struct bkey_s_c);
void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \
.key_invalid = bch2_inode_generation_invalid, \
.val_to_text = bch2_inode_generation_to_text, \
}
struct bch_inode_unpacked {
u64 bi_inum;
__le64 bi_hash_seed;


@ -202,20 +202,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
enum bch_data_type type,
const struct bkey_i *k)
{
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
const struct bch_extent_ptr *ptr;
struct bch_write_bio *n;
struct bch_dev *ca;
BUG_ON(c->opts.nochanges);
extent_for_each_ptr(e, ptr) {
bkey_for_each_ptr(ptrs, ptr) {
BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX ||
!c->devs[ptr->dev]);
ca = bch_dev_bkey_exists(c, ptr->dev);
if (ptr + 1 < &extent_entry_last(e)->ptr) {
if (to_entry(ptr + 1) < ptrs.end) {
n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO,
&ca->replica_set));
@ -300,7 +300,6 @@ static void __bch2_write_index(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct keylist *keys = &op->insert_keys;
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct bkey_i *src, *dst = keys->keys, *n, *k;
unsigned dev;
@ -310,12 +309,10 @@ static void __bch2_write_index(struct bch_write_op *op)
n = bkey_next(src);
bkey_copy(dst, src);
e = bkey_i_to_s_extent(dst);
bch2_extent_drop_ptrs(e, ptr,
bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr,
test_bit(ptr->dev, op->failed.d));
if (!bch2_extent_nr_ptrs(e.c)) {
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) {
ret = -EIO;
goto err;
}
@ -416,10 +413,10 @@ static void init_append_extent(struct bch_write_op *op,
e->k.p = op->pos;
e->k.size = crc.uncompressed_size;
e->k.version = version;
bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED);
bch2_extent_crc_append(e, crc);
bch2_alloc_sectors_append_ptrs(op->c, wp, e, crc.compressed_size);
bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i,
crc.compressed_size);
bch2_keylist_push(&op->insert_keys);
}
@ -1589,7 +1586,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
if (!pick_ret)


@ -462,7 +462,7 @@ u64 bch2_journal_last_unwritten_seq(struct journal *j)
int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool need_reclaim = false;
int ret;
retry:
spin_lock(&j->lock);
@ -490,14 +490,11 @@ retry:
BUG_ON(journal_cur_seq(j) < seq);
if (!journal_entry_open(j)) {
need_reclaim = true;
goto blocked;
ret = journal_entry_open(j);
if (ret) {
spin_unlock(&j->lock);
return ret < 0 ? ret : 0;
}
spin_unlock(&j->lock);
return 0;
blocked:
if (!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
@ -505,8 +502,7 @@ blocked:
closure_wait(&j->async_wait, cl);
spin_unlock(&j->lock);
if (need_reclaim)
bch2_journal_reclaim_work(&j->reclaim_work.work);
bch2_journal_reclaim_work(&j->reclaim_work.work);
return -EAGAIN;
}


@ -141,11 +141,12 @@ static void journal_entry_null_range(void *start, void *end)
static int journal_validate_key(struct bch_fs *c, struct jset *jset,
struct jset_entry *entry,
struct bkey_i *k, enum bkey_type key_type,
struct bkey_i *k, enum btree_node_type key_type,
const char *type, int write)
{
void *next = vstruct_next(entry);
const char *invalid;
unsigned version = le32_to_cpu(jset->version);
int ret = 0;
if (journal_entry_err_on(!k->k.u64s, c,
@ -174,14 +175,17 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
}
if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN)
bch2_bkey_swab(key_type, NULL, bkey_to_packed(k));
bch2_bkey_swab(NULL, bkey_to_packed(k));
invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k));
if (!write &&
version < bcachefs_metadata_version_bkey_renumber)
bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type);
if (invalid) {
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, key_type,
bkey_i_to_s_c(k));
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
mustfix_fsck_err(c, "invalid %s in journal: %s\n%s",
type, invalid, buf);
@ -190,6 +194,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
journal_entry_null_range(vstruct_next(entry), next);
return 0;
}
if (write &&
version < bcachefs_metadata_version_bkey_renumber)
bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
fsck_err:
return ret;
}
@ -203,8 +211,8 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
vstruct_for_each(entry, k) {
int ret = journal_validate_key(c, jset, entry, k,
bkey_type(entry->level,
entry->btree_id),
__btree_node_type(entry->level,
entry->btree_id),
"key", write);
if (ret)
return ret;
@ -351,14 +359,17 @@ static int jset_validate(struct bch_fs *c,
{
size_t bytes = vstruct_bytes(jset);
struct bch_csum csum;
unsigned version;
int ret = 0;
if (le64_to_cpu(jset->magic) != jset_magic(c))
return JOURNAL_ENTRY_NONE;
if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) {
bch_err(c, "unknown journal entry version %u",
le32_to_cpu(jset->version));
version = le32_to_cpu(jset->version);
if ((version != BCH_JSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max) {
bch_err(c, "unknown journal entry version %u", jset->version);
return BCH_FSCK_UNKNOWN_VERSION;
}
@ -929,7 +940,6 @@ static void __journal_write_alloc(struct journal *j,
unsigned replicas_want)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bkey_i_extent *e = bkey_i_to_extent(&w->key);
struct journal_device *ja;
struct bch_dev *ca;
unsigned i;
@ -951,13 +961,14 @@ static void __journal_write_alloc(struct journal *j,
if (!ca->mi.durability ||
ca->mi.state != BCH_MEMBER_STATE_RW ||
!ja->nr ||
bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) ||
bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
ca->dev_idx) ||
sectors > ja->sectors_free)
continue;
bch2_dev_stripe_increment(c, ca, &j->wp.stripe);
extent_ptr_append(e,
bch2_bkey_append_ptr(&w->key,
(struct bch_extent_ptr) {
.offset = bucket_to_sector(ca,
ja->buckets[ja->cur_idx]) +
@ -1096,7 +1107,7 @@ static void journal_write_done(struct closure *cl)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_prev_buf(j);
struct bch_devs_list devs =
bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
bch2_bkey_devs(bkey_i_to_s_c(&w->key));
u64 seq = le64_to_cpu(w->data->seq);
u64 last_seq = le64_to_cpu(w->data->last_seq);
@ -1158,7 +1169,7 @@ static void journal_write_endio(struct bio *bio)
unsigned long flags;
spin_lock_irqsave(&j->err_lock, flags);
bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx);
spin_unlock_irqrestore(&j->err_lock, flags);
}
@ -1175,6 +1186,7 @@ void bch2_journal_write(struct closure *cl)
struct jset *jset;
struct bio *bio;
struct bch_extent_ptr *ptr;
bool validate_before_checksum = false;
unsigned i, sectors, bytes;
journal_buf_realloc(j, w);
@ -1196,12 +1208,22 @@ void bch2_journal_write(struct closure *cl)
jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand);
jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand);
jset->magic = cpu_to_le64(jset_magic(c));
jset->version = cpu_to_le32(BCACHE_JSET_VERSION);
jset->version = c->sb.version < bcachefs_metadata_version_new_versioning
? cpu_to_le32(BCH_JSET_VERSION_OLD)
: cpu_to_le32(c->sb.version);
SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true;
if (le32_to_cpu(jset->version) <
bcachefs_metadata_version_bkey_renumber)
validate_before_checksum = true;
if (validate_before_checksum &&
jset_validate_entries(c, jset, WRITE))
goto err;
@ -1212,7 +1234,7 @@ void bch2_journal_write(struct closure *cl)
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
journal_nonce(jset), jset);
if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
if (!validate_before_checksum &&
jset_validate_entries(c, jset, WRITE))
goto err;
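
The validate_before_checksum flag introduced here only decides on which side of checksumming and encryption the entry validation runs: renumbering keys back to the old on-disk numbering modifies the entries, so it has to happen before the checksum is computed, and once the payload has been encrypted it can no longer be validated in place. A hedged sketch of that decision (the constant name is a placeholder):

	#include <stdbool.h>

	/* placeholder for bcachefs_metadata_version_bkey_renumber */
	#define DEMO_VERSION_BKEY_RENUMBER	10

	static bool demo_validate_before_checksum(bool csum_is_encryption,
						  unsigned jset_version)
	{
		return csum_is_encryption ||
		       jset_version < DEMO_VERSION_BKEY_RENUMBER;
	}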


@ -14,7 +14,7 @@
#include "replicas.h"
#include "super-io.h"
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
unsigned dev_idx, int flags, bool metadata)
{
unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
@ -22,9 +22,9 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
unsigned nr_good;
bch2_extent_drop_device(e, dev_idx);
bch2_bkey_drop_device(k, dev_idx);
nr_good = bch2_extent_durability(c, e.c);
nr_good = bch2_bkey_durability(c, k.s_c);
if ((!nr_good && !(flags & lost)) ||
(nr_good < replicas && !(flags & degraded)))
return -EINVAL;
@ -35,7 +35,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct bkey_s_c k;
struct bkey_s_extent e;
BKEY_PADDED(key) tmp;
struct btree_iter iter;
int ret = 0;
@ -50,7 +49,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
break;
bch2_btree_iter_next(&iter);
@ -58,18 +57,18 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
}
bkey_reassemble(&tmp.key, k);
e = bkey_i_to_s_extent(&tmp.key);
ret = drop_dev_ptrs(c, e, dev_idx, flags, false);
ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key),
dev_idx, flags, false);
if (ret)
break;
/*
* If the new extent no longer has any pointers, bch2_extent_normalize()
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_ERROR key, or just a discard if it was a cached extent)
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
bch2_extent_normalize(c, e.s);
bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
iter.pos = bkey_start_pos(&tmp.key.k);
@ -117,7 +116,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct bkey_i_extent *new_key;
struct bkey_i_btree_ptr *new_key;
retry:
if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key),
dev_idx)) {
@ -129,15 +128,14 @@ retry:
*/
bch2_btree_iter_downgrade(&iter);
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
if (ret)
goto err;
} else {
bkey_copy(&tmp.k, &b->key);
new_key = bkey_i_to_extent(&tmp.k);
new_key = bkey_i_to_btree_ptr(&tmp.k);
ret = drop_dev_ptrs(c, extent_i_to_s(new_key),
ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
dev_idx, flags, true);
if (ret)
goto err;


@ -100,8 +100,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bch2_cut_back(insert->k.p, &new->k);
if (m->data_cmd == DATA_REWRITE)
bch2_extent_drop_device(extent_i_to_s(insert),
m->data_opts.rewrite_dev);
bch2_bkey_drop_device(extent_i_to_s(insert).s,
m->data_opts.rewrite_dev);
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
@ -132,8 +132,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
* has fewer replicas than when we last looked at it - meaning
* we need to get a disk reservation here:
*/
nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
(bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved);
nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
(bch2_bkey_nr_dirty_ptrs(k) + m->nr_ptrs_reserved);
if (nr > 0) {
/*
* can't call bch2_disk_reservation_add() with btree
@ -243,7 +243,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
switch (data_cmd) {
case DATA_ADD_REPLICAS: {
int nr = (int) io_opts.data_replicas -
bch2_extent_nr_dirty_ptrs(k);
bch2_bkey_nr_dirty_ptrs(k);
if (nr > 0) {
m->op.nr_replicas = m->nr_ptrs_reserved = nr;
@ -478,7 +478,6 @@ int bch2_move_data(struct bch_fs *c,
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
BKEY_PADDED(k) tmp;
struct bkey_s_c k;
struct bkey_s_c_extent e;
struct data_opts data_opts;
enum data_cmd data_cmd;
u64 delay, cur_inum = U64_MAX;
@ -531,8 +530,6 @@ peek:
if (!bkey_extent_is_data(k.k))
goto next_nondata;
e = bkey_s_c_to_extent(k);
if (cur_inum != k.k->p.inode) {
struct bch_inode_unpacked inode;
@ -546,8 +543,7 @@ peek:
goto peek;
}
switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e,
&io_opts, &data_opts))) {
switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
@ -582,7 +578,7 @@ peek:
if (rate)
bch2_ratelimit_increment(rate, k.k->size);
next:
atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k),
atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k),
&stats->sectors_seen);
next_nondata:
bch2_btree_iter_next(&stats->iter);
@ -614,7 +610,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k) {
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
break;
}
@ -638,8 +634,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
bch2_btree_iter_cond_resched(&iter);
}
@ -669,10 +664,9 @@ static int bch2_move_btree(struct bch_fs *c,
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE,
bkey_i_to_s_c_extent(&b->key),
&io_opts,
&data_opts))) {
switch ((cmd = pred(c, arg,
bkey_i_to_s_c(&b->key),
&io_opts, &data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
@ -698,8 +692,7 @@ next:
#if 0
static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
@ -708,33 +701,38 @@ static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
#endif
static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
unsigned nr_good = bch2_extent_durability(c, e);
unsigned replicas = type == BKEY_TYPE_BTREE
? c->opts.metadata_replicas
: io_opts->data_replicas;
unsigned nr_good = bch2_bkey_durability(c, k);
unsigned replicas = 0;
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
replicas = c->opts.metadata_replicas;
break;
case KEY_TYPE_extent:
replicas = io_opts->data_replicas;
break;
}
if (!nr_good || nr_good >= replicas)
return DATA_SKIP;
data_opts->target = 0;
data_opts->btree_insert_flags = 0;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
}
static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
struct bch_ioctl_data *op = arg;
if (!bch2_extent_has_device(e, op->migrate.dev))
if (!bch2_bkey_has_device(k, op->migrate.dev))
return DATA_SKIP;
data_opts->target = 0;
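
With bkey types globally unique, the move predicates above no longer need a separate enum bkey_type argument: the key itself says whether it points at btree metadata or user data. A small standalone sketch of the dispatch pattern rereplicate_pred() now uses (all names here are hypothetical):

	#include <stdbool.h>

	enum demo_key_type { DEMO_KEY_btree_ptr = 1, DEMO_KEY_extent = 2 };

	struct demo_opts { unsigned metadata_replicas, data_replicas; };

	static bool demo_wants_more_replicas(enum demo_key_type type, unsigned nr_good,
					     const struct demo_opts *opts)
	{
		unsigned want = 0;

		switch (type) {
		case DEMO_KEY_btree_ptr: want = opts->metadata_replicas; break;
		case DEMO_KEY_extent:    want = opts->data_replicas;     break;
		}

		/* same skip conditions as rereplicate_pred(): nothing readable,
		 * or already at (or above) the target replica count */
		return nr_good && nr_good < want;
	}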


@ -46,7 +46,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
struct bkey_s_c);
typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
enum bkey_type, struct bkey_s_c_extent,
struct bkey_s_c,
struct bch_io_opts *, struct data_opts *);
int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,


@ -65,36 +65,42 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
}
static bool __copygc_pred(struct bch_dev *ca,
struct bkey_s_c_extent e)
struct bkey_s_c k)
{
copygc_heap *h = &ca->copygc_heap;
const struct bch_extent_ptr *ptr =
bch2_extent_has_device(e, ca->dev_idx);
if (ptr) {
struct copygc_heap_entry search = { .offset = ptr->offset };
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr =
bch2_extent_has_device(e, ca->dev_idx);
ssize_t i = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, &search);
if (ptr) {
struct copygc_heap_entry search = { .offset = ptr->offset };
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen);
ssize_t i = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, &search);
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen);
}
break;
}
}
return false;
}
static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
struct bch_dev *ca = arg;
if (!__copygc_pred(ca, e))
if (!__copygc_pred(ca, k))
return DATA_SKIP;
data_opts->target = dev_to_target(ca->dev_idx);


@ -180,6 +180,9 @@ enum opt_type {
OPT_BOOL(), \
NO_SB_OPT, false) \
BCH_OPT(nostart, u8, OPT_INTERNAL, \
OPT_BOOL(), \
NO_SB_OPT, false) \
BCH_OPT(version_upgrade, u8, OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, false)


@ -21,23 +21,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = {
const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_quota dq;
if (k.k->p.inode >= QTYP_NR)
return "invalid quota type";
switch (k.k->type) {
case BCH_QUOTA: {
dq = bkey_s_c_to_quota(k);
if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
return "incorrect value size";
if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
return "incorrect value size";
return NULL;
}
default:
return "invalid type";
}
return NULL;
}
static const char * const bch2_quota_counters[] = {
@ -48,20 +38,14 @@ static const char * const bch2_quota_counters[] = {
void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_quota dq;
struct bkey_s_c_quota dq = bkey_s_c_to_quota(k);
unsigned i;
switch (k.k->type) {
case BCH_QUOTA:
dq = bkey_s_c_to_quota(k);
for (i = 0; i < Q_COUNTERS; i++)
pr_buf(out, "%s hardlimit %llu softlimit %llu",
bch2_quota_counters[i],
le64_to_cpu(dq.v->c[i].hardlimit),
le64_to_cpu(dq.v->c[i].softlimit));
break;
}
for (i = 0; i < Q_COUNTERS; i++)
pr_buf(out, "%s hardlimit %llu softlimit %llu",
bch2_quota_counters[i],
le64_to_cpu(dq.v->c[i].hardlimit),
le64_to_cpu(dq.v->c[i].softlimit));
}
#ifdef CONFIG_BCACHEFS_QUOTA
@ -177,7 +161,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
BUG_ON((s64) n < 0);
if (mode == BCH_QUOTA_NOCHECK)
if (mode == KEY_TYPE_QUOTA_NOCHECK)
return 0;
if (v <= 0) {
@ -200,7 +184,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
if (qc->hardlimit &&
qc->hardlimit < n &&
!ignore_hardlimit(q)) {
if (mode == BCH_QUOTA_PREALLOC)
if (mode == KEY_TYPE_QUOTA_PREALLOC)
return -EDQUOT;
prepare_warning(qc, qtype, counter, msgs, HARDWARN);
@ -211,7 +195,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
qc->timer &&
ktime_get_real_seconds() >= qc->timer &&
!ignore_hardlimit(q)) {
if (mode == BCH_QUOTA_PREALLOC)
if (mode == KEY_TYPE_QUOTA_PREALLOC)
return -EDQUOT;
prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
@ -220,7 +204,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
if (qc->softlimit &&
qc->softlimit < n &&
qc->timer == 0) {
if (mode == BCH_QUOTA_PREALLOC)
if (mode == KEY_TYPE_QUOTA_PREALLOC)
return -EDQUOT;
prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
@ -311,13 +295,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
dst_q[i]->c[Q_SPC].v + space,
BCH_QUOTA_PREALLOC);
KEY_TYPE_QUOTA_PREALLOC);
if (ret)
goto err;
ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
dst_q[i]->c[Q_INO].v + 1,
BCH_QUOTA_PREALLOC);
KEY_TYPE_QUOTA_PREALLOC);
if (ret)
goto err;
}
@ -346,7 +330,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
BUG_ON(k.k->p.inode >= QTYP_NR);
switch (k.k->type) {
case BCH_QUOTA:
case KEY_TYPE_quota:
dq = bkey_s_c_to_quota(k);
q = &c->quotas[k.k->p.inode];
@ -446,15 +430,15 @@ int bch2_fs_quota_read(struct bch_fs *c)
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
BTREE_ITER_PREFETCH, k) {
switch (k.k->type) {
case BCH_INODE_FS:
case KEY_TYPE_inode:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
if (ret)
return ret;
bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
BCH_QUOTA_NOCHECK);
KEY_TYPE_QUOTA_NOCHECK);
bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
BCH_QUOTA_NOCHECK);
KEY_TYPE_QUOTA_NOCHECK);
}
}
return bch2_btree_iter_unlock(&iter) ?: ret;
@ -699,22 +683,19 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
struct bch_fs *c = sb->s_fs_info;
struct bch_memquota_type *q = &c->quotas[kqid->type];
qid_t qid = from_kqid(&init_user_ns, *kqid);
struct genradix_iter iter = genradix_iter_init(&q->table, qid);
struct genradix_iter iter;
struct bch_memquota *mq;
int ret = 0;
mutex_lock(&q->lock);
while ((mq = genradix_iter_peek(&iter, &q->table))) {
genradix_for_each_from(&q->table, iter, mq, qid)
if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
__bch2_quota_get(qdq, mq);
*kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
goto found;
}
genradix_iter_advance(&iter, &q->table);
}
ret = -ENOENT;
found:
mutex_unlock(&q->lock);
@ -745,7 +726,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
return ret;
switch (k.k->type) {
case BCH_QUOTA:
case KEY_TYPE_quota:
new_quota.v = *bkey_s_c_to_quota(k).v;
break;
}


@ -9,15 +9,15 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_quota_ops (struct bkey_ops) { \
#define bch2_bkey_ops_quota (struct bkey_ops) { \
.key_invalid = bch2_quota_invalid, \
.val_to_text = bch2_quota_to_text, \
}
enum quota_acct_mode {
BCH_QUOTA_PREALLOC,
BCH_QUOTA_WARN,
BCH_QUOTA_NOCHECK,
KEY_TYPE_QUOTA_PREALLOC,
KEY_TYPE_QUOTA_WARN,
KEY_TYPE_QUOTA_NOCHECK,
};
static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)


@ -69,28 +69,34 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
}
static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
/* Make sure we have room to add a new pointer: */
if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
BKEY_EXTENT_VAL_U64s_MAX)
return DATA_SKIP;
extent_for_each_ptr_decode(e, p, entry)
if (rebalance_ptr_pred(c, p, io_opts))
goto found;
/* Make sure we have room to add a new pointer: */
if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
BKEY_EXTENT_VAL_U64s_MAX)
return DATA_SKIP;
extent_for_each_ptr_decode(e, p, entry)
if (rebalance_ptr_pred(c, p, io_opts))
goto found;
return DATA_SKIP;
found:
data_opts->target = io_opts->background_target;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
data_opts->target = io_opts->background_target;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
}
default:
return DATA_SKIP;
}
}
struct rebalance_work {


@ -146,6 +146,10 @@ int bch2_fs_recovery(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
goto err;
}
if (le16_to_cpu(c->disk_sb.sb->version) <
bcachefs_metadata_version_bkey_renumber)
bch2_sb_clean_renumber(clean, READ);
}
mutex_unlock(&c->sb_lock);
@ -264,12 +268,18 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
mutex_unlock(&c->sb_lock);
mutex_lock(&c->sb_lock);
if (c->opts.version_upgrade) {
if (c->sb.version < bcachefs_metadata_version_new_versioning)
c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_min);
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
}
if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags))
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
mutex_unlock(&c->sb_lock);
if (enabled_qtypes(c)) {
bch_verbose(c, "reading quotas:");
ret = bch2_fs_quota_read(c);
@ -304,6 +314,9 @@ int bch2_fs_initialize(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
ret = bch2_initial_gc(c, &journal);
if (ret)
goto err;
@ -315,9 +328,6 @@ int bch2_fs_initialize(struct bch_fs *c)
goto err;
}
for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
/*
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
@ -378,9 +388,12 @@ int bch2_fs_initialize(struct bch_fs *c)
goto err;
mutex_lock(&c->sb_lock);
c->disk_sb.sb->version = c->disk_sb.sb->version_min =
le16_to_cpu(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);


@ -72,64 +72,57 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
static void extent_to_replicas(struct bkey_s_c k,
struct bch_replicas_entry *r)
{
if (bkey_extent_is_data(k.k)) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
r->nr_required = 1;
r->nr_required = 1;
extent_for_each_ptr_decode(e, p, entry) {
if (p.ptr.cached)
continue;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (p.ptr.cached)
continue;
if (p.ec_nr) {
r->nr_devs = 0;
break;
}
r->devs[r->nr_devs++] = p.ptr.dev;
if (p.ec_nr) {
r->nr_devs = 0;
break;
}
r->devs[r->nr_devs++] = p.ptr.dev;
}
}
static void stripe_to_replicas(struct bkey_s_c k,
struct bch_replicas_entry *r)
{
if (k.k->type == BCH_STRIPE) {
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
const struct bch_extent_ptr *ptr;
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
const struct bch_extent_ptr *ptr;
r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
for (ptr = s.v->ptrs;
ptr < s.v->ptrs + s.v->nr_blocks;
ptr++)
r->devs[r->nr_devs++] = ptr->dev;
}
for (ptr = s.v->ptrs;
ptr < s.v->ptrs + s.v->nr_blocks;
ptr++)
r->devs[r->nr_devs++] = ptr->dev;
}
static void bkey_to_replicas(enum bkey_type type,
struct bkey_s_c k,
static void bkey_to_replicas(struct bkey_s_c k,
struct bch_replicas_entry *e)
{
e->nr_devs = 0;
switch (type) {
case BKEY_TYPE_BTREE:
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
e->data_type = BCH_DATA_BTREE;
extent_to_replicas(k, e);
break;
case BKEY_TYPE_EXTENTS:
case KEY_TYPE_extent:
e->data_type = BCH_DATA_USER;
extent_to_replicas(k, e);
break;
case BKEY_TYPE_EC:
case KEY_TYPE_stripe:
e->data_type = BCH_DATA_USER;
stripe_to_replicas(k, e);
break;
default:
break;
}
replicas_entry_sort(e);
@ -295,26 +288,21 @@ int bch2_mark_replicas(struct bch_fs *c,
return __bch2_mark_replicas(c, &search.e);
}
int bch2_mark_bkey_replicas(struct bch_fs *c,
enum bkey_type type,
struct bkey_s_c k)
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
{
struct bch_replicas_entry_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
int ret;
memset(&search, 0, sizeof(search));
if (type == BKEY_TYPE_EXTENTS) {
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
for (i = 0; i < cached.nr; i++)
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]))))
return ret;
for (i = 0; i < cached.nr; i++)
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]))))
return ret;
}
bkey_to_replicas(type, k, &search.e);
bkey_to_replicas(k, &search.e);
return search.e.nr_devs
? __bch2_mark_replicas(c, &search.e)
@ -718,26 +706,22 @@ bool bch2_replicas_marked(struct bch_fs *c,
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
enum bkey_type type,
struct bkey_s_c k,
bool check_gc_replicas)
{
struct bch_replicas_entry_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
memset(&search, 0, sizeof(search));
if (type == BKEY_TYPE_EXTENTS) {
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]),
check_gc_replicas))
return false;
for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]),
check_gc_replicas))
return false;
}
bkey_to_replicas(type, k, &search.e);
bkey_to_replicas(k, &search.e);
return search.e.nr_devs
? replicas_has_entry(c, &search.e, check_gc_replicas)


@ -5,12 +5,11 @@
bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
struct bch_devs_list, bool);
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);


@ -117,7 +117,6 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
struct bch_hash_desc {
enum btree_id btree_id;
u8 key_type;
u8 whiteout_type;
u64 (*hash_key)(const struct bch_hash_info *, const void *);
u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
@ -148,7 +147,7 @@ bch2_hash_lookup(struct btree_trans *trans,
if (k.k->type == desc.key_type) {
if (!desc.cmp_key(k, key))
return iter;
} else if (k.k->type == desc.whiteout_type) {
} else if (k.k->type == KEY_TYPE_whiteout) {
;
} else {
/* hole, not found */
@ -201,7 +200,7 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
if (k.k->type != desc.key_type &&
k.k->type != desc.whiteout_type)
k.k->type != KEY_TYPE_whiteout)
return false;
if (k.k->type == desc.key_type &&
@ -244,7 +243,7 @@ static inline int __bch2_hash_set(struct btree_trans *trans,
return PTR_ERR(slot);
}
if (k.k->type != desc.whiteout_type)
if (k.k->type != KEY_TYPE_whiteout)
goto not_found;
}
@ -294,7 +293,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans,
bkey_init(&delete->k);
delete->k.p = iter->pos;
delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted;
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete));
return 0;
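
The hashing changes above keep the old behaviour, only the key type values are now the shared KEY_TYPE_whiteout/KEY_TYPE_deleted: a deleted slot becomes a whiteout only while later entries in the same probe chain still rely on it for lookups, otherwise it can be a plain hole. A simplified toy version of that decision (the real bch2_hash_needs_whiteout() also compares the stored hash against the slot position):

	#include <stdbool.h>
	#include <stddef.h>

	enum demo_slot { DEMO_hole, DEMO_whiteout, DEMO_occupied };

	static bool demo_needs_whiteout(const enum demo_slot *slots, size_t nr, size_t pos)
	{
		/* an occupied slot before the next hole may have been displaced past
		 * "pos", so lookups still need to probe across it */
		for (size_t i = pos + 1; i < nr; i++) {
			if (slots[i] == DEMO_occupied)
				return true;
			if (slots[i] == DEMO_hole)
				return false;
		}
		return false;
	}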


@ -232,21 +232,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
struct bch_sb_field *f;
struct bch_sb_field_members *mi;
const char *err;
u32 version, version_min;
u16 block_size;
if (le64_to_cpu(sb->version) < BCH_SB_VERSION_MIN ||
le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
return"Unsupported superblock version";
version = le16_to_cpu(sb->version);
version_min = version >= bcachefs_metadata_version_new_versioning
? le16_to_cpu(sb->version_min)
: version;
if (version >= bcachefs_metadata_version_max ||
version_min < bcachefs_metadata_version_min)
return "Unsupported superblock version";
if (version_min > version)
return "Bad minimum version";
if (sb->features[1] ||
(le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
return "Filesystem has incompatible features";
if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
SET_BCH_SB_POSIX_ACL(sb, 1);
}
block_size = le16_to_cpu(sb->block_size);
if (!is_power_of_2(block_size) ||
@ -333,13 +337,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
return err;
}
if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 &&
bch2_sb_get_crypt(sb) &&
BCH_SB_INITIALIZED(sb))
return "Incompatible extent nonces";
sb->version = cpu_to_le64(BCH_SB_VERSION_MAX);
return NULL;
}
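
For clarity, the version checks bch2_sb_validate() now performs are: superblocks too old to carry version_min are treated as version_min == version, the pair must fall inside [bcachefs_metadata_version_min, bcachefs_metadata_version_max), and version_min may never exceed version. A sketch with placeholder constants:

	#include <stddef.h>
	#include <stdint.h>

	/* placeholder version numbers, not the real constants */
	#define DEMO_VERSION_MIN		9
	#define DEMO_VERSION_NEW_VERSIONING	10
	#define DEMO_VERSION_MAX		11

	static const char *demo_validate_sb_versions(uint16_t version,
						     uint16_t version_min_field)
	{
		uint16_t version_min = version >= DEMO_VERSION_NEW_VERSIONING
			? version_min_field
			: version;

		if (version >= DEMO_VERSION_MAX || version_min < DEMO_VERSION_MIN)
			return "Unsupported superblock version";
		if (version_min > version)
			return "Bad minimum version";
		return NULL;
	}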
@ -356,6 +353,7 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.uuid = src->uuid;
c->sb.user_uuid = src->user_uuid;
c->sb.version = le16_to_cpu(src->version);
c->sb.nr_devices = src->nr_devices;
c->sb.clean = BCH_SB_CLEAN(src);
c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
@ -377,6 +375,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
unsigned i;
dst->version = src->version;
dst->version_min = src->version_min;
dst->seq = src->seq;
dst->uuid = src->uuid;
dst->user_uuid = src->user_uuid;
@ -476,8 +475,8 @@ reread:
if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC))
return "Not a bcachefs superblock";
if (le64_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN ||
le64_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX)
if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min ||
le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max)
return "Unsupported superblock version";
bytes = vstruct_bytes(sb->sb);
@ -843,12 +842,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb,
return "bucket size smaller than btree node size";
}
if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX)
for (m = mi->members;
m < mi->members + sb->nr_devices;
m++)
SET_BCH_MEMBER_DATA_ALLOWED(m, ~0);
return NULL;
}
@ -878,6 +871,16 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
/* BCH_SB_FIELD_clean: */
void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
{
struct jset_entry *entry;
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
entry = vstruct_next(entry))
bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
}
void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
{
struct bch_sb_field_clean *sb_clean;
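
bch2_sb_clean_renumber() added above walks the clean section the same way journal replay walks a jset: every jset_entry records its own length, so iteration is simply "advance past the current entry". A self-contained illustration of that variable-length walk (the struct layout is illustrative, not the real struct jset_entry):

	#include <stdint.h>

	struct demo_entry {
		uint16_t u64s;		/* 64-bit words of payload following the header */
		uint16_t type;
		uint32_t pad;
		uint64_t data[];
	};

	static struct demo_entry *demo_entry_next(struct demo_entry *e)
	{
		return (struct demo_entry *) (e->data + e->u64s);
	}

	static void demo_for_each_entry(struct demo_entry *start, void *end,
					void (*fn)(struct demo_entry *))
	{
		for (struct demo_entry *e = start;
		     (char *) e < (char *) end;
		     e = demo_entry_next(e))
			fn(e);
	}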
@ -932,6 +935,10 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
BUG_ON(entry != vstruct_end(&sb_clean->field));
if (le16_to_cpu(c->disk_sb.sb->version) <
bcachefs_metadata_version_bkey_renumber)
bch2_sb_clean_renumber(sb_clean, WRITE);
mutex_unlock(&c->btree_root_lock);
write_super:
bch2_write_super(c);


@ -134,6 +134,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
/* BCH_SB_FIELD_clean: */
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
void bch2_fs_mark_clean(struct bch_fs *, bool);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,


@ -9,6 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_gc.h"
#include "btree_update_interior.h"
@ -580,7 +581,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_LIST_HEAD(&c->ec_new_stripe_list);
mutex_init(&c->ec_new_stripe_lock);
mutex_init(&c->ec_stripes_lock);
mutex_init(&c->ec_stripe_create_lock);
spin_lock_init(&c->ec_stripes_heap_lock);
seqcount_init(&c->gc_pos_lock);


@ -276,7 +276,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
return -EPERM;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
if (k.k->type == BCH_EXTENT) {
if (k.k->type == KEY_TYPE_extent) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;


@ -61,8 +61,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
const struct bch_hash_desc bch2_xattr_hash_desc = {
.btree_id = BTREE_ID_XATTRS,
.key_type = BCH_XATTR,
.whiteout_type = BCH_XATTR_WHITEOUT,
.key_type = KEY_TYPE_xattr,
.hash_key = xattr_hash_key,
.hash_bkey = xattr_hash_bkey,
.cmp_key = xattr_cmp_key,
@ -72,71 +71,50 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
const struct xattr_handler *handler;
struct bkey_s_c_xattr xattr;
struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
switch (k.k->type) {
case BCH_XATTR:
if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
return "value too small";
if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
return "value too small";
xattr = bkey_s_c_to_xattr(k);
if (bkey_val_u64s(k.k) <
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len)))
return "value too small";
if (bkey_val_u64s(k.k) <
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len)))
return "value too small";
if (bkey_val_u64s(k.k) >
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len) + 4))
return "value too big";
if (bkey_val_u64s(k.k) >
xattr_val_u64s(xattr.v->x_name_len,
le16_to_cpu(xattr.v->x_val_len) + 4))
return "value too big";
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
if (!handler)
return "invalid type";
if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
return "xattr name has invalid characters";
return NULL;
case BCH_XATTR_WHITEOUT:
return bkey_val_bytes(k.k) != 0
? "value size should be zero"
: NULL;
default:
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
if (!handler)
return "invalid type";
}
if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
return "xattr name has invalid characters";
return NULL;
}
void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const struct xattr_handler *handler;
struct bkey_s_c_xattr xattr;
struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
switch (k.k->type) {
case BCH_XATTR:
xattr = bkey_s_c_to_xattr(k);
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
if (handler && handler->prefix)
pr_buf(out, "%s", handler->prefix);
else if (handler)
pr_buf(out, "(type %u)", xattr.v->x_type);
else
pr_buf(out, "(unknown type %u)", xattr.v->x_type);
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
if (handler && handler->prefix)
pr_buf(out, "%s", handler->prefix);
else if (handler)
pr_buf(out, "(type %u)", xattr.v->x_type);
else
pr_buf(out, "(unknown type %u)", xattr.v->x_type);
bch_scnmemcpy(out, xattr.v->x_name,
xattr.v->x_name_len);
pr_buf(out, ":");
bch_scnmemcpy(out, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
break;
case BCH_XATTR_WHITEOUT:
pr_buf(out, "whiteout");
break;
}
bch_scnmemcpy(out, xattr.v->x_name,
xattr.v->x_name_len);
pr_buf(out, ":");
bch_scnmemcpy(out, xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
}
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
@ -260,7 +238,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
if (k.k->p.inode > inum)
break;
if (k.k->type != BCH_XATTR)
if (k.k->type != KEY_TYPE_xattr)
continue;
xattr = bkey_s_c_to_xattr(k).v;
@ -313,7 +291,7 @@ static const struct xattr_handler bch_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.get = bch2_xattr_get_handler,
.set = bch2_xattr_set_handler,
.flags = BCH_XATTR_INDEX_USER,
.flags = KEY_TYPE_XATTR_INDEX_USER,
};
static bool bch2_xattr_trusted_list(struct dentry *dentry)
@ -326,14 +304,14 @@ static const struct xattr_handler bch_xattr_trusted_handler = {
.list = bch2_xattr_trusted_list,
.get = bch2_xattr_get_handler,
.set = bch2_xattr_set_handler,
.flags = BCH_XATTR_INDEX_TRUSTED,
.flags = KEY_TYPE_XATTR_INDEX_TRUSTED,
};
static const struct xattr_handler bch_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = bch2_xattr_get_handler,
.set = bch2_xattr_set_handler,
.flags = BCH_XATTR_INDEX_SECURITY,
.flags = KEY_TYPE_XATTR_INDEX_SECURITY,
};
#ifndef NO_BCACHEFS_FS
@ -471,13 +449,13 @@ const struct xattr_handler *bch2_xattr_handlers[] = {
};
static const struct xattr_handler *bch_xattr_handler_map[] = {
[BCH_XATTR_INDEX_USER] = &bch_xattr_user_handler,
[BCH_XATTR_INDEX_POSIX_ACL_ACCESS] =
[KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler,
[KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] =
&posix_acl_access_xattr_handler,
[BCH_XATTR_INDEX_POSIX_ACL_DEFAULT] =
[KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] =
&posix_acl_default_xattr_handler,
[BCH_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler,
[BCH_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler,
[KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler,
[KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler,
};
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)


@ -8,7 +8,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc;
const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_xattr_ops (struct bkey_ops) { \
#define bch2_bkey_ops_xattr (struct bkey_ops) { \
.key_invalid = bch2_xattr_invalid, \
.val_to_text = bch2_xattr_to_text, \
}