Update bcachefs sources to 6d9ff21de7 bcachefs: Kill journal buf bloom filter

This commit is contained in:
Kent Overstreet 2021-11-05 15:08:36 -04:00
parent e87deba8e4
commit 5b84952401
19 changed files with 340 additions and 295 deletions

View File

@ -1 +1 @@
5fd0c701024d14d7e493780354a150d0e20cf0a6
6d9ff21de70cf194bd9f783172bd6cac5cbce3eb

View File

@ -147,10 +147,44 @@ static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
return 0;
}
static void bch2_alloc_pack_v2(struct bkey_alloc_buf *dst,
/*
 * Decode the alloc_v3 value of @k into @out.
 *
 * gen, oldest_gen, data_type and journal_seq are copied from the fixed part
 * of the value.  Each field listed by BCH_ALLOC_FIELDS_V2() is then decoded,
 * in order, as a varint from the trailing data; fields whose index is not
 * below nr_fields are not present in the key and unpack as 0.
 *
 * Returns 0 on success, the negative value returned by
 * bch2_varint_decode_fast() when decoding fails, or -1 when a decoded value
 * changes after being stored in its destination member.
 */
static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
				struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
	const u8 *pos = a.v->data;
	const u8 *end = bkey_val_end(a);
	unsigned idx = 0;
	u64 val;
	int ret;

	out->journal_seq	= le64_to_cpu(a.v->journal_seq);
	out->data_type		= a.v->data_type;
	out->oldest_gen		= a.v->oldest_gen;
	out->gen		= a.v->gen;

	/* Expanded once per field; absent fields keep the zero default. */
#define x(_name, _bits)							\
	val = 0;							\
	if (idx < a.v->nr_fields) {					\
		ret = bch2_varint_decode_fast(pos, end, &val);		\
		if (ret < 0)						\
			return ret;					\
		pos += ret;						\
	}								\
	out->_name = val;						\
	if (out->_name != val)						\
		return -1;						\
	idx++;

	BCH_ALLOC_FIELDS_V2()
#undef x
	return 0;
}
static void bch2_alloc_pack_v3(struct bkey_alloc_buf *dst,
const struct bkey_alloc_unpacked src)
{
struct bkey_i_alloc_v2 *a = bkey_alloc_v2_init(&dst->k);
struct bkey_i_alloc_v3 *a = bkey_alloc_v3_init(&dst->k);
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
u8 *out = a->v.data;
u8 *end = (void *) &dst[1];
@ -161,6 +195,7 @@ static void bch2_alloc_pack_v2(struct bkey_alloc_buf *dst,
a->v.gen = src.gen;
a->v.oldest_gen = src.oldest_gen;
a->v.data_type = src.data_type;
a->v.journal_seq = cpu_to_le64(src.journal_seq);
#define x(_name, _bits) \
nr_fields++; \
@ -194,10 +229,17 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
.gen = 0,
};
if (k.k->type == KEY_TYPE_alloc_v2)
bch2_alloc_unpack_v2(&ret, k);
else if (k.k->type == KEY_TYPE_alloc)
switch (k.k->type) {
case KEY_TYPE_alloc:
bch2_alloc_unpack_v1(&ret, k);
break;
case KEY_TYPE_alloc_v2:
bch2_alloc_unpack_v2(&ret, k);
break;
case KEY_TYPE_alloc_v3:
bch2_alloc_unpack_v3(&ret, k);
break;
}
return ret;
}
@ -206,7 +248,7 @@ void bch2_alloc_pack(struct bch_fs *c,
struct bkey_alloc_buf *dst,
const struct bkey_alloc_unpacked src)
{
bch2_alloc_pack_v2(dst, src);
bch2_alloc_pack_v3(dst, src);
}
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@ -249,13 +291,28 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
return NULL;
}
const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_alloc_unpacked u;
if (k.k->p.inode >= c->sb.nr_devices ||
!c->devs[k.k->p.inode])
return "invalid device";
if (bch2_alloc_unpack_v3(&u, k))
return "unpack error";
return NULL;
}
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
pr_buf(out, "gen %u oldest_gen %u data_type %s",
u.gen, u.oldest_gen, bch2_data_types[u.data_type]);
pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu",
u.gen, u.oldest_gen, bch2_data_types[u.data_type],
u.journal_seq);
#define x(_name, ...) pr_buf(out, " " #_name " %llu", (u64) u._name);
BCH_ALLOC_FIELDS_V2()
#undef x
@ -268,8 +325,7 @@ static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
struct bucket *g;
struct bkey_alloc_unpacked u;
if (k.k->type != KEY_TYPE_alloc &&
k.k->type != KEY_TYPE_alloc_v2)
if (!bkey_is_alloc(k.k))
return 0;
ca = bch_dev_bkey_exists(c, k.k->p.inode);

View File

@ -9,6 +9,7 @@
extern const char * const bch2_allocator_states[];
struct bkey_alloc_unpacked {
u64 journal_seq;
u64 bucket;
u8 dev;
u8 gen;
@ -21,19 +22,11 @@ struct bkey_alloc_unpacked {
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
union {
struct {
#define x(_name, _bits) + _bits / 8
u8 _pad[8 + BCH_ALLOC_FIELDS_V1()];
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} _v1;
struct {
#define x(_name, _bits) + 8 + _bits / 8
u8 _pad[8 + BCH_ALLOC_FIELDS_V2()];
#undef x
} _v2;
};
} __attribute__((packed, aligned(8)));
/* How out of date a pointer gen is allowed to be: */
@ -79,6 +72,7 @@ alloc_mem_to_key(struct btree_iter *iter,
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) { \
@ -91,6 +85,18 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.val_to_text = bch2_alloc_to_text, \
}
/*
 * bkey_ops initializer for alloc_v3 keys: validated by
 * bch2_alloc_v3_invalid(), printed by bch2_alloc_to_text().
 */
#define bch2_bkey_ops_alloc_v3 (struct bkey_ops) {	\
	.val_to_text	= bch2_alloc_to_text,		\
	.key_invalid	= bch2_alloc_v3_invalid,	\
}
/* True if @k has any of the alloc key types (alloc, alloc_v2, alloc_v3). */
static inline bool bkey_is_alloc(const struct bkey *k)
{
	switch (k->type) {
	case KEY_TYPE_alloc:
	case KEY_TYPE_alloc_v2:
	case KEY_TYPE_alloc_v3:
		return true;
	default:
		return false;
	}
}
int bch2_alloc_read(struct bch_fs *);
static inline void bch2_wake_allocator(struct bch_dev *ca)

View File

@ -344,7 +344,9 @@ static inline void bkey_init(struct bkey *k)
x(indirect_inline_data, 19) \
x(alloc_v2, 20) \
x(subvolume, 21) \
x(snapshot, 22)
x(snapshot, 22) \
x(inode_v2, 23) \
x(alloc_v3, 24)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@ -681,6 +683,16 @@ struct bch_inode {
__u8 fields[0];
} __attribute__((packed, aligned(8)));
/*
 * Value layout for inode_v2 keys: a fixed header followed by varint-encoded
 * fields.
 */
struct bch_inode_v2 {
	struct bch_val		v;

	/* little-endian; bch2_mark_inode() stores a journal sequence number here on insert */
	__le64			bi_journal_seq;
	/* copied to/from the unpacked inode without byte swapping */
	__le64			bi_hash_seed;
	/* little-endian; INODEv2_STR_HASH is bits 20-24, INODEv2_NR_FIELDS bits 24-31 */
	__le64			bi_flags;
	__le16			bi_mode;
	/*
	 * Varint-encoded BCH_INODE_FIELDS() values; INODEv2_NR_FIELDS says how
	 * many are present.  Declared as a flexible array member rather than
	 * the deprecated zero-length array form; size and layout are the same.
	 */
	__u8			fields[];
} __attribute__((packed, aligned(8)));
struct bch_inode_generation {
struct bch_val v;
@ -772,6 +784,9 @@ LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
/* Dirents */
/*
@ -866,6 +881,17 @@ struct bch_alloc_v2 {
x(stripe, 32) \
x(stripe_redundancy, 8)
/*
 * Value layout for alloc_v3 keys: a fixed header followed by varint-encoded
 * fields (decoded by bch2_alloc_unpack_v3()).
 */
struct bch_alloc_v3 {
struct bch_val v;
/* little-endian; bch2_mark_alloc() stores a journal sequence number here on insert */
__le64 journal_seq;
/* NOTE(review): no visible code reads or writes flags - confirm intended use */
__le32 flags;
/* number of fields actually encoded in data[]; fields past this unpack as 0 */
__u8 nr_fields;
__u8 gen;
__u8 oldest_gen;
__u8 data_type;
/* varint-encoded BCH_ALLOC_FIELDS_V2() values, in declaration order */
__u8 data[];
} __attribute__((packed, aligned(8)));
enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
BCH_ALLOC_FIELDS_V1()
@ -1272,7 +1298,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_snapshot_2 = 15,
bcachefs_metadata_version_reflink_p_fix = 16,
bcachefs_metadata_version_subvol_dirent = 17,
bcachefs_metadata_version_max = 18,
bcachefs_metadata_version_inode_v2 = 18,
bcachefs_metadata_version_max = 19,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)

View File

@ -113,6 +113,7 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_inode)|
(1U << KEY_TYPE_inode_v2)|
(1U << KEY_TYPE_inode_generation),
[BKEY_TYPE_dirents] =
(1U << KEY_TYPE_deleted)|
@ -128,7 +129,8 @@ static unsigned bch2_key_types_allowed[] = {
[BKEY_TYPE_alloc] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_alloc)|
(1U << KEY_TYPE_alloc_v2),
(1U << KEY_TYPE_alloc_v2)|
(1U << KEY_TYPE_alloc_v3),
[BKEY_TYPE_quotas] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_quota),

View File

@ -605,6 +605,7 @@ static inline bool btree_node_is_extents(struct btree *b)
#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
((1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \
(1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_snapshots))
@ -652,8 +653,12 @@ enum btree_update_flags {
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
((1U << KEY_TYPE_stripe)| \
((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \
(1U << KEY_TYPE_snapshot))
static inline bool btree_node_type_needs_gc(enum btree_node_type type)

View File

@ -362,9 +362,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
i->level,
i->k);
bch2_journal_set_has_inode(j, &trans->journal_res,
i->k->k.p.inode);
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
}

View File

@ -13,6 +13,7 @@
#include "buckets.h"
#include "ec.h"
#include "error.h"
#include "inode.h"
#include "movinggc.h"
#include "recovery.h"
#include "reflink.h"
@ -537,8 +538,7 @@ static int bch2_mark_alloc(struct btree_trans *trans,
struct bucket_mark old_m, m;
/* We don't do anything for deletions - do we?: */
if (new.k->type != KEY_TYPE_alloc &&
new.k->type != KEY_TYPE_alloc_v2)
if (!bkey_is_alloc(new.k))
return 0;
/*
@ -548,6 +548,15 @@ static int bch2_mark_alloc(struct btree_trans *trans,
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
return 0;
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_alloc_v3 *v = (struct bch_alloc_v3 *) new.v;
BUG_ON(!journal_seq);
BUG_ON(new.k->type != KEY_TYPE_alloc_v3);
v->journal_seq = cpu_to_le64(journal_seq);
}
ca = bch_dev_bkey_exists(c, new.k->p.inode);
if (new.k->p.offset >= ca->mi.nbuckets)
@ -1091,12 +1100,24 @@ static int bch2_mark_inode(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bch_fs_usage __percpu *fs_usage;
u64 journal_seq = trans->journal_res.seq;
preempt_disable();
fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC);
fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
preempt_enable();
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
BUG_ON(!journal_seq);
BUG_ON(new.k->type != KEY_TYPE_inode_v2);
v->bi_journal_seq = cpu_to_le64(journal_seq);
}
if (flags & BTREE_TRIGGER_GC) {
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
fs_usage->nr_inodes += bkey_is_inode(new.k);
fs_usage->nr_inodes -= bkey_is_inode(old.k);
preempt_enable();
}
return 0;
}
@ -1215,6 +1236,7 @@ static int bch2_mark_key_locked(struct btree_trans *trans,
switch (k.k->type) {
case KEY_TYPE_alloc:
case KEY_TYPE_alloc_v2:
case KEY_TYPE_alloc_v3:
return bch2_mark_alloc(trans, old, new, flags);
case KEY_TYPE_btree_ptr:
case KEY_TYPE_btree_ptr_v2:
@ -1224,6 +1246,7 @@ static int bch2_mark_key_locked(struct btree_trans *trans,
case KEY_TYPE_stripe:
return bch2_mark_stripe(trans, old, new, flags);
case KEY_TYPE_inode:
case KEY_TYPE_inode_v2:
return bch2_mark_inode(trans, old, new, flags);
case KEY_TYPE_reservation:
return bch2_mark_reservation(trans, old, new, flags);
@ -1680,8 +1703,7 @@ static int bch2_trans_mark_inode(struct btree_trans *trans,
struct bkey_s_c new,
unsigned flags)
{
int nr = (new.k->type == KEY_TYPE_inode) -
(old.k->type == KEY_TYPE_inode);
int nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
if (nr) {
struct replicas_delta_list *d =
@ -1829,6 +1851,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
case KEY_TYPE_stripe:
return bch2_trans_mark_stripe(trans, old, new, flags);
case KEY_TYPE_inode:
case KEY_TYPE_inode_v2:
return bch2_trans_mark_inode(trans, old, new, flags);
case KEY_TYPE_reservation:
return bch2_trans_mark_reservation(trans, k, flags);

View File

@ -57,8 +57,6 @@ static void journal_seq_copy(struct bch_fs *c,
if (old >= journal_seq)
break;
} while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old);
bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq);
}
static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
@ -257,8 +255,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
bch2_vfs_inode_init(c, inum, inode, &inode_u);
inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum.inum);
unlock_new_inode(&inode->v);
return &inode->v;
@ -1205,7 +1201,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum,
inode->v.i_size = bi->bi_size;
inode->ei_flags = 0;
inode->ei_journal_seq = 0;
inode->ei_journal_seq = bi->bi_journal_seq;
inode->ei_quota_reserved = 0;
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;

View File

@ -133,7 +133,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
goto err;
}
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
ret = bch2_inode_unpack(k, inode);
err:
if (ret && ret != -EINTR)
bch_err(trans->c, "error %i fetching inode %llu",
@ -157,8 +157,8 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
if (ret)
goto err;
ret = k.k->type == KEY_TYPE_inode
? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
ret = bkey_is_inode(k.k)
? bch2_inode_unpack(k, inode)
: -ENOENT;
if (!ret)
*snapshot = iter.pos.snapshot;
@ -261,7 +261,7 @@ retry:
if (ret)
goto err;
if (k.k->type != KEY_TYPE_inode) {
if (!bkey_is_inode(k.k)) {
bch2_fs_inconsistent(trans->c,
"inode %llu:%u not found when deleting",
inum, snapshot);
@ -269,7 +269,7 @@ retry:
goto err;
}
bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
bch2_inode_unpack(k, &inode_u);
/* Subvolume root? */
if (inode_u.bi_subvol) {
@ -581,7 +581,7 @@ static int inode_walker_realloc(struct inode_walker *w)
}
static int add_inode(struct bch_fs *c, struct inode_walker *w,
struct bkey_s_c_inode inode)
struct bkey_s_c inode)
{
struct bch_inode_unpacked u;
int ret;
@ -623,8 +623,8 @@ static int __walk_inode(struct btree_trans *trans,
if (k.k->p.offset != pos.inode)
break;
if (k.k->type == KEY_TYPE_inode)
add_inode(c, w, bkey_s_c_to_inode(k));
if (bkey_is_inode(k.k))
add_inode(c, w, k);
}
bch2_trans_iter_exit(trans, &iter);
@ -676,11 +676,11 @@ static int __get_visible_inodes(struct btree_trans *trans,
if (k.k->p.offset != inum)
break;
if (k.k->type != KEY_TYPE_inode)
if (!bkey_is_inode(k.k))
continue;
if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) {
add_inode(c, w, bkey_s_c_to_inode(k));
add_inode(c, w, k);
if (k.k->p.snapshot >= s->pos.snapshot)
break;
}
@ -805,7 +805,6 @@ static int check_inode(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
struct bch_inode_unpacked u;
bool do_update = false;
int ret;
@ -830,19 +829,17 @@ static int check_inode(struct btree_trans *trans,
if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
return 0;
if (k.k->type != KEY_TYPE_inode)
if (!bkey_is_inode(k.k))
return 0;
inode = bkey_s_c_to_inode(k);
BUG_ON(bch2_inode_unpack(k, &u));
if (!full &&
!(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)))
!(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)))
return 0;
BUG_ON(bch2_inode_unpack(inode, &u));
if (prev->bi_inum != u.bi_inum)
*prev = u;
@ -1963,10 +1960,10 @@ static int check_directory_structure(struct bch_fs *c)
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->type != KEY_TYPE_inode)
if (!bkey_is_inode(k.k))
continue;
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
ret = bch2_inode_unpack(k, &u);
if (ret) {
/* Should have been caught earlier in fsck: */
bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
@ -2070,7 +2067,6 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
struct bch_inode_unpacked u;
int ret = 0;
@ -2081,21 +2077,19 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->type != KEY_TYPE_inode)
if (!bkey_is_inode(k.k))
continue;
inode = bkey_s_c_to_inode(k);
/* Should never fail, checked by bch2_inode_invalid: */
BUG_ON(bch2_inode_unpack(k, &u));
/*
* Backpointer and directory structure checks are sufficient for
* directories, since they can't have hardlinks:
*/
if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
if (S_ISDIR(le16_to_cpu(u.bi_mode)))
continue;
/* Should never fail, checked by bch2_inode_invalid: */
BUG_ON(bch2_inode_unpack(inode, &u));
if (!u.bi_nlink)
continue;
@ -2169,7 +2163,6 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
struct bch_inode_unpacked u;
struct nlink *link = links->d;
int ret = 0;
@ -2184,14 +2177,13 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
if (k.k->p.offset >= range_end)
break;
if (k.k->type != KEY_TYPE_inode)
if (!bkey_is_inode(k.k))
continue;
inode = bkey_s_c_to_inode(k);
if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
continue;
BUG_ON(bch2_inode_unpack(k, &u));
BUG_ON(bch2_inode_unpack(inode, &u));
if (S_ISDIR(le16_to_cpu(u.bi_mode)))
continue;
if (!u.bi_nlink)
continue;

View File

@ -35,29 +35,6 @@ static const u8 bits_table[8] = {
13 * 8 - 8,
};
/*
 * Encode the 128-bit value @hi:@lo at @out.
 *
 * The value is laid out big-endian and the shortest encoding whose
 * bits_table[] entry exceeds the value's bit length is chosen; its length in
 * bytes comes from byte_table[].  The encoding length is marked by OR-ing the
 * bit (1 << 8) >> shift into the first output byte.
 *
 * BUGs if no table entry is large enough or if the encoding would run past
 * @end.  Returns the number of bytes written.
 */
static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo)
{
	__be64 raw[2] = { cpu_to_be64(hi), cpu_to_be64(lo), };
	unsigned shift, bytes, bits;

	if (likely(!hi))
		bits = fls64(lo);
	else
		bits = fls64(hi) + 64;

	for (shift = 1; shift <= 8; shift++)
		if (bits < bits_table[shift - 1])
			break;

	/* fell off the end of the table: value too wide to encode */
	if (shift > 8)
		BUG();

	bytes = byte_table[shift - 1];
	BUG_ON(out + bytes > end);

	/* copy only the low-order @bytes bytes of the big-endian image */
	memcpy(out, (u8 *) raw + 16 - bytes, bytes);
	*out |= (1 << 8) >> shift;

	return bytes;
}
static int inode_decode_field(const u8 *in, const u8 *end,
u64 out[2], unsigned *out_bits)
{
@ -92,42 +69,11 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes;
}
static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
struct bkey_i_inode *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
unsigned bytes;
#define x(_name, _bits) \
out += inode_encode_field(out, end, 0, inode->_name); \
nr_fields++; \
\
if (inode->_name) { \
last_nonzero_field = out; \
last_nonzero_fieldnr = nr_fields; \
}
BCH_INODE_FIELDS()
#undef x
out = last_nonzero_field;
nr_fields = last_nonzero_fieldnr;
bytes = out - (u8 *) &packed->inode.v;
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
struct bkey_i_inode *k = &packed->inode;
struct bkey_i_inode_v2 *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
@ -135,6 +81,14 @@ static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
unsigned bytes;
int ret;
bkey_inode_v2_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
#define x(_name, _bits) \
nr_fields++; \
\
@ -165,30 +119,12 @@ static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bkey_inode_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
SET_INODE_NEW_VARINT(&packed->inode.v, true);
bch2_inode_pack_v2(packed, inode);
} else {
bch2_inode_pack_v1(packed, inode);
}
SET_INODEv2_NR_FIELDS(&k->v, nr_fields);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode),
int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i),
&unpacked);
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
@ -237,17 +173,16 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
return 0;
}
static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
const u8 *in, const u8 *end,
unsigned nr_fields)
{
const u8 *in = inode.v->fields;
const u8 *end = bkey_val_end(inode);
unsigned fieldnr = 0;
int ret;
u64 v[2];
#define x(_name, _bits) \
if (fieldnr < INODE_NR_FIELDS(inode.v)) { \
if (fieldnr < nr_fields) { \
ret = bch2_varint_decode_fast(in, end, &v[0]); \
if (ret < 0) \
return ret; \
@ -277,21 +212,43 @@ static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
return 0;
}
int bch2_inode_unpack(struct bkey_s_c_inode inode,
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
switch (k.k->type) {
case KEY_TYPE_inode: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
if (INODE_NEW_VARINT(inode.v)) {
return bch2_inode_unpack_v2(inode, unpacked);
} else {
return bch2_inode_unpack_v1(inode, unpacked);
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
if (INODE_NEW_VARINT(inode.v)) {
return bch2_inode_unpack_v2(unpacked, inode.v->fields,
bkey_val_end(inode),
INODE_NR_FIELDS(inode.v));
} else {
return bch2_inode_unpack_v1(inode, unpacked);
}
break;
}
case KEY_TYPE_inode_v2: {
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
return 0;
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
return bch2_inode_unpack_v2(unpacked, inode.v->fields,
bkey_val_end(inode),
INODEv2_NR_FIELDS(inode.v));
}
default:
BUG();
}
}
int bch2_inode_peek(struct btree_trans *trans,
@ -317,11 +274,11 @@ int bch2_inode_peek(struct btree_trans *trans,
if (ret)
goto err;
ret = k.k->type == KEY_TYPE_inode ? 0 : -ENOENT;
ret = bkey_is_inode(k.k) ? 0 : -ENOENT;
if (ret)
goto err;
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
ret = bch2_inode_unpack(k, inode);
if (ret)
goto err;
@ -363,7 +320,43 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
return "invalid str hash type";
if (bch2_inode_unpack(inode, &unpacked))
if (bch2_inode_unpack(k, &unpacked))
return "invalid variable length fields";
if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
return "invalid data checksum type";
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
return "invalid data checksum type";
if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
unpacked.bi_nlink != 0)
return "flagged as unlinked but bi_nlink != 0";
if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode))
return "subvolume root but not a directory";
return NULL;
}
const char *bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
struct bch_inode_unpacked unpacked;
if (k.k->p.inode)
return "nonzero k.p.inode";
if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
return "incorrect value size";
if (k.k->p.offset < BLOCKDEV_INODE_MAX)
return "fs inode in blockdev range";
if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
return "invalid str hash type";
if (bch2_inode_unpack(k, &unpacked))
return "invalid variable length fields";
if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
@ -384,10 +377,12 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
pr_buf(out, "mode %o flags %x ", inode->bi_mode, inode->bi_flags);
pr_buf(out, "mode %o flags %x journal_seq %llu",
inode->bi_mode, inode->bi_flags,
inode->bi_journal_seq);
#define x(_name, _bits) \
pr_buf(out, #_name " %llu ", (u64) inode->_name);
pr_buf(out, " "#_name " %llu", (u64) inode->_name);
BCH_INODE_FIELDS()
#undef x
}
@ -401,15 +396,14 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
struct bch_inode_unpacked unpacked;
struct bch_inode_unpacked inode;
if (bch2_inode_unpack(inode, &unpacked)) {
if (bch2_inode_unpack(k, &inode)) {
pr_buf(out, "(unpack error)");
return;
}
__bch2_inode_unpacked_to_text(out, &unpacked);
__bch2_inode_unpacked_to_text(out, &inode);
}
const char *bch2_inode_generation_invalid(const struct bch_fs *c,
@ -485,6 +479,7 @@ static inline u32 bkey_generation(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_inode:
case KEY_TYPE_inode_v2:
BUG();
case KEY_TYPE_inode_generation:
return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
@ -542,7 +537,7 @@ again:
}
if (k.k->p.snapshot == snapshot &&
k.k->type != KEY_TYPE_inode &&
!bkey_is_inode(k.k) &&
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
bch2_btree_iter_advance(iter);
continue;
@ -585,7 +580,7 @@ found_slot:
}
/* We may have raced while the iterator wasn't pointing at pos: */
if (k.k->type == KEY_TYPE_inode ||
if (bkey_is_inode(k.k) ||
bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
goto again;
@ -698,7 +693,7 @@ retry:
if (ret)
goto err;
if (k.k->type != KEY_TYPE_inode) {
if (!bkey_is_inode(k.k)) {
bch2_fs_inconsistent(trans.c,
"inode %llu not found when deleting",
inum.inum);
@ -706,7 +701,7 @@ retry:
goto err;
}
bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
bch2_inode_unpack(k, &inode_u);
/* Subvolume root? */
BUG_ON(inode_u.bi_subvol);

View File

@ -7,6 +7,7 @@
extern const char * const bch2_inode_opts[];
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
const char *bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode (struct bkey_ops) { \
@ -14,6 +15,17 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.val_to_text = bch2_inode_to_text, \
}
/*
 * bkey_ops initializer for inode_v2 keys: validated by
 * bch2_inode_v2_invalid(), printed by bch2_inode_to_text().
 */
#define bch2_bkey_ops_inode_v2 (struct bkey_ops) {	\
	.val_to_text	= bch2_inode_to_text,		\
	.key_invalid	= bch2_inode_v2_invalid,	\
}
/* True if @k has either inode key type (inode or inode_v2). */
static inline bool bkey_is_inode(const struct bkey *k)
{
	switch (k->type) {
	case KEY_TYPE_inode:
	case KEY_TYPE_inode_v2:
		return true;
	default:
		return false;
	}
}
const char *bch2_inode_generation_invalid(const struct bch_fs *,
struct bkey_s_c);
void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
@ -34,6 +46,7 @@ typedef u64 u96;
struct bch_inode_unpacked {
u64 bi_inum;
u64 bi_journal_seq;
__le64 bi_hash_seed;
u32 bi_flags;
u16 bi_mode;
@ -44,7 +57,7 @@ struct bch_inode_unpacked {
};
struct bkey_inode_buf {
struct bkey_i_inode inode;
struct bkey_i_inode_v2 inode;
#define x(_name, _bits) + 8 + _bits / 8
u8 _pad[0 + BCH_INODE_FIELDS()];
@ -53,7 +66,7 @@ struct bkey_inode_buf {
void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);

View File

@ -269,6 +269,8 @@ int bch2_extent_update(struct btree_trans *trans,
{
/* this must live until after bch2_trans_commit(): */
struct bkey_inode_buf inode_p;
struct btree_iter inode_iter;
struct bch_inode_unpacked inode_u;
struct bpos next_pos;
bool extending = false, usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@ -313,50 +315,45 @@ int bch2_extent_update(struct btree_trans *trans,
? min(k->k.p.offset << 9, new_i_size)
: 0;
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
BTREE_ITER_INTENT);
if (ret)
return ret;
/*
* XXX:
* writeback can race a bit with truncate, because truncate
* first updates the inode then truncates the pagecache. This is
* ugly, but lets us preserve the invariant that the in memory
* i_size is always >= the on disk i_size.
*
BUG_ON(new_i_size > inode_u.bi_size &&
(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
*/
BUG_ON(new_i_size > inode_u.bi_size && !extending);
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size)
inode_u.bi_size = new_i_size;
else
new_i_size = 0;
inode_u.bi_sectors += i_sectors_delta;
if (i_sectors_delta || new_i_size) {
struct btree_iter inode_iter;
struct bch_inode_unpacked inode_u;
bch2_inode_pack(trans->c, &inode_p, &inode_u);
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
BTREE_ITER_INTENT);
if (ret)
return ret;
inode_p.inode.k.p.snapshot = iter->snapshot;
/*
* XXX:
* writeback can race a bit with truncate, because truncate
* first updates the inode then truncates the pagecache. This is
* ugly, but lets us preserve the invariant that the in memory
* i_size is always >= the on disk i_size.
*
BUG_ON(new_i_size > inode_u.bi_size &&
(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
*/
BUG_ON(new_i_size > inode_u.bi_size && !extending);
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size)
inode_u.bi_size = new_i_size;
else
new_i_size = 0;
inode_u.bi_sectors += i_sectors_delta;
if (i_sectors_delta || new_i_size) {
bch2_inode_pack(trans->c, &inode_p, &inode_u);
inode_p.inode.k.p.snapshot = iter->snapshot;
ret = bch2_trans_update(trans, &inode_iter,
&inode_p.inode.k_i, 0);
}
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
ret = bch2_trans_update(trans, &inode_iter,
&inode_p.inode.k_i, 0);
}
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
next_pos = k->k.p;
ret = bch2_trans_update(trans, iter, k, 0) ?:

View File

@ -88,8 +88,6 @@ static void bch2_journal_buf_init(struct journal *j)
buf->must_flush = false;
buf->separate_flush = false;
memset(buf->has_inode, 0, sizeof(buf->has_inode));
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
@ -335,55 +333,6 @@ static void journal_write_work(struct work_struct *work)
journal_entry_close(j);
}
/*
 * Given an inode number, check whether it has data in the journal that hasn't
 * yet been flushed.
 *
 * Under j->lock, walks the journal buffers backwards from the current index
 * to unwritten_idx, testing the inode's hashed bit in each buffer's has_inode
 * bitmap.  Returns the sequence number of the first buffer found with the bit
 * set (the sequence that needs to be flushed), or 0 if none has it set.
 */
u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
{
	size_t bit = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
	union journal_res_state state;
	unsigned idx;
	u64 seq, found = 0;

	spin_lock(&j->lock);

	seq	= journal_cur_seq(j);
	state	= READ_ONCE(j->reservations);

	for (idx = state.idx; ; idx = (idx - 1) & JOURNAL_BUF_MASK, seq--) {
		if (test_bit(bit, j->buf[idx].has_inode)) {
			found = seq;
			break;
		}

		if (idx == state.unwritten_idx)
			break;
	}

	spin_unlock(&j->lock);
	return found;
}
/*
 * Set @inode's hashed bit in the has_inode bitmap of the journal buffer for
 * sequence number @seq.  Does nothing if journal_seq_to_buf() returns no
 * buffer for @seq.  Takes j->lock around the lookup and update.
 */
void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq)
{
	size_t bit = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
	struct journal_buf *buf;

	spin_lock(&j->lock);

	buf = journal_seq_to_buf(j, seq);
	if (buf)
		set_bit(bit, buf->has_inode);

	spin_unlock(&j->lock);
}
static int __journal_res_get(struct journal *j, struct journal_res *res,
unsigned flags)
{

View File

@ -141,7 +141,6 @@ static inline u64 journal_cur_seq(struct journal *j)
return j->pin.back - 1;
}
u64 bch2_inode_journal_seq(struct journal *, u64);
void bch2_journal_set_has_inum(struct journal *, u64, u64);
static inline int journal_state_count(union journal_res_state s, int idx)
@ -163,18 +162,6 @@ static inline void journal_state_inc(union journal_res_state *s)
s->buf3_count += s->idx == 3;
}
/*
 * Set @inum's hashed bit in the has_inode bitmap of the journal buffer that
 * reservation @res points at (j->buf[res->idx]).
 */
static inline void bch2_journal_set_has_inode(struct journal *j,
					      struct journal_res *res,
					      u64 inum)
{
	struct journal_buf *buf = &j->buf[res->idx];
	unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8));

	/* avoid atomic op if possible */
	if (likely(test_bit(bit, buf->has_inode)))
		return;

	set_bit(bit, buf->has_inode);
}
/*
* Amount of space that will be taken up by some keys in the journal (i.e.
* including the jset header)

View File

@ -34,8 +34,6 @@ struct journal_buf {
bool noflush; /* write has already been kicked off, and was noflush */
bool must_flush; /* something wants a flush */
bool separate_flush;
/* bloom filter: */
unsigned long has_inode[1024 / sizeof(unsigned long)];
};
/*

View File

@ -623,11 +623,11 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
goto err;
}
ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
ret = bkey_is_inode(k.k) ? 0 : -EIO;
if (ret)
goto err;
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
ret = bch2_inode_unpack(k, inode);
if (ret)
goto err;
err:

View File

@ -439,9 +439,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
switch (k.k->type) {
case KEY_TYPE_inode:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
if (bkey_is_inode(k.k)) {
ret = bch2_inode_unpack(k, &u);
if (ret)
return ret;

View File

@ -1015,13 +1015,13 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
if (ret)
goto err;
if (k.k->type != KEY_TYPE_inode) {
if (!bkey_is_inode(k.k)) {
bch_err(c, "root inode not found");
ret = -ENOENT;
goto err;
}
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode);
ret = bch2_inode_unpack(k, &inode);
BUG_ON(ret);
inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
@ -1093,6 +1093,9 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "filesystem version is prior to subvol_dirent - upgrading");
c->opts.version_upgrade = true;
c->opts.fsck = true;
} else if (c->sb.version < bcachefs_metadata_version_inode_v2) {
bch_info(c, "filesystem version is prior to inode_v2 - upgrading");
c->opts.version_upgrade = true;
}
}