Update bcachefs sources to 600598598b bcachefs: Also log overwrites in journal

This commit is contained in:
Kent Overstreet 2022-06-05 17:42:24 -04:00
parent 46b2c553aa
commit f682263a73
11 changed files with 225 additions and 104 deletions

View File

@ -1 +1 @@
fad6d13aa55f96e01cc6ff516cdfea53b2fc9eb1
600598598b7c6d2069a374a14ad4925f39a30faa

View File

@ -1790,7 +1790,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(data_usage, 6) \
x(clock, 7) \
x(dev_usage, 8) \
x(log, 9)
x(log, 9) \
x(overwrite, 10)
enum {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,

View File

@ -1850,8 +1850,9 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };
pr_buf(buf, "update: btree %s %pS",
pr_buf(buf, "update: btree=%s cached=%u %pS",
bch2_btree_ids[i->btree_id],
i->cached,
(void *) i->ip_allocated);
pr_newline(buf);
@ -2237,16 +2238,22 @@ static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
struct bpos pos)
{
struct btree_insert_entry *i;
struct bkey_i *ret = NULL;
trans_for_each_update(trans, i)
if ((cmp_int(btree_id, i->btree_id) ?:
bpos_cmp(pos, i->k->k.p)) <= 0) {
if (btree_id == i->btree_id)
return i->k;
trans_for_each_update(trans, i) {
if (i->btree_id < btree_id)
continue;
if (i->btree_id > btree_id)
break;
}
if (bpos_cmp(i->k->k.p, pos) < 0)
continue;
if (i->key_cache_already_flushed)
continue;
if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
ret = i->k;
}
return NULL;
return ret;
}
struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,

View File

@ -562,6 +562,16 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
return true;
}
/*
 * Invalidate a key-cache entry without writing it back: marks the cached
 * key as no longer valid so subsequent lookups fall through to the btree.
 *
 * NOTE(review): @trans is unused here; presumably kept for interface
 * symmetry with the other key-cache operations — confirm against callers.
 */
void bch2_btree_key_cache_drop(struct btree_trans *trans,
struct btree_path *path)
{
/* For a cached path, l[0].b actually points at a struct bkey_cached */
struct bkey_cached *ck = (void *) path->l[0].b;
ck->valid = false;
/* Dropping a dirty entry would lose an update that was never flushed */
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{

View File

@ -32,6 +32,8 @@ bool bch2_btree_insert_key_cached(struct btree_trans *,
struct btree_path *, struct bkey_i *);
int bch2_btree_key_cache_flush(struct btree_trans *,
enum btree_id, struct bpos);
void bch2_btree_key_cache_drop(struct btree_trans *,
struct btree_path *);
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);

View File

@ -348,6 +348,7 @@ struct btree_insert_entry {
bool cached:1;
bool insert_trigger_run:1;
bool overwrite_trigger_run:1;
bool key_cache_already_flushed:1;
/*
* @old_k may be a key from the journal; @old_btree_u64s always refers
* to the size of the key being overwritten in the btree:

View File

@ -31,6 +31,7 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
const struct btree_insert_entry *r)
{
return cmp_int(l->btree_id, r->btree_id) ?:
cmp_int(l->cached, r->cached) ?:
-cmp_int(l->level, r->level) ?:
bpos_cmp(l->k->k.p, r->k->k.p);
}
@ -308,25 +309,15 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
static void journal_transaction_name(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct jset_entry *entry = journal_res_entry(&c->journal, &trans->journal_res);
struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
unsigned u64s = JSET_ENTRY_LOG_U64s - 1;
unsigned b, buflen = u64s * sizeof(u64);
struct journal *j = &c->journal;
struct jset_entry *entry =
bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_log, 0, 0,
JSET_ENTRY_LOG_U64s);
struct jset_entry_log *l =
container_of(entry, struct jset_entry_log, entry);
l->entry.u64s = cpu_to_le16(u64s);
l->entry.btree_id = 0;
l->entry.level = 0;
l->entry.type = BCH_JSET_ENTRY_log;
l->entry.pad[0] = 0;
l->entry.pad[1] = 0;
l->entry.pad[2] = 0;
b = min_t(unsigned, strlen(trans->fn), buflen);
memcpy(l->d, trans->fn, b);
while (b < buflen)
l->d[b++] = '\0';
trans->journal_res.offset += JSET_ENTRY_LOG_U64s;
trans->journal_res.u64s -= JSET_ENTRY_LOG_U64s;
strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
}
static inline enum btree_insert_ret
@ -393,33 +384,6 @@ btree_key_can_insert_cached(struct btree_trans *trans,
return -EINTR;
}
/*
 * Apply a single transaction update: insert the key into either the btree
 * leaf or the key cache, then (unless we're replaying the journal) add the
 * key to the journal reservation already held by this transaction.
 */
static inline void do_btree_insert_one(struct btree_trans *trans,
struct btree_insert_entry *i)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
/* We must hold a journal reservation exactly when not in journal replay */
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
i->k->k.needs_whiteout = false;
/* Cached updates go through the key cache instead of the leaf node */
if (!i->cached)
btree_insert_key_leaf(trans, i);
else
bch2_btree_insert_key_cached(trans, i->path, i->k);
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
bch2_journal_add_keys(j, &trans->journal_res,
i->btree_id,
i->level,
i->k);
/* Report the journal sequence number this update landed in */
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
}
}
/* Triggers: */
static int run_one_mem_trigger(struct btree_trans *trans,
@ -729,8 +693,42 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret;
}
trans_for_each_update(trans, i)
do_btree_insert_one(trans, i);
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
trans_for_each_update(trans, i) {
struct journal *j = &c->journal;
struct jset_entry *entry;
if (i->key_cache_already_flushed)
continue;
entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_overwrite,
i->btree_id, i->level,
i->old_k.u64s);
bkey_reassemble(&entry->start[0],
(struct bkey_s_c) { &i->old_k, i->old_v });
entry = bch2_journal_add_entry(j, &trans->journal_res,
BCH_JSET_ENTRY_btree_keys,
i->btree_id, i->level,
i->k->k.u64s);
bkey_copy(&entry->start[0], i->k);
}
if (trans->journal_seq)
*trans->journal_seq = trans->journal_res.seq;
}
trans_for_each_update(trans, i) {
i->k->k.needs_whiteout = false;
if (!i->cached)
btree_insert_key_leaf(trans, i);
else if (!i->key_cache_already_flushed)
bch2_btree_insert_key_cached(trans, i->path, i->k);
else
bch2_btree_key_cache_drop(trans, i->path);
}
return ret;
}
@ -1118,7 +1116,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans->journal_preres_u64s = 0;
/* For journalling transaction name: */
trans->journal_u64s += JSET_ENTRY_LOG_U64s;
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
trans_for_each_update(trans, i) {
BUG_ON(!i->path->should_be_locked);
@ -1132,11 +1130,18 @@ int __bch2_trans_commit(struct btree_trans *trans)
BUG_ON(!btree_node_intent_locked(i->path, i->level));
if (i->key_cache_already_flushed)
continue;
/* we're going to journal the key being updated: */
u64s = jset_u64s(i->k->k.u64s);
if (i->cached &&
likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
trans->journal_preres_u64s += u64s;
trans->journal_u64s += u64s;
/* and we're also going to log the overwrite: */
trans->journal_u64s += jset_u64s(i->old_k.u64s);
}
if (trans->extra_journal_res) {
@ -1473,11 +1478,13 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
}
static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags)
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags,
unsigned long ip)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i, n;
int ret = 0;
BUG_ON(!path->should_be_locked);
@ -1492,7 +1499,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
.cached = path->cached,
.path = path,
.k = k,
.ip_allocated = _RET_IP_,
.ip_allocated = ip,
};
#ifdef CONFIG_BCACHEFS_DEBUG
@ -1537,8 +1544,43 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
}
}
__btree_path_get(n.path, true);
return 0;
__btree_path_get(i->path, true);
/*
* If a key is present in the key cache, it must also exist in the
* btree - this is necessary for cache coherency. When iterating over
* a btree that's cached in the key cache, the btree iter code checks
* the key cache - but the key has to exist in the btree for that to
* work:
*/
if (path->cached &&
bkey_deleted(&i->old_k)) {
struct btree_path *btree_path;
i->key_cache_already_flushed = true;
i->flags |= BTREE_TRIGGER_NORUN;
btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT, _THIS_IP_);
ret = bch2_btree_path_traverse(trans, btree_path, 0);
if (ret)
goto err;
btree_path->should_be_locked = true;
ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
err:
bch2_path_put(trans, btree_path, true);
}
return ret;
}
/*
 * Thin wrapper around bch2_trans_update_by_path_trace() that records the
 * caller's return address (_RET_IP_) as the allocation site, so debug
 * output can attribute the update to the real caller.
 */
static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags)
{
return bch2_trans_update_by_path_trace(trans, path, k, flags, _RET_IP_);
}
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
@ -1562,6 +1604,9 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
k->k.type = KEY_TYPE_whiteout;
}
/*
* Ensure that updates to cached btrees go to the key cache:
*/
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
!path->cached &&
!path->level &&

View File

@ -114,15 +114,41 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
struct nonce nonce,
void *buf, size_t len)
{
struct scatterlist sg;
if (!is_vmalloc_addr(buf)) {
struct scatterlist sg;
sg_init_table(&sg, 1);
sg_set_page(&sg,
is_vmalloc_addr(buf)
? vmalloc_to_page(buf)
: virt_to_page(buf),
len, offset_in_page(buf));
return do_encrypt_sg(tfm, nonce, &sg, len);
sg_init_table(&sg, 1);
sg_set_page(&sg,
is_vmalloc_addr(buf)
? vmalloc_to_page(buf)
: virt_to_page(buf),
len, offset_in_page(buf));
return do_encrypt_sg(tfm, nonce, &sg, len);
} else {
unsigned pages = buf_pages(buf, len);
struct scatterlist *sg;
size_t orig_len = len;
int ret, i;
sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL);
if (!sg)
return -ENOMEM;
sg_init_table(sg, pages);
for (i = 0; i < pages; i++) {
unsigned offset = offset_in_page(buf);
unsigned pg_len = min(len, PAGE_SIZE - offset);
sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
buf += pg_len;
len -= pg_len;
}
ret = do_encrypt_sg(tfm, nonce, sg, orig_len);
kfree(sg);
return ret;
}
}
int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,

View File

@ -199,9 +199,9 @@ journal_res_entry(struct journal *j, struct journal_res *res)
return vstruct_idx(j->buf[res->idx].data, res->offset);
}
static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
enum btree_id id, unsigned level,
const void *data, unsigned u64s)
unsigned u64s)
{
entry->u64s = cpu_to_le16(u64s);
entry->btree_id = id;
@ -210,32 +210,33 @@ static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type
entry->pad[0] = 0;
entry->pad[1] = 0;
entry->pad[2] = 0;
memcpy_u64s_small(entry->_data, data, u64s);
return jset_u64s(u64s);
}
static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
unsigned type, enum btree_id id,
unsigned level,
static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
enum btree_id id, unsigned level,
const void *data, unsigned u64s)
{
unsigned actual = journal_entry_set(journal_res_entry(j, res),
type, id, level, data, u64s);
unsigned ret = journal_entry_init(entry, type, id, level, u64s);
memcpy_u64s_small(entry->_data, data, u64s);
return ret;
}
static inline struct jset_entry *
bch2_journal_add_entry(struct journal *j, struct journal_res *res,
unsigned type, enum btree_id id,
unsigned level, unsigned u64s)
{
struct jset_entry *entry = journal_res_entry(j, res);
unsigned actual = journal_entry_init(entry, type, id, level, u64s);
EBUG_ON(!res->ref);
EBUG_ON(actual > res->u64s);
res->offset += actual;
res->u64s -= actual;
}
static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
enum btree_id id, unsigned level,
const struct bkey_i *k)
{
bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys,
id, level, k, k->k.u64s);
return entry;
}
static inline bool journal_entry_empty(struct jset *j)
@ -283,7 +284,7 @@ static inline void bch2_journal_res_put(struct journal *j,
while (res->u64s)
bch2_journal_add_entry(j, res,
BCH_JSET_ENTRY_btree_keys,
0, 0, NULL, 0);
0, 0, 0);
bch2_journal_buf_put(j, res->idx);

View File

@ -213,7 +213,7 @@ static void journal_entry_null_range(void *start, void *end)
static int journal_validate_key(struct bch_fs *c, const char *where,
struct jset_entry *entry,
unsigned level, enum btree_id btree_id,
struct bkey_i *k, const char *type,
struct bkey_i *k,
unsigned version, int big_endian, int write)
{
void *next = vstruct_next(entry);
@ -221,8 +221,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
int ret = 0;
if (journal_entry_err_on(!k->k.u64s, c,
"invalid %s in %s entry offset %zi/%u: k->u64s 0",
type, where,
"invalid key in %s at %s offset %zi/%u: k->u64s 0",
bch2_jset_entry_types[entry->type], where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@ -232,8 +232,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
if (journal_entry_err_on((void *) bkey_next(k) >
(void *) vstruct_next(entry), c,
"invalid %s in %s entry offset %zi/%u: extends past end of journal entry",
type, where,
"invalid key in %s at %s offset %zi/%u: extends past end of journal entry",
bch2_jset_entry_types[entry->type], where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s))) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@ -242,8 +242,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
}
if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
"invalid %s in %s entry offset %zi/%u: bad format %u",
type, where,
"invalid key in %s at %s offset %zi/%u: bad format %u",
bch2_jset_entry_types[entry->type], where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s),
k->k.format)) {
@ -260,8 +260,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(k),
__btree_node_type(level, btree_id), write, &buf)) {
printbuf_reset(&buf);
pr_buf(&buf, "invalid %s in %s entry offset %zi/%u:",
type, where,
pr_buf(&buf, "invalid key in %s at %s offset %zi/%u:",
bch2_jset_entry_types[entry->type], where,
(u64 *) k - entry->_data,
le16_to_cpu(entry->u64s));
pr_newline(&buf);
@ -301,7 +301,7 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
int ret = journal_validate_key(c, where, entry,
entry->level,
entry->btree_id,
k, "key", version, big_endian, write);
k, version, big_endian, write);
if (ret == FSCK_DELETED_KEY)
continue;
@ -351,7 +351,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
}
return journal_validate_key(c, where, entry, 1, entry->btree_id, k,
"btree root", version, big_endian, write);
version, big_endian, write);
fsck_err:
return ret;
}
@ -613,6 +613,19 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
pr_buf(out, "%.*s", bytes, l->d);
}
/*
 * Overwrite entries carry bkeys with the same layout as btree_keys
 * entries, so validation is delegated wholesale.
 */
static int journal_entry_overwrite_validate(struct bch_fs *c, const char *where,
struct jset_entry *entry,
unsigned version, int big_endian, int write)
{
return journal_entry_btree_keys_validate(c, where, entry, version, big_endian, write);
}
/* Overwrite entries print exactly like btree_keys entries. */
static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
journal_entry_btree_keys_to_text(out, c, entry);
}
struct jset_entry_ops {
int (*validate)(struct bch_fs *, const char *,
struct jset_entry *, unsigned, int, int);

View File

@ -267,20 +267,31 @@ int bch2_opt_parse(struct bch_fs *c,
switch (opt->type) {
case BCH_OPT_BOOL:
ret = kstrtou64(val, 10, res);
if (ret < 0)
if (ret < 0 || (*res != 0 && *res != 1)) {
pr_buf(err, "%s: must be bool",
opt->attr.name);
return ret;
}
break;
case BCH_OPT_UINT:
ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res);
if (ret < 0)
if (ret < 0) {
if (err)
pr_buf(err, "%s: must be a number",
opt->attr.name);
return ret;
}
break;
case BCH_OPT_STR:
ret = match_string(opt->choices, -1, val);
if (ret < 0)
if (ret < 0) {
if (err)
pr_buf(err, "%s: invalid selection",
opt->attr.name);
return ret;
}
*res = ret;
break;
@ -289,8 +300,12 @@ int bch2_opt_parse(struct bch_fs *c,
return 0;
ret = opt->parse(c, val, res);
if (ret < 0)
if (ret < 0) {
if (err)
pr_buf(err, "%s: parse error",
opt->attr.name);
return ret;
}
}
return bch2_opt_validate(opt, *res, err);