diff --git a/.bcachefs_revision b/.bcachefs_revision
index e94e8e76..5eb25511 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-fad6d13aa55f96e01cc6ff516cdfea53b2fc9eb1
+600598598b7c6d2069a374a14ad4925f39a30faa
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 08ae6a36..5af9f2e7 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1790,7 +1790,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(data_usage,	6)	\
 	x(clock,	7)	\
 	x(dev_usage,	8)	\
-	x(log,		9)
+	x(log,		9)	\
+	x(overwrite,	10)

 enum {
 #define x(f, nr)	BCH_JSET_ENTRY_##f = nr,
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 05156258..761b09bf 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1850,8 +1850,9 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
 	trans_for_each_update(trans, i) {
 		struct bkey_s_c old = { &i->old_k, i->old_v };

-		pr_buf(buf, "update: btree %s %pS",
+		pr_buf(buf, "update: btree=%s cached=%u %pS",
 		       bch2_btree_ids[i->btree_id],
+		       i->cached,
 		       (void *) i->ip_allocated);
 		pr_newline(buf);

@@ -2237,16 +2238,22 @@ static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
 						       struct bpos pos)
 {
 	struct btree_insert_entry *i;
+	struct bkey_i *ret = NULL;

-	trans_for_each_update(trans, i)
-		if ((cmp_int(btree_id, i->btree_id) ?:
-		     bpos_cmp(pos, i->k->k.p)) <= 0) {
-			if (btree_id == i->btree_id)
-				return i->k;
+	trans_for_each_update(trans, i) {
+		if (i->btree_id < btree_id)
+			continue;
+		if (i->btree_id > btree_id)
 			break;
-		}
+		if (bpos_cmp(i->k->k.p, pos) < 0)
+			continue;
+		if (i->key_cache_already_flushed)
+			continue;
+		if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
+			ret = i->k;
+	}

-	return NULL;
+	return ret;
 }

 struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index a575189f..5785efc8 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -562,6 +562,16 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	return true;
 }

+void bch2_btree_key_cache_drop(struct btree_trans *trans,
+			       struct btree_path *path)
+{
+	struct bkey_cached *ck = (void *) path->l[0].b;
+
+	ck->valid = false;
+
+	BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
+}
+
 static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 					       struct shrink_control *sc)
 {
diff --git a/libbcachefs/btree_key_cache.h b/libbcachefs/btree_key_cache.h
index fd29c14c..670746e7 100644
--- a/libbcachefs/btree_key_cache.h
+++ b/libbcachefs/btree_key_cache.h
@@ -32,6 +32,8 @@ bool bch2_btree_insert_key_cached(struct btree_trans *,
 			struct btree_path *, struct bkey_i *);
 int bch2_btree_key_cache_flush(struct btree_trans *,
 			       enum btree_id, struct bpos);
+void bch2_btree_key_cache_drop(struct btree_trans *,
+			       struct btree_path *);

 void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
 void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index a1c04419..d8f92cc9 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -348,6 +348,7 @@ struct btree_insert_entry {
 	bool			cached:1;
 	bool			insert_trigger_run:1;
 	bool			overwrite_trigger_run:1;
+	bool			key_cache_already_flushed:1;
 	/*
 	 * @old_k may be a key from the journal; @old_btree_u64s always refers
 	 * to the size of the key being overwritten in the btree:
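Note on the btree_trans_peek_updates() rewrite above: because btree_insert_entry_cmp() (in btree_update_leaf.c below) now sorts the transaction's update list by (btree_id, cached, level, pos), positions within a single btree id are no longer strictly ordered, so the loop has to track the minimum position at or after the search point instead of returning the first match, and it skips entries flagged key_cache_already_flushed. A minimal userspace model of that scan follows; the types and names are invented for illustration and are not the bcachefs API.

/* Toy model only: plain C stand-ins, not the bcachefs types or API. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct update {
	unsigned	btree_id;
	long		pos;
	bool		key_cache_already_flushed;
};

/*
 * Return the pending update with the smallest position >= @pos in @btree_id.
 * Entries flagged key_cache_already_flushed only exist to flush the key
 * cache back to the btree, so they stay invisible here:
 */
static struct update *peek_updates(struct update *u, size_t nr,
				   unsigned btree_id, long pos)
{
	struct update *ret = NULL;
	size_t i;

	for (i = 0; i < nr; i++) {
		if (u[i].btree_id < btree_id)
			continue;
		if (u[i].btree_id > btree_id)
			break;
		if (u[i].pos < pos)
			continue;
		if (u[i].key_cache_already_flushed)
			continue;
		if (!ret || u[i].pos < ret->pos)
			ret = &u[i];
	}

	return ret;
}

int main(void)
{
	/* sorted by (btree_id, cached, pos): positions per btree may interleave */
	struct update u[] = {
		{ .btree_id = 1, .pos = 10 },
		{ .btree_id = 2, .pos =  9 },
		{ .btree_id = 2, .pos =  5, .key_cache_already_flushed = true },
		{ .btree_id = 2, .pos =  7 },
		{ .btree_id = 3, .pos =  1 },
	};
	struct update *ret = peek_updates(u, sizeof(u) / sizeof(u[0]), 2, 4);

	/* prints 7: 5 is skipped (flushed), 9 loses to the later-but-smaller 7 */
	printf("peeked pos %ld\n", ret ? ret->pos : -1L);
	return 0;
}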
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 7a26c793..5f7e6ad6 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -31,6 +31,7 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
 					 const struct btree_insert_entry *r)
 {
 	return cmp_int(l->btree_id, r->btree_id) ?:
+		cmp_int(l->cached, r->cached) ?:
 		-cmp_int(l->level, r->level) ?:
 		bpos_cmp(l->k->k.p, r->k->k.p);
 }
@@ -308,25 +309,15 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
 static void journal_transaction_name(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
-	struct jset_entry *entry = journal_res_entry(&c->journal, &trans->journal_res);
-	struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
-	unsigned u64s = JSET_ENTRY_LOG_U64s - 1;
-	unsigned b, buflen = u64s * sizeof(u64);
+	struct journal *j = &c->journal;
+	struct jset_entry *entry =
+		bch2_journal_add_entry(j, &trans->journal_res,
+				       BCH_JSET_ENTRY_log, 0, 0,
+				       JSET_ENTRY_LOG_U64s);
+	struct jset_entry_log *l =
+		container_of(entry, struct jset_entry_log, entry);

-	l->entry.u64s = cpu_to_le16(u64s);
-	l->entry.btree_id = 0;
-	l->entry.level = 0;
-	l->entry.type = BCH_JSET_ENTRY_log;
-	l->entry.pad[0] = 0;
-	l->entry.pad[1] = 0;
-	l->entry.pad[2] = 0;
-	b = min_t(unsigned, strlen(trans->fn), buflen);
-	memcpy(l->d, trans->fn, b);
-	while (b < buflen)
-		l->d[b++] = '\0';
-
-	trans->journal_res.offset += JSET_ENTRY_LOG_U64s;
-	trans->journal_res.u64s -= JSET_ENTRY_LOG_U64s;
+	strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
 }

 static inline enum btree_insert_ret
@@ -393,33 +384,6 @@ btree_key_can_insert_cached(struct btree_trans *trans,
 	return -EINTR;
 }

-static inline void do_btree_insert_one(struct btree_trans *trans,
-				       struct btree_insert_entry *i)
-{
-	struct bch_fs *c = trans->c;
-	struct journal *j = &c->journal;
-
-	EBUG_ON(trans->journal_res.ref !=
-		!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
-
-	i->k->k.needs_whiteout = false;
-
-	if (!i->cached)
-		btree_insert_key_leaf(trans, i);
-	else
-		bch2_btree_insert_key_cached(trans, i->path, i->k);
-
-	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-		bch2_journal_add_keys(j, &trans->journal_res,
-				      i->btree_id,
-				      i->level,
-				      i->k);
-
-		if (trans->journal_seq)
-			*trans->journal_seq = trans->journal_res.seq;
-	}
-}
-
 /* Triggers: */

 static int run_one_mem_trigger(struct btree_trans *trans,
@@ -729,8 +693,42 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 			return ret;
 	}

-	trans_for_each_update(trans, i)
-		do_btree_insert_one(trans, i);
+	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+		trans_for_each_update(trans, i) {
+			struct journal *j = &c->journal;
+			struct jset_entry *entry;
+
+			if (i->key_cache_already_flushed)
+				continue;
+
+			entry = bch2_journal_add_entry(j, &trans->journal_res,
+					       BCH_JSET_ENTRY_overwrite,
+					       i->btree_id, i->level,
+					       i->old_k.u64s);
+			bkey_reassemble(&entry->start[0],
+					(struct bkey_s_c) { &i->old_k, i->old_v });
+
+			entry = bch2_journal_add_entry(j, &trans->journal_res,
+					       BCH_JSET_ENTRY_btree_keys,
+					       i->btree_id, i->level,
+					       i->k->k.u64s);
+			bkey_copy(&entry->start[0], i->k);
+		}
+
+		if (trans->journal_seq)
+			*trans->journal_seq = trans->journal_res.seq;
+	}
+
+	trans_for_each_update(trans, i) {
+		i->k->k.needs_whiteout = false;
+
+		if (!i->cached)
+			btree_insert_key_leaf(trans, i);
+		else if (!i->key_cache_already_flushed)
+			bch2_btree_insert_key_cached(trans, i->path, i->k);
+		else
+			bch2_btree_key_cache_drop(trans, i->path);
+	}

 	return ret;
 }
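For reference, a rough userspace sketch of the journal layout the new commit loop above produces: each update contributes a BCH_JSET_ENTRY_overwrite entry carrying the key being overwritten, followed by a BCH_JSET_ENTRY_btree_keys entry carrying the new key, each with its own jset_entry header. The structs and helper below are simplified stand-ins (assuming a header that packs into one u64, which is what the jset_u64s() reservation accounting appears to add), not the real bch2_journal_add_entry().

/* Toy model: layout only, with invented names. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct entry_hdr {			/* assumed: header packs into one u64 */
	uint16_t	u64s;		/* payload size in u64s */
	uint8_t		btree_id;
	uint8_t		level;
	uint8_t		type;
	uint8_t		pad[3];
};

enum { ENTRY_btree_keys = 0, ENTRY_overwrite = 10 };	/* 10 matches x(overwrite, 10) above */

static uint64_t	jbuf[64];		/* models the transaction's journal reservation */
static unsigned	offset;			/* current position, in u64s */

/* Reserve header + payload, fill in the header, hand back the payload: */
static uint64_t *add_entry(uint8_t type, uint8_t btree_id, uint16_t u64s)
{
	struct entry_hdr *e = (struct entry_hdr *) &jbuf[offset];

	*e = (struct entry_hdr) { .u64s = u64s, .btree_id = btree_id, .type = type };
	offset += 1 + u64s;		/* i.e. jset_u64s(u64s), assuming a 1-u64 header */
	return &jbuf[offset - u64s];
}

int main(void)
{
	uint64_t old_key[2] = { 1, 2 }, new_key[3] = { 3, 4, 5 };

	/* old key first (the overwrite), then the key being inserted: */
	memcpy(add_entry(ENTRY_overwrite,  1, 2), old_key, sizeof(old_key));
	memcpy(add_entry(ENTRY_btree_keys, 1, 3), new_key, sizeof(new_key));

	printf("reservation used: %u u64s\n", offset);	/* (1 + 2) + (1 + 3) = 7 */
	return 0;
}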
@@ -1118,7 +1116,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
 	trans->journal_preres_u64s = 0;

 	/* For journalling transaction name: */
-	trans->journal_u64s += JSET_ENTRY_LOG_U64s;
+	trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

 	trans_for_each_update(trans, i) {
 		BUG_ON(!i->path->should_be_locked);
@@ -1132,11 +1130,18 @@ int __bch2_trans_commit(struct btree_trans *trans)

 		BUG_ON(!btree_node_intent_locked(i->path, i->level));

+		if (i->key_cache_already_flushed)
+			continue;
+
+		/* we're going to journal the key being updated: */
 		u64s = jset_u64s(i->k->k.u64s);
 		if (i->cached &&
 		    likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
 			trans->journal_preres_u64s += u64s;
 		trans->journal_u64s += u64s;
+
+		/* and we're also going to log the overwrite: */
+		trans->journal_u64s += jset_u64s(i->old_k.u64s);
 	}

 	if (trans->extra_journal_res) {
@@ -1473,11 +1478,13 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 }

 static int __must_check
-bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
-			  struct bkey_i *k, enum btree_update_flags flags)
+bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
+				struct bkey_i *k, enum btree_update_flags flags,
+				unsigned long ip)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i, n;
+	int ret = 0;

 	BUG_ON(!path->should_be_locked);

@@ -1492,7 +1499,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
 		.cached		= path->cached,
 		.path		= path,
 		.k		= k,
-		.ip_allocated	= _RET_IP_,
+		.ip_allocated	= ip,
 	};

 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -1537,8 +1544,43 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
 		}
 	}

-	__btree_path_get(n.path, true);
-	return 0;
+	__btree_path_get(i->path, true);
+
+	/*
+	 * If a key is present in the key cache, it must also exist in the
+	 * btree - this is necessary for cache coherency. When iterating over
+	 * a btree that's cached in the key cache, the btree iter code checks
+	 * the key cache - but the key has to exist in the btree for that to
+	 * work:
+	 */
+	if (path->cached &&
+	    bkey_deleted(&i->old_k)) {
+		struct btree_path *btree_path;
+
+		i->key_cache_already_flushed = true;
+		i->flags |= BTREE_TRIGGER_NORUN;
+
+		btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+					   BTREE_ITER_INTENT, _THIS_IP_);
+
+		ret = bch2_btree_path_traverse(trans, btree_path, 0);
+		if (ret)
+			goto err;
+
+		btree_path->should_be_locked = true;
+		ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
+err:
+		bch2_path_put(trans, btree_path, true);
+	}
+
+	return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
+			  struct bkey_i *k, enum btree_update_flags flags)
+{
+	return bch2_trans_update_by_path_trace(trans, path, k, flags, _RET_IP_);
 }

 int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
@@ -1562,6 +1604,9 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
 			k->k.type = KEY_TYPE_whiteout;
 	}

+	/*
+	 * Ensure that updates to cached btrees go to the key cache:
+	 */
 	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
 	    !path->cached &&
 	    !path->level &&
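The bch2_trans_update_by_path_trace() change above keeps the key cache coherent with the btree: when an update goes through the key cache but the old key is deleted (the key does not exist in the btree yet), the cached entry is flagged key_cache_already_flushed - at commit it is dropped from the cache rather than journalled or triggered - and a second update is queued directly against the underlying btree. A toy model of that fan-out decision, with invented types:

/* Toy model with invented types - the shape of the decision only. */
#include <stdbool.h>
#include <stdio.h>

struct update {
	bool	cached;				/* targets the key cache */
	bool	key_cache_already_flushed;	/* commit: drop from cache, skip journal/triggers */
};

/* Returns how many updates end up queued (1 or 2): */
static int queue_update(struct update *out, bool via_key_cache, bool old_key_deleted)
{
	int nr = 0;

	out[nr++] = (struct update) { .cached = via_key_cache };

	if (via_key_cache && old_key_deleted) {
		/*
		 * The key isn't in the btree yet: the cached entry is only a
		 * placeholder to be dropped at commit, and the real insert is
		 * redirected at the btree itself, so the cache never holds a
		 * key the btree doesn't have.
		 */
		out[0].key_cache_already_flushed = true;
		out[nr++] = (struct update) { .cached = false };
	}

	return nr;
}

int main(void)
{
	struct update u[2];
	int nr = queue_update(u, true, true);

	printf("queued %d update(s); first: cached=%d already_flushed=%d\n",
	       nr, u[0].cached, u[0].key_cache_already_flushed);
	return 0;
}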
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index 317efd04..e9a444f7 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -114,15 +114,41 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
 			     struct nonce nonce,
 			     void *buf, size_t len)
 {
-	struct scatterlist sg;
+	if (!is_vmalloc_addr(buf)) {
+		struct scatterlist sg;

-	sg_init_table(&sg, 1);
-	sg_set_page(&sg,
-		    is_vmalloc_addr(buf)
-		    ? vmalloc_to_page(buf)
-		    : virt_to_page(buf),
-		    len, offset_in_page(buf));
-	return do_encrypt_sg(tfm, nonce, &sg, len);
+		sg_init_table(&sg, 1);
+		sg_set_page(&sg,
+			    is_vmalloc_addr(buf)
+			    ? vmalloc_to_page(buf)
+			    : virt_to_page(buf),
+			    len, offset_in_page(buf));
+		return do_encrypt_sg(tfm, nonce, &sg, len);
+	} else {
+		unsigned pages = buf_pages(buf, len);
+		struct scatterlist *sg;
+		size_t orig_len = len;
+		int ret, i;
+
+		sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL);
+		if (!sg)
+			return -ENOMEM;
+
+		sg_init_table(sg, pages);
+
+		for (i = 0; i < pages; i++) {
+			unsigned offset = offset_in_page(buf);
+			unsigned pg_len = min(len, PAGE_SIZE - offset);
+
+			sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
+			buf += pg_len;
+			len -= pg_len;
+		}
+
+		ret = do_encrypt_sg(tfm, nonce, sg, orig_len);
+		kfree(sg);
+		return ret;
+	}
 }

 int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 59453dcf..d3caa7ea 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -199,9 +199,9 @@ journal_res_entry(struct journal *j, struct journal_res *res)
 	return vstruct_idx(j->buf[res->idx].data, res->offset);
 }

-static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
+static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
 					  enum btree_id id, unsigned level,
-					  const void *data, unsigned u64s)
+					  unsigned u64s)
 {
 	entry->u64s = cpu_to_le16(u64s);
 	entry->btree_id = id;
@@ -210,32 +210,33 @@ static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type
 	entry->pad[0] = 0;
 	entry->pad[1] = 0;
 	entry->pad[2] = 0;
-	memcpy_u64s_small(entry->_data, data, u64s);
-
 	return jset_u64s(u64s);
 }

-static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
-					  unsigned type, enum btree_id id,
-					  unsigned level,
+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
+					 enum btree_id id, unsigned level,
 					  const void *data, unsigned u64s)
 {
-	unsigned actual = journal_entry_set(journal_res_entry(j, res),
-			       type, id, level, data, u64s);
+	unsigned ret = journal_entry_init(entry, type, id, level, u64s);
+
+	memcpy_u64s_small(entry->_data, data, u64s);
+	return ret;
+}
+
+static inline struct jset_entry *
+bch2_journal_add_entry(struct journal *j, struct journal_res *res,
+		       unsigned type, enum btree_id id,
+		       unsigned level, unsigned u64s)
+{
+	struct jset_entry *entry = journal_res_entry(j, res);
+	unsigned actual = journal_entry_init(entry, type, id, level, u64s);

 	EBUG_ON(!res->ref);
 	EBUG_ON(actual > res->u64s);

 	res->offset += actual;
 	res->u64s -= actual;
-}
-
-static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
-					  enum btree_id id, unsigned level,
-					  const struct bkey_i *k)
-{
-	bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys,
-			       id, level, k, k->k.u64s);
+	return entry;
 }

 static inline bool journal_entry_empty(struct jset *j)
@@ -283,7 +284,7 @@ static inline void bch2_journal_res_put(struct journal *j,

 	while (res->u64s)
 		bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys,
-				       0, 0, NULL, 0);
+				       0, 0, 0);

 	bch2_journal_buf_put(j, res->idx);
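Back in checksum.c above, do_encrypt() now special-cases vmalloc buffers: they are only virtually contiguous, so the buffer has to be split into one scatterlist segment per page. The userspace sketch below just prints the (offset, length) pairs that loop would generate; PAGE_SIZE, the address, and the buf_pages() helper are stand-ins for illustration.

/* Userspace model of the per-page walk; nothing here is kernel API. */
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE	4096u

static unsigned offset_in_page(unsigned long addr)
{
	return addr & (PAGE_SIZE - 1);
}

static unsigned buf_pages(unsigned long addr, size_t len)
{
	/* number of pages the range [addr, addr + len) touches */
	return (offset_in_page(addr) + len + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	unsigned long buf = 0x1000f00;	/* pretend vmalloc address */
	size_t len = 10000;
	unsigned i, pages = buf_pages(buf, len);

	printf("%u scatterlist segments:\n", pages);

	for (i = 0; i < pages; i++) {
		unsigned offset = offset_in_page(buf);
		unsigned pg_len = len < PAGE_SIZE - offset ? len : PAGE_SIZE - offset;

		/* in the kernel: sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset) */
		printf("  page %u: offset %4u len %4u\n", i, offset, pg_len);
		buf += pg_len;
		len -= pg_len;
	}

	return 0;
}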
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index d59a49a5..a1df3856 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -213,7 +213,7 @@ static void journal_entry_null_range(void *start, void *end)
 static int journal_validate_key(struct bch_fs *c, const char *where,
 				struct jset_entry *entry,
 				unsigned level, enum btree_id btree_id,
-				struct bkey_i *k, const char *type,
+				struct bkey_i *k,
 				unsigned version, int big_endian, int write)
 {
 	void *next = vstruct_next(entry);
@@ -221,8 +221,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
 	int ret = 0;

 	if (journal_entry_err_on(!k->k.u64s, c,
-			"invalid %s in %s entry offset %zi/%u: k->u64s 0",
-			type, where,
+			"invalid key in %s at %s offset %zi/%u: k->u64s 0",
+			bch2_jset_entry_types[entry->type], where,
 			(u64 *) k - entry->_data,
 			le16_to_cpu(entry->u64s))) {
 		entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@@ -232,8 +232,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,

 	if (journal_entry_err_on((void *) bkey_next(k) >
 				 (void *) vstruct_next(entry), c,
-			"invalid %s in %s entry offset %zi/%u: extends past end of journal entry",
-			type, where,
+			"invalid key in %s at %s offset %zi/%u: extends past end of journal entry",
+			bch2_jset_entry_types[entry->type], where,
 			(u64 *) k - entry->_data,
 			le16_to_cpu(entry->u64s))) {
 		entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
@@ -242,8 +242,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
 	}

 	if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
-			"invalid %s in %s entry offset %zi/%u: bad format %u",
-			type, where,
+			"invalid key in %s at %s offset %zi/%u: bad format %u",
+			bch2_jset_entry_types[entry->type], where,
 			(u64 *) k - entry->_data,
 			le16_to_cpu(entry->u64s),
 			k->k.format)) {
@@ -260,8 +260,8 @@ static int journal_validate_key(struct bch_fs *c, const char *where,
 	if (bch2_bkey_invalid(c, bkey_i_to_s_c(k),
 			      __btree_node_type(level, btree_id), write, &buf)) {
 		printbuf_reset(&buf);
-		pr_buf(&buf, "invalid %s in %s entry offset %zi/%u:",
-		       type, where,
+		pr_buf(&buf, "invalid key in %s at %s offset %zi/%u:",
+		       bch2_jset_entry_types[entry->type], where,
 		       (u64 *) k - entry->_data,
 		       le16_to_cpu(entry->u64s));
 		pr_newline(&buf);
@@ -301,7 +301,7 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
 		int ret = journal_validate_key(c, where, entry,
 					       entry->level,
 					       entry->btree_id,
-					       k, "key", version, big_endian, write);
+					       k, version, big_endian, write);
 		if (ret == FSCK_DELETED_KEY)
 			continue;

@@ -351,7 +351,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
 	}

 	return journal_validate_key(c, where, entry, 1, entry->btree_id, k,
-				    "btree root", version, big_endian, write);
+				    version, big_endian, write);
 fsck_err:
 	return ret;
 }
@@ -613,6 +613,19 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
 	pr_buf(out, "%.*s", bytes, l->d);
 }

+static int journal_entry_overwrite_validate(struct bch_fs *c, const char *where,
+				struct jset_entry *entry,
+				unsigned version, int big_endian, int write)
+{
+	return journal_entry_btree_keys_validate(c, where, entry, version, big_endian, write);
+}
+
+static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
+					    struct jset_entry *entry)
+{
+	journal_entry_btree_keys_to_text(out, c, entry);
+}
+
 struct jset_entry_ops {
 	int (*validate)(struct bch_fs *, const char *,
 			struct jset_entry *, unsigned, int, int);
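The journal_io.c changes above drop the caller-supplied type string in favour of bch2_jset_entry_types[entry->type], and hook the new overwrite entry type into the per-type dispatch by reusing the btree_keys validate/to_text handlers. A minimal model of that kind of ops table follows; the types and handlers are invented for illustration.

/* Toy dispatch table; struct jset_entry is opaque here. */
#include <stdio.h>

struct jset_entry;

struct entry_ops {
	const char	*name;
	int		(*validate)(const struct jset_entry *);
	void		(*to_text)(const struct jset_entry *);
};

static int btree_keys_validate(const struct jset_entry *e)
{
	(void) e;
	return 0;			/* would walk and validate each bkey */
}

static void btree_keys_to_text(const struct jset_entry *e)
{
	(void) e;
	printf("<keys>");		/* would print each bkey */
}

enum { ENTRY_btree_keys = 0, ENTRY_overwrite = 10, ENTRY_NR };

static const struct entry_ops ops[ENTRY_NR] = {
	[ENTRY_btree_keys]	= { "btree_keys", btree_keys_validate, btree_keys_to_text },
	/* overwrites are just keys that got replaced - same handlers: */
	[ENTRY_overwrite]	= { "overwrite",  btree_keys_validate, btree_keys_to_text },
};

int main(void)
{
	printf("%s: ", ops[ENTRY_overwrite].name);
	ops[ENTRY_overwrite].to_text(NULL);
	printf("\n");
	return ops[ENTRY_overwrite].validate(NULL);
}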
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 385451ef..d4d16cb8 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -267,20 +267,31 @@ int bch2_opt_parse(struct bch_fs *c,
 	switch (opt->type) {
 	case BCH_OPT_BOOL:
 		ret = kstrtou64(val, 10, res);
-		if (ret < 0)
+		if (ret < 0 || (*res != 0 && *res != 1)) {
+			pr_buf(err, "%s: must be bool",
+			       opt->attr.name);
 			return ret;
+		}
 		break;
 	case BCH_OPT_UINT:
 		ret = opt->flags & OPT_HUMAN_READABLE
 			? bch2_strtou64_h(val, res)
 			: kstrtou64(val, 10, res);
-		if (ret < 0)
+		if (ret < 0) {
+			if (err)
+				pr_buf(err, "%s: must be a number",
+				       opt->attr.name);
 			return ret;
+		}
 		break;
 	case BCH_OPT_STR:
 		ret = match_string(opt->choices, -1, val);
-		if (ret < 0)
+		if (ret < 0) {
+			if (err)
+				pr_buf(err, "%s: invalid selection",
+				       opt->attr.name);
 			return ret;
+		}

 		*res = ret;
 		break;
@@ -289,8 +300,12 @@ int bch2_opt_parse(struct bch_fs *c,
 			return 0;

 		ret = opt->parse(c, val, res);
-		if (ret < 0)
+		if (ret < 0) {
+			if (err)
+				pr_buf(err, "%s: parse error",
+				       opt->attr.name);
 			return ret;
+		}
 	}

 	return bch2_opt_validate(opt, *res, err);
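The opts.c changes above make bch2_opt_parse() explain why a value was rejected through the caller-supplied error printbuf (bad bool, non-numeric number, invalid choice, parse error) instead of returning only an errno. A standalone sketch of the same pattern, with a made-up parse_bool_opt() and option name:

/* Userspace sketch of "parse, and report the reason into a caller buffer". */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_bool_opt(const char *name, const char *val,
			  unsigned long *res, char *err, size_t errlen)
{
	char *end;
	unsigned long v = strtoul(val, &end, 10);

	if (*val == '\0' || *end != '\0' || (v != 0 && v != 1)) {
		snprintf(err, errlen, "%s: must be bool", name);
		return -EINVAL;
	}

	*res = v;
	return 0;
}

int main(void)
{
	char err[80] = "";
	unsigned long res;

	if (parse_bool_opt("demo_opt", "maybe", &res, err, sizeof(err)))
		fprintf(stderr, "%s\n", err);	/* "demo_opt: must be bool" */

	return 0;
}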