Update bcachefs sources to 79847e4824 bcachefs: Fixes/improvements for journal entry reservations

This commit is contained in:
Kent Overstreet 2021-02-03 13:11:03 -05:00
parent 4064aa126e
commit 612f6b9ab7
8 changed files with 61 additions and 52 deletions

View File

@@ -1 +1 @@
26409a8f755b8faa620a49796d7935566204daaf 79847e4824278463f7eb826dfd78221979e29a8b

View File

@@ -582,8 +582,9 @@ struct bch_fs {
struct bch_replicas_cpu replicas_gc; struct bch_replicas_cpu replicas_gc;
struct mutex replicas_gc_lock; struct mutex replicas_gc_lock;
struct journal_entry_res btree_root_journal_res;
struct journal_entry_res replicas_journal_res; struct journal_entry_res replicas_journal_res;
struct journal_entry_res clock_journal_res;
struct journal_entry_res dev_usage_journal_res; struct journal_entry_res dev_usage_journal_res;
struct bch_disk_groups_cpu __rcu *disk_groups; struct bch_disk_groups_cpu __rcu *disk_groups;

View File

@@ -608,11 +608,16 @@ static void btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
} }
static void btree_err_msg(struct printbuf *out, struct bch_fs *c, static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca,
struct btree *b, struct bset *i, struct btree *b, struct bset *i,
unsigned offset, int write) unsigned offset, int write)
{ {
pr_buf(out, "error validating btree node %sat btree ", pr_buf(out, "error validating btree node ");
write ? "before write " : ""); if (write)
pr_buf(out, "before write ");
if (ca)
pr_buf(out, "on %s ", ca->name);
pr_buf(out, "at btree ");
btree_pos_to_text(out, c, b); btree_pos_to_text(out, c, b);
pr_buf(out, "\n node offset %u", b->written); pr_buf(out, "\n node offset %u", b->written);
@@ -631,7 +636,7 @@ enum btree_validate_ret {
BTREE_RETRY_READ = 64, BTREE_RETRY_READ = 64,
}; };
#define btree_err(type, c, b, i, msg, ...) \ #define btree_err(type, c, ca, b, i, msg, ...) \
({ \ ({ \
__label__ out; \ __label__ out; \
char _buf[300]; \ char _buf[300]; \
@@ -642,7 +647,7 @@ enum btree_validate_ret {
if (buf2) \ if (buf2) \
out = _PBUF(buf2, 4986); \ out = _PBUF(buf2, 4986); \
\ \
btree_err_msg(&out, c, b, i, b->written, write); \ btree_err_msg(&out, c, ca, b, i, b->written, write); \
pr_buf(&out, ": " msg, ##__VA_ARGS__); \ pr_buf(&out, ": " msg, ##__VA_ARGS__); \
\ \
if (type == BTREE_ERR_FIXABLE && \ if (type == BTREE_ERR_FIXABLE && \
@@ -691,9 +696,9 @@ out: \
#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) #define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
static int validate_bset(struct bch_fs *c, struct btree *b, static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct bset *i, unsigned sectors, struct btree *b, struct bset *i,
int write, bool have_retry) unsigned sectors, int write, bool have_retry)
{ {
unsigned version = le16_to_cpu(i->version); unsigned version = le16_to_cpu(i->version);
const char *err; const char *err;
@@ -702,18 +707,18 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
btree_err_on((version != BCH_BSET_VERSION_OLD && btree_err_on((version != BCH_BSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) || version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max, version >= bcachefs_metadata_version_max,
BTREE_ERR_FATAL, c, b, i, BTREE_ERR_FATAL, c, ca, b, i,
"unsupported bset version"); "unsupported bset version");
if (btree_err_on(b->written + sectors > c->opts.btree_node_size, if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, ca, b, i,
"bset past end of btree node")) { "bset past end of btree node")) {
i->u64s = 0; i->u64s = 0;
return 0; return 0;
} }
btree_err_on(b->written && !i->u64s, btree_err_on(b->written && !i->u64s,
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, ca, b, i,
"empty bset"); "empty bset");
if (!b->written) { if (!b->written) {
@@ -727,16 +732,16 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
/* XXX endianness */ /* XXX endianness */
btree_err_on(bp->seq != bn->keys.seq, btree_err_on(bp->seq != bn->keys.seq,
BTREE_ERR_MUST_RETRY, c, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"incorrect sequence number (wrong btree node)"); "incorrect sequence number (wrong btree node)");
} }
btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
BTREE_ERR_MUST_RETRY, c, b, i, BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect btree id"); "incorrect btree id");
btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
BTREE_ERR_MUST_RETRY, c, b, i, BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect level"); "incorrect level");
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
@@ -759,7 +764,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
} }
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
BTREE_ERR_MUST_RETRY, c, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"incorrect min_key: got %llu:%llu should be %llu:%llu", "incorrect min_key: got %llu:%llu should be %llu:%llu",
b->data->min_key.inode, b->data->min_key.inode,
b->data->min_key.offset, b->data->min_key.offset,
@@ -768,7 +773,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
} }
btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, b, i, BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect max key %llu:%llu", "incorrect max key %llu:%llu",
bn->max_key.inode, bn->max_key.inode,
bn->max_key.offset); bn->max_key.offset);
@@ -793,7 +798,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
#endif #endif
err = bch2_bkey_format_validate(&bn->format); err = bch2_bkey_format_validate(&bn->format);
btree_err_on(err, btree_err_on(err,
BTREE_ERR_FATAL, c, b, i, BTREE_ERR_FATAL, c, ca, b, i,
"invalid bkey format: %s", err); "invalid bkey format: %s", err);
compat_bformat(b->c.level, b->c.btree_id, version, compat_bformat(b->c.level, b->c.btree_id, version,
@@ -825,14 +830,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
const char *invalid; const char *invalid;
if (btree_err_on(bkey_next(k) > vstruct_last(i), if (btree_err_on(bkey_next(k) > vstruct_last(i),
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, NULL, b, i,
"key extends past end of bset")) { "key extends past end of bset")) {
i->u64s = cpu_to_le16((u64 *) k - i->_data); i->u64s = cpu_to_le16((u64 *) k - i->_data);
break; break;
} }
if (btree_err_on(k->format > KEY_FORMAT_CURRENT, if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, NULL, b, i,
"invalid bkey format %u", k->format)) { "invalid bkey format %u", k->format)) {
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k), memmove_u64s_down(k, bkey_next(k),
@@ -855,7 +860,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
char buf[160]; char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
btree_err(BTREE_ERR_FIXABLE, c, b, i, btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i,
"invalid bkey: %s\n%s", invalid, buf); "invalid bkey: %s\n%s", invalid, buf);
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
@@ -889,7 +894,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_to_text(&PBUF(buf2), u.k); bch2_bkey_to_text(&PBUF(buf2), u.k);
bch2_dump_bset(c, b, i, 0); bch2_dump_bset(c, b, i, 0);
btree_err(BTREE_ERR_FATAL, c, b, i, btree_err(BTREE_ERR_FATAL, c, NULL, b, i,
"keys out of order: %s > %s", "keys out of order: %s > %s",
buf1, buf2); buf1, buf2);
/* XXX: repair this */ /* XXX: repair this */
@@ -902,7 +907,8 @@ fsck_err:
return ret; return ret;
} }
int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry) int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
struct btree *b, bool have_retry)
{ {
struct btree_node_entry *bne; struct btree_node_entry *bne;
struct sort_iter *iter; struct sort_iter *iter;
@@ -919,15 +925,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
iter->size = (btree_blocks(c) + 1) * 2; iter->size = (btree_blocks(c) + 1) * 2;
if (bch2_meta_read_fault("btree")) if (bch2_meta_read_fault("btree"))
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, btree_err(BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"dynamic fault"); "dynamic fault");
btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
BTREE_ERR_MUST_RETRY, c, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"bad magic"); "bad magic");
btree_err_on(!b->data->keys.seq, btree_err_on(!b->data->keys.seq,
BTREE_ERR_MUST_RETRY, c, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"bad btree header"); "bad btree header");
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
@@ -935,7 +941,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
&bkey_i_to_btree_ptr_v2(&b->key)->v; &bkey_i_to_btree_ptr_v2(&b->key)->v;
btree_err_on(b->data->keys.seq != bp->seq, btree_err_on(b->data->keys.seq != bp->seq,
BTREE_ERR_MUST_RETRY, c, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"got wrong btree node (seq %llx want %llx)", "got wrong btree node (seq %llx want %llx)",
b->data->keys.seq, bp->seq); b->data->keys.seq, bp->seq);
} }
@@ -950,7 +956,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
i = &b->data->keys; i = &b->data->keys;
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
BTREE_ERR_WANT_RETRY, c, b, i, BTREE_ERR_WANT_RETRY, c, ca, b, i,
"unknown checksum type %llu", "unknown checksum type %llu",
BSET_CSUM_TYPE(i)); BSET_CSUM_TYPE(i));
@@ -958,7 +964,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
btree_err_on(bch2_crc_cmp(csum, b->data->csum), btree_err_on(bch2_crc_cmp(csum, b->data->csum),
BTREE_ERR_WANT_RETRY, c, b, i, BTREE_ERR_WANT_RETRY, c, ca, b, i,
"invalid checksum"); "invalid checksum");
bset_encrypt(c, i, b->written << 9); bset_encrypt(c, i, b->written << 9);
@@ -978,7 +984,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
break; break;
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
BTREE_ERR_WANT_RETRY, c, b, i, BTREE_ERR_WANT_RETRY, c, ca, b, i,
"unknown checksum type %llu", "unknown checksum type %llu",
BSET_CSUM_TYPE(i)); BSET_CSUM_TYPE(i));
@@ -986,7 +992,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
btree_err_on(bch2_crc_cmp(csum, bne->csum), btree_err_on(bch2_crc_cmp(csum, bne->csum),
BTREE_ERR_WANT_RETRY, c, b, i, BTREE_ERR_WANT_RETRY, c, ca, b, i,
"invalid checksum"); "invalid checksum");
bset_encrypt(c, i, b->written << 9); bset_encrypt(c, i, b->written << 9);
@@ -994,7 +1000,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
sectors = vstruct_sectors(bne, c->block_bits); sectors = vstruct_sectors(bne, c->block_bits);
} }
ret = validate_bset(c, b, i, sectors, ret = validate_bset(c, ca, b, i, sectors,
READ, have_retry); READ, have_retry);
if (ret) if (ret)
goto fsck_err; goto fsck_err;
@@ -1016,7 +1022,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
true); true);
btree_err_on(blacklisted && first, btree_err_on(blacklisted && first,
BTREE_ERR_FIXABLE, c, b, i, BTREE_ERR_FIXABLE, c, ca, b, i,
"first btree node bset has blacklisted journal seq"); "first btree node bset has blacklisted journal seq");
if (blacklisted && !first) if (blacklisted && !first)
continue; continue;
@@ -1033,7 +1039,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
bset_byte_offset(b, bne) < btree_bytes(c); bset_byte_offset(b, bne) < btree_bytes(c);
bne = (void *) bne + block_bytes(c)) bne = (void *) bne + block_bytes(c))
btree_err_on(bne->keys.seq == b->data->keys.seq, btree_err_on(bne->keys.seq == b->data->keys.seq,
BTREE_ERR_WANT_RETRY, c, b, NULL, BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
"found bset signature after last bset"); "found bset signature after last bset");
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
@@ -1068,7 +1074,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
char buf[160]; char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
btree_err(BTREE_ERR_FIXABLE, c, b, i, btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i,
"invalid bkey %s: %s", buf, invalid); "invalid bkey %s: %s", buf, invalid);
btree_keys_account_key_drop(&b->nr, 0, k); btree_keys_account_key_drop(&b->nr, 0, k);
@@ -1159,7 +1165,7 @@ start:
&failed, &rb->pick) > 0; &failed, &rb->pick) > 0;
if (!bio->bi_status && if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry)) !bch2_btree_node_read_done(c, ca, b, can_retry))
break; break;
if (!can_retry) { if (!can_retry) {
@@ -1465,7 +1471,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
return -1; return -1;
ret = validate_bset(c, b, i, sectors, WRITE, false) ?: ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?:
validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
if (ret) { if (ret) {
bch2_inconsistent_error(c); bch2_inconsistent_error(c);

View File

@@ -134,7 +134,8 @@ void bch2_btree_build_aux_trees(struct btree *);
void bch2_btree_init_next(struct bch_fs *, struct btree *, void bch2_btree_init_next(struct bch_fs *, struct btree *,
struct btree_iter *); struct btree_iter *);
int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
struct btree *, bool);
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id, int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned); const struct bkey_i *, unsigned);

View File

@@ -79,7 +79,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
memcpy(n_ondisk, n_sorted, btree_bytes(c)); memcpy(n_ondisk, n_sorted, btree_bytes(c));
if (bch2_btree_node_read_done(c, v, false)) if (bch2_btree_node_read_done(c, ca, v, false))
goto out; goto out;
n_sorted = c->verify_data->data; n_sorted = c->verify_data->data;

View File

@@ -1117,13 +1117,6 @@ int bch2_fs_journal_init(struct journal *j)
j->write_delay_ms = 1000; j->write_delay_ms = 1000;
j->reclaim_delay_ms = 100; j->reclaim_delay_ms = 100;
/* Btree roots: */
j->entry_u64s_reserved +=
BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX);
j->entry_u64s_reserved +=
2 * (sizeof(struct jset_entry_clock) / sizeof(u64));
atomic64_set(&j->reservations.counter, atomic64_set(&j->reservations.counter,
((union journal_res_state) ((union journal_res_state)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);

View File

@@ -1065,8 +1065,9 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
int bch2_fs_replicas_init(struct bch_fs *c) int bch2_fs_replicas_init(struct bch_fs *c)
{ {
c->journal.entry_u64s_reserved += bch2_journal_entry_res_resize(&c->journal,
reserve_journal_replicas(c, &c->replicas); &c->replicas_journal_res,
reserve_journal_replicas(c, &c->replicas));
return replicas_table_update(c, &c->replicas); return replicas_table_update(c, &c->replicas);
} }

View File

@@ -152,8 +152,9 @@ static void bch2_dev_usage_journal_reserve(struct bch_fs *c)
{ {
struct bch_dev *ca; struct bch_dev *ca;
unsigned i, nr = 0, u64s = unsigned i, nr = 0, u64s =
(sizeof(struct jset_entry_dev_usage) + ((sizeof(struct jset_entry_dev_usage) +
sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR); sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) /
sizeof(u64);
rcu_read_lock(); rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL) for_each_member_device_rcu(ca, c, i, NULL)
@@ -789,14 +790,20 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_fsio_init(c)) bch2_fs_fsio_init(c))
goto err; goto err;
bch2_dev_usage_journal_reserve(c);
mi = bch2_sb_get_members(c->disk_sb.sb); mi = bch2_sb_get_members(c->disk_sb.sb);
for (i = 0; i < c->sb.nr_devices; i++) for (i = 0; i < c->sb.nr_devices; i++)
if (bch2_dev_exists(c->disk_sb.sb, mi, i) && if (bch2_dev_exists(c->disk_sb.sb, mi, i) &&
bch2_dev_alloc(c, i)) bch2_dev_alloc(c, i))
goto err; goto err;
bch2_journal_entry_res_resize(&c->journal,
&c->btree_root_journal_res,
BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX));
bch2_dev_usage_journal_reserve(c);
bch2_journal_entry_res_resize(&c->journal,
&c->clock_journal_res,
(sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
mutex_lock(&bch_fs_list_lock); mutex_lock(&bch_fs_list_lock);
err = bch2_fs_online(c); err = bch2_fs_online(c);
mutex_unlock(&bch_fs_list_lock); mutex_unlock(&bch_fs_list_lock);