Update bcachefs sources to 5264e9f4d0c0 bcachefs: fix setting version_upgrade_complete

This commit is contained in:
Kent Overstreet 2023-12-30 16:04:21 -05:00
parent 44bf7868e5
commit 378ae738d5
22 changed files with 118 additions and 103 deletions

View File

@ -1 +1 @@
44ac32df8e0c112b76c841088d93c7e923aada80
5264e9f4d0c00922dee2b2635dedaa3438a78e3f

View File

@ -958,8 +958,8 @@ static int __open_bucket_add_buckets(struct btree_trans *trans,
devs = target_rw_devs(c, wp->data_type, target);
/* Don't allocate from devices we already have pointers to: */
for (i = 0; i < devs_have->nr; i++)
__clear_bit(devs_have->devs[i], devs.d);
darray_for_each(*devs_have, i)
__clear_bit(*i, devs.d);
open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d);

View File

@ -647,6 +647,7 @@ struct btree_debug {
#define BCH_TRANSACTIONS_NR 128
struct btree_transaction_stats {
struct bch2_time_stats duration;
struct bch2_time_stats lock_hold_times;
struct mutex lock;
unsigned nr_max_paths;

View File

@ -1797,8 +1797,10 @@ static void btree_node_write_work(struct work_struct *work)
bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
ret = -BCH_ERR_btree_write_all_failed;
goto err;
}
if (wbio->wbio.first_btree_write) {
if (wbio->wbio.failed.nr) {

View File

@ -1476,9 +1476,6 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans)
struct printbuf buf = PRINTBUF;
size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths);
if (!s)
return;
bch2_trans_paths_to_text(&buf, trans);
if (!buf.allocation_failure) {
@ -2148,18 +2145,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
goto out_no_locked;
/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
* We need to check against @end before FILTER_SNAPSHOTS because
* if we get to a different inode than requested we might be
* seeing keys for a different snapshot tree that will all be
* filtered out.
*
* But we can't do the full check here, because bkey_start_pos()
* isn't monotonically increasing before FILTER_SNAPSHOTS, and
* that's what we check against in extents mode:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
if (k.k->p.inode > end.inode)
goto end;
if (iter->update_path &&
@ -2218,6 +2213,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
continue;
}
/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
goto end;
break;
}
@ -2768,7 +2778,6 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (s)
s->max_mem = max(s->max_mem, new_bytes);
new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
@ -2885,9 +2894,15 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
now = local_clock();
if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) &&
time_after64(now, trans->last_begin_time + 10))
__bch2_time_stats_update(&btree_trans_stats(trans)->duration,
trans->last_begin_time, now);
if (!trans->restarted &&
(need_resched() ||
now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
time_after64(now, trans->last_begin_time + BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS))) {
drop_locks_do(trans, (cond_resched(), 0));
now = local_clock();
}
@ -2906,13 +2921,11 @@ u32 bch2_trans_begin(struct btree_trans *trans)
return trans->restart_count;
}
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR] = { "(unknown)" };
unsigned bch2_trans_get_fn_idx(const char *fn)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
for (unsigned i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
if (!bch2_btree_transaction_fns[i] ||
bch2_btree_transaction_fns[i] == fn) {
bch2_btree_transaction_fns[i] = fn;
@ -2920,7 +2933,7 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
}
pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
return i;
return 0;
}
struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
@ -3225,6 +3238,7 @@ void bch2_fs_btree_iter_init_early(struct bch_fs *c)
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
s++) {
bch2_time_stats_init(&s->duration);
bch2_time_stats_init(&s->lock_hold_times);
mutex_init(&s->lock);
}

View File

@ -467,9 +467,7 @@ static void __journal_keys_sort(struct journal_keys *keys)
src = dst = keys->d;
while (src < keys->d + keys->nr) {
while (src + 1 < keys->d + keys->nr &&
src[0].btree_id == src[1].btree_id &&
src[0].level == src[1].level &&
bpos_eq(src[0].k->k.p, src[1].k->k.p))
!journal_key_cmp(src, src + 1))
src++;
*dst++ = *src++;

View File

@ -122,10 +122,7 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (s)
__bch2_time_stats_update(&s->lock_hold_times,
__bch2_time_stats_update(&btree_trans_stats(trans)->lock_hold_times,
path->l[level].lock_taken_time,
local_clock());
#endif

View File

@ -557,9 +557,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi
if (new_top > trans->journal_entries_size) {
trans->journal_entries_size = roundup_pow_of_two(new_top);
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (s)
s->journal_entries_size = trans->journal_entries_size;
btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size;
}
struct jset_entry *n =

View File

@ -17,14 +17,14 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *,
static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *);
static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
{
return (cmp_int(l->hi, r->hi) ?:
cmp_int(l->mi, r->mi) ?:
cmp_int(l->lo, r->lo)) >= 0;
}
static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
{
#ifdef CONFIG_X86_64
int cmp;
@ -39,10 +39,10 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re
: [l] "r" (l), [r] "r" (r)
: "rax", "cc");
EBUG_ON(cmp != __wb_key_cmp(l, r));
EBUG_ON(cmp != __wb_key_ref_cmp(l, r));
return cmp;
#else
return __wb_key_cmp(l, r);
return __wb_key_ref_cmp(l, r);
#endif
}
@ -87,12 +87,12 @@ static noinline void wb_sort(struct wb_key_ref *base, size_t num)
* average, 3/4 worst-case.)
*/
for (b = a; c = 2*b + 1, (d = c + 1) < n;)
b = wb_key_cmp(base + c, base + d) ? c : d;
b = wb_key_ref_cmp(base + c, base + d) ? c : d;
if (d == n) /* Special case last leaf with no sibling */
b = c;
/* Now backtrack from "b" to the correct location for "a" */
while (b != a && wb_key_cmp(base + a, base + b))
while (b != a && wb_key_ref_cmp(base + a, base + b))
b = (b - 1) / 2;
c = b; /* Where "a" belongs */
while (b != a) { /* Shift it into place */
@ -484,7 +484,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
}
int __bch2_journal_key_to_wb(struct bch_fs *c,
int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{

View File

@ -29,7 +29,7 @@ struct journal_keys_to_wb {
u64 seq;
};
int __bch2_journal_key_to_wb(struct bch_fs *,
int bch2_journal_key_to_wb_slowpath(struct bch_fs *,
struct journal_keys_to_wb *,
enum btree_id, struct bkey_i *);
@ -40,7 +40,7 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
EBUG_ON(!dst->seq);
if (unlikely(!dst->room))
return __bch2_journal_key_to_wb(c, dst, btree, k);
return bch2_journal_key_to_wb_slowpath(c, dst, btree, k);
struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
wb_k->journal_seq = dst->seq;

View File

@ -600,7 +600,8 @@ int bch2_data_update_init(struct btree_trans *trans,
* Increasing replication is an explicit operation triggered by
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
if (durability_have >= io_opts.data_replicas) {
if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
durability_have >= io_opts.data_replicas) {
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */

View File

@ -693,7 +693,7 @@ static const struct file_operations journal_pins_ops = {
.read = bch2_journal_pins_read,
};
static int lock_held_stats_open(struct inode *inode, struct file *file)
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
struct bch_fs *c = inode->i_private;
struct dump_iter *i;
@ -703,7 +703,7 @@ static int lock_held_stats_open(struct inode *inode, struct file *file)
if (!i)
return -ENOMEM;
i->iter = 0;
i->iter = 1;
i->c = c;
i->buf = PRINTBUF;
file->private_data = i;
@ -711,7 +711,7 @@ static int lock_held_stats_open(struct inode *inode, struct file *file)
return 0;
}
static int lock_held_stats_release(struct inode *inode, struct file *file)
static int btree_transaction_stats_release(struct inode *inode, struct file *file)
{
struct dump_iter *i = file->private_data;
@ -721,7 +721,7 @@ static int lock_held_stats_release(struct inode *inode, struct file *file)
return 0;
}
static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
@ -755,6 +755,13 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
prt_printf(&i->buf, "Max mem used: %u", s->max_mem);
prt_newline(&i->buf);
prt_printf(&i->buf, "Transaction duration:");
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
bch2_time_stats_to_text(&i->buf, &s->duration);
printbuf_indent_sub(&i->buf, 2);
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
prt_printf(&i->buf, "Lock hold times:");
prt_newline(&i->buf);
@ -786,11 +793,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
return i->ret;
}
static const struct file_operations lock_held_stats_op = {
static const struct file_operations btree_transaction_stats_op = {
.owner = THIS_MODULE,
.open = lock_held_stats_open,
.release = lock_held_stats_release,
.read = lock_held_stats_read,
.open = btree_transaction_stats_open,
.release = btree_transaction_stats_release,
.read = btree_transaction_stats_read,
};
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
@ -882,7 +889,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
c->btree_debug, &journal_pins_ops);
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &lock_held_stats_op);
c, &btree_transaction_stats_op);
debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
c->btree_debug, &btree_deadlock_ops);

View File

@ -222,6 +222,7 @@
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
x(EIO, sb_not_downgraded) \
x(EIO, btree_write_all_failed) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \

View File

@ -566,7 +566,7 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(p, ptr)
ret.devs[ret.nr++] = ptr->dev;
ret.data[ret.nr++] = ptr->dev;
return ret;
}
@ -578,7 +578,7 @@ static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
bkey_for_each_ptr(p, ptr)
if (!ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
ret.data[ret.nr++] = ptr->dev;
return ret;
}
@ -590,7 +590,7 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
bkey_for_each_ptr(p, ptr)
if (ptr->cached)
ret.devs[ret.nr++] = ptr->dev;
ret.data[ret.nr++] = ptr->dev;
return ret;
}

View File

@ -1602,12 +1602,12 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
struct bch_opts opts = bch2_opts_empty();
int ret;
opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
ret = bch2_parse_mount_opts(c, &opts, data);
if (ret)
goto err;
opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
if (opts.read_only != c->opts.read_only) {
down_write(&c->state_lock);

View File

@ -925,8 +925,8 @@ use_clean:
}
mutex_lock(&c->sb_lock);
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version);
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
write_sb = true;
}

View File

@ -173,8 +173,6 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
unsigned i;
BUG_ON(!data_type ||
data_type == BCH_DATA_sb ||
data_type >= BCH_DATA_NR);
@ -183,8 +181,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
e->nr_devs = 0;
e->nr_required = 1;
for (i = 0; i < devs.nr; i++)
e->devs[e->nr_devs++] = devs.devs[i];
darray_for_each(devs, i)
e->devs[e->nr_devs++] = *i;
bch2_replicas_entry_sort(e);
}

View File

@ -2,6 +2,8 @@
#ifndef _BCACHEFS_SB_MEMBERS_H
#define _BCACHEFS_SB_MEMBERS_H
#include "darray.h"
extern char * const bch2_member_error_strs[];
static inline struct bch_member *
@ -47,23 +49,18 @@ static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
unsigned dev)
{
unsigned i;
for (i = 0; i < devs.nr; i++)
if (devs.devs[i] == dev)
darray_for_each(devs, i)
if (*i == dev)
return true;
return false;
}
static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,
unsigned dev)
{
unsigned i;
for (i = 0; i < devs->nr; i++)
if (devs->devs[i] == dev) {
array_remove_item(devs->devs, devs->nr, i);
darray_for_each(*devs, i)
if (*i == dev) {
darray_remove_item(devs, i);
return;
}
}
@ -72,14 +69,14 @@ static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
unsigned dev)
{
if (!bch2_dev_list_has_dev(*devs, dev)) {
BUG_ON(devs->nr >= ARRAY_SIZE(devs->devs));
devs->devs[devs->nr++] = dev;
BUG_ON(devs->nr >= ARRAY_SIZE(devs->data));
devs->data[devs->nr++] = dev;
}
}
static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
{
return (struct bch_devs_list) { .nr = 1, .devs[0] = dev };
return (struct bch_devs_list) { .nr = 1, .data[0] = dev };
}
static inline struct bch_dev *__bch2_next_dev_idx(struct bch_fs *c, unsigned idx,

View File

@ -907,18 +907,6 @@ int bch2_write_super(struct bch_fs *c)
c->disk_sb.sb->magic = BCHFS_MAGIC;
c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
prt_str(&buf, " > ");
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
prt_str(&buf, ")");
bch2_fs_fatal_error(c, "%s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_sb_not_downgraded;
}
le64_add_cpu(&c->disk_sb.sb->seq, 1);
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
@ -962,6 +950,18 @@ int bch2_write_super(struct bch_fs *c)
if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
goto out;
if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
prt_str(&buf, " > ");
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
prt_str(&buf, ")");
bch2_fs_fatal_error(c, "%s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_sb_not_downgraded;
}
for_each_online_member(c, ca) {
__set_bit(ca->dev_idx, sb_written.d);
ca->sb_write_error = 0;

View File

@ -1040,12 +1040,13 @@ int bch2_fs_start(struct bch_fs *c)
}
ret = 0;
out:
err:
if (ret)
bch_err_msg(c, ret, "starting filesystem");
else
bch_verbose(c, "done starting filesystem");
up_write(&c->state_lock);
return ret;
err:
bch_err_msg(c, ret, "starting filesystem");
goto out;
}
static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)

View File

@ -22,7 +22,7 @@ struct bch_devs_mask {
struct bch_devs_list {
u8 nr;
u8 devs[BCH_BKEY_PTRS_MAX];
u8 data[BCH_BKEY_PTRS_MAX];
};
struct bch_member_cpu {

View File

@ -447,9 +447,9 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
unsigned long flags;
WARN_RATELIMIT(!stats->min_duration || !stats->min_freq,
"time_stats: min_duration = %llu, min_freq = %llu",
stats->min_duration, stats->min_freq);
WARN_ONCE(!stats->duration_stats_weighted.weight ||
!stats->freq_stats_weighted.weight,
"uninitialized time_stats");
if (!stats->buffer) {
spin_lock_irqsave(&stats->lock, flags);