Update bcachefs sources to e3e6e947d0c9 bcachefs: Clear recovery_passes_required when initializing

Kent Overstreet 2025-10-23 22:22:03 -04:00
parent cf6d398cbb
commit fa1882de61
34 changed files with 392 additions and 253 deletions

View File

@ -1 +1 @@
ed9ece3835e374b4740124c5c3597f42c3b6d354
e3e6e947d0c9af7dce749a5d9a88ef5d6cc60311

View File

@ -1234,7 +1234,7 @@ put_ref:
static int invalidate_one_bp(struct btree_trans *trans,
struct bch_dev *ca,
struct bkey_s_c_backpointer bp,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
CLASS(btree_iter_uninit, iter)(trans);
struct bkey_s_c k = bkey_try(bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed));
@ -1252,7 +1252,7 @@ static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket,
u8 gen,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bpos bp_start = bucket_pos_to_bp_start(ca, bucket);
struct bpos bp_end = bucket_pos_to_bp_end(ca, bucket);
@ -1281,7 +1281,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
@ -1364,8 +1364,8 @@ static void bch2_do_invalidates_work(struct work_struct *work)
CLASS(btree_trans, trans)(c);
int ret = 0;
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
ret = bch2_btree_write_buffer_tryflush(trans);
if (ret)
@ -1398,6 +1398,7 @@ restart_err:
if (ret)
break;
wb_maybe_flush_inc(&last_flushed);
bch2_btree_iter_advance(&iter);
}
bch2_trans_iter_exit(&iter);
@ -1593,13 +1594,22 @@ void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
{
/* BCH_DATA_free == all rw devs */
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
if (rw &&
(i == BCH_DATA_free ||
(ca->mi.data_allowed & BIT(i))))
set_bit(ca->dev_idx, c->rw_devs[i].d);
else
clear_bit(ca->dev_idx, c->rw_devs[i].d);
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) {
bool data_type_rw = rw;
if (i != BCH_DATA_free &&
!(ca->mi.data_allowed & BIT(i)))
data_type_rw = false;
if ((i == BCH_DATA_journal ||
i == BCH_DATA_btree) &&
!ca->mi.durability)
data_type_rw = false;
mod_bit(ca->dev_idx, c->rw_devs[i].d, data_type_rw);
}
c->rw_devs_change_count++;
}
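
The rewritten loop above collapses the old set_bit()/clear_bit() branches into a single conditional-bit update per data type. mod_bit() is not defined in this diff; the following is a minimal sketch assumed from its usage here, mirroring the semantics of the branches it replaces:

/*
 * Sketch only: assumed from usage above - set or clear a bit in the
 * device mask depending on a boolean, like the old set_bit()/clear_bit()
 * branches did.
 */
static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
{
	if (v)
		set_bit(nr, addr);
	else
		clear_bit(nr, addr);
}
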
/* device goes ro: */
@ -1610,8 +1620,6 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
/* First, remove device from allocation groups: */
bch2_dev_allocator_set_rw(c, ca, false);
c->rw_devs_change_count++;
/*
* Capacity is calculated based off of devices in allocation groups:
*/

View File

@ -6,7 +6,6 @@
#include "alloc/backpointers.h"
#include "btree/bbpos.h"
#include "btree/bkey_buf.h"
#include "btree/cache.h"
#include "btree/update.h"
#include "btree/interior.h"
@ -187,7 +186,7 @@ static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
struct bkey_s_c visiting_k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
return !static_branch_unlikely(&bch2_backpointers_no_use_write_buffer)
? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
@ -197,7 +196,7 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
static int backpointer_target_not_found(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct bkey_s_c target_k,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
bool commit)
{
struct bch_fs *c = trans->c;
@ -260,7 +259,7 @@ fsck_err:
static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct btree_iter *iter,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
bool commit)
{
struct bch_fs *c = trans->c;
@ -306,7 +305,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct btree_iter *iter,
unsigned iter_flags,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
bool commit)
{
struct bch_fs *c = trans->c;
@ -358,7 +357,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct btree_iter *iter,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true);
}
@ -367,13 +366,13 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
struct btree_iter *iter,
unsigned iter_flags,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true);
}
static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
if (k.k->type != KEY_TYPE_backpointer)
return 0;
@ -415,11 +414,10 @@ fsck_err:
int bch2_check_btree_backpointers(struct bch_fs *c)
{
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_backpointers));
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
CLASS(btree_trans, trans)(c);
return for_each_btree_key_commit(trans, iter,
@ -431,9 +429,9 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
}
struct extents_to_bp_state {
struct bpos bp_start;
struct bpos bp_end;
struct bkey_buf last_flushed;
struct bpos bp_start;
struct bpos bp_end;
struct wb_maybe_flush last_flushed;
};
static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
@ -790,6 +788,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
try(for_each_btree_key_continue(trans, iter, 0, k, ({
bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers") ?:
wb_maybe_flush_inc(&s->last_flushed) ?:
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
})));
@ -825,11 +824,11 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
}
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos,
struct bkey_buf *last_flushed);
struct wb_maybe_flush *last_flushed);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
bool *had_mismatch,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
struct bpos *last_pos,
unsigned *nr_iters)
{
@ -1100,7 +1099,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
struct bpos last_pos = POS_MIN;
unsigned nr_iters = 0;
bch2_bkey_buf_init(&s.last_flushed);
wb_maybe_flush_init(&s.last_flushed);
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k, ({
@ -1132,8 +1131,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
if ( bpos_eq(s.bp_start, POS_MIN) &&
!bpos_eq(s.bp_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
__func__, btree_nodes_fit_in_ram(c));
bch_info(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
__func__, btree_nodes_fit_in_ram(c));
if (!bpos_eq(s.bp_start, POS_MIN) ||
!bpos_eq(s.bp_end, SPOS_MAX)) {
@ -1159,7 +1158,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
}
err:
bch2_bkey_buf_exit(&s.last_flushed);
wb_maybe_flush_exit(&s.last_flushed);
bch2_btree_cache_unpin(c);
return ret;
}
@ -1167,7 +1166,7 @@ err:
static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
struct bpos bucket,
bool *had_mismatch,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
CLASS(btree_iter, alloc_iter)(trans, BTREE_ID_alloc, bucket, BTREE_ITER_cached);
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&alloc_iter));
@ -1182,7 +1181,7 @@ static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
struct bch_dev *ca, u64 bucket,
bool copygc,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_fs *c = trans->c;
bool had_mismatch;
@ -1215,7 +1214,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c bp_k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
if (bp_k.k->type != KEY_TYPE_backpointer)
return 0;
@ -1237,7 +1236,7 @@ static int check_one_backpointer(struct btree_trans *trans,
static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
struct bch_dev *ca, struct bpos bucket,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
u32 restart_count = trans->restart_count;
@ -1257,8 +1256,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
struct progress_indicator_state progress;
bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

View File

@ -174,14 +174,14 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
};
}
struct bkey_buf;
struct wb_maybe_flush;
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer,
struct btree_iter *, unsigned, struct bkey_buf *);
struct btree_iter *, unsigned, struct wb_maybe_flush *);
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
struct btree_iter *, struct bkey_buf *);
struct btree_iter *, struct wb_maybe_flush *);
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
bool, struct bkey_buf *);
bool, struct wb_maybe_flush *);
int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);

View File

@ -5,9 +5,9 @@
#include "alloc/check.h"
#include "alloc/lru.h"
#include "btree/bkey_buf.h"
#include "btree/cache.h"
#include "btree/update.h"
#include "btree/write_buffer.h"
#include "data/ec.h"
@ -619,7 +619,7 @@ bkey_err:
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct btree_iter *alloc_iter,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_fs *c = trans->c;
struct bch_alloc_v4 a_convert;
@ -670,8 +670,8 @@ fsck_err:
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_alloc));
@ -681,6 +681,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter) ?:
wb_maybe_flush_inc(&last_flushed) ?:
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed);
}))?: bch2_check_stripe_to_lru_refs(trans);
}

View File

@ -80,7 +80,7 @@ int bch2_lru_check_set(struct btree_trans *trans,
u64 dev_bucket,
u64 time,
struct bkey_s_c referring_k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_fs *c = trans->c;
int ret = 0;
@ -168,7 +168,7 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf1)();
@ -202,8 +202,8 @@ fsck_err:
int bch2_check_lrus(struct bch_fs *c)
{
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_lru));
@ -213,6 +213,7 @@ int bch2_check_lrus(struct bch_fs *c)
BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter) ?:
wb_maybe_flush_inc(&last_flushed) ?:
bch2_check_lru_key(trans, &iter, k, &last_flushed);
}));
}

View File

@ -72,8 +72,9 @@ static inline int bch2_lru_change(struct btree_trans *trans,
int bch2_dev_remove_lrus(struct bch_fs *, struct bch_dev *);
struct bkey_buf;
int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *);
struct wb_maybe_flush;
int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c,
struct wb_maybe_flush *);
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */

View File

@ -776,8 +776,8 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
/* Query replicas: */
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
{
struct bch_replicas_entry_v1 *e;
@ -829,6 +829,77 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
return true;
}
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err,
bool write)
{
if (write) {
unsigned nr_have[BCH_DATA_NR];
memset(nr_have, 0, sizeof(nr_have));
unsigned nr_online[BCH_DATA_NR];
memset(nr_online, 0, sizeof(nr_online));
scoped_guard(rcu)
for_each_member_device_rcu(c, ca, &devs) {
if (!ca->mi.durability)
continue;
bool online = ca->mi.state == BCH_MEMBER_STATE_rw &&
test_bit(ca->dev_idx, devs.d);
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
nr_have[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
if (online)
nr_online[i] += ca->mi.data_allowed & BIT(i) ? ca->mi.durability : 0;
}
}
if (!nr_online[BCH_DATA_journal]) {
prt_printf(err, "No rw journal devices online\n");
return false;
}
if (!nr_online[BCH_DATA_btree]) {
prt_printf(err, "No rw btree devices online\n");
return false;
}
if (!nr_online[BCH_DATA_user]) {
prt_printf(err, "No rw user data devices online\n");
return false;
}
if (!(flags & BCH_FORCE_IF_METADATA_DEGRADED)) {
if (nr_online[BCH_DATA_journal] < nr_have[BCH_DATA_journal] &&
nr_online[BCH_DATA_journal] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw journal devices (%u) online\n",
nr_online[BCH_DATA_journal]);
return false;
}
if (nr_online[BCH_DATA_btree] < nr_have[BCH_DATA_btree] &&
nr_online[BCH_DATA_btree] < c->opts.metadata_replicas) {
prt_printf(err, "Insufficient rw btree devices (%u) online\n",
nr_online[BCH_DATA_btree]);
return false;
}
}
if (!(flags & BCH_FORCE_IF_DATA_DEGRADED)) {
if (nr_online[BCH_DATA_user] < nr_have[BCH_DATA_user] &&
nr_online[BCH_DATA_user] < c->opts.data_replicas) {
prt_printf(err, "Insufficient rw user data devices (%u) online\n",
nr_online[BCH_DATA_user]);
return false;
}
}
}
return bch2_can_read_fs_with_devs(c, devs, flags, err);
}
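
With bch2_have_enough_devs() growing a write parameter, callers now distinguish "can we read the filesystem" from "can we run it read-write". A hedged call-site sketch mirroring the super.c hunks later in this commit (fs_may_start() is a stand-in name for the real caller):

static bool fs_may_start(struct bch_fs *c, unsigned flags)
{
	CLASS(printbuf, err)();

	/* write availability is only required if we'll mount read-write: */
	bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err,
					 !c->opts.read_only);
	if (!ret)
		bch2_print_str(c, KERN_ERR, err.buf);
	return ret;
}
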
unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
struct bch_sb_field_replicas *replicas;

View File

@ -43,8 +43,10 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e,
e->devs[0] = dev;
}
bool bch2_can_read_fs_with_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *);
bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *);
unsigned, struct printbuf *, bool);
unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

View File

@ -188,14 +188,6 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
l.k->size == r.k->size &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
@ -205,6 +197,22 @@ static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
cmp_int(l.lo, r.lo);
}
static __always_inline bool bversion_eq(struct bversion l, struct bversion r)
{
return l.hi == r.hi &&
l.lo == r.lo;
}
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return l.k->u64s == r.k->u64s &&
l.k->type == r.k->type &&
bpos_eq(l.k->p, r.k->p) &&
bversion_eq(l.k->bversion, r.k->bversion) &&
l.k->size == r.k->size &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })
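
bkey_and_val_eq() now also compares u64s, type and bversion, so a version-only change is no longer treated as equal (and hence can't be dropped as a noop update). A small worked example, assuming keys initialized with bkey_init():

/*
 * Two keys identical in pos, size and (empty) value, differing only in
 * version, now compare unequal:
 */
static void bkey_and_val_eq_example(void)
{
	struct bkey_i a, b;

	bkey_init(&a.k);
	bkey_init(&b.k);
	a.k.bversion = (struct bversion) { .lo = 1 };
	b.k.bversion = (struct bversion) { .lo = 2 };

	BUG_ON(bkey_and_val_eq(bkey_i_to_s_c(&a), bkey_i_to_s_c(&b)));
}
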

View File

@ -67,11 +67,8 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert
k = bkey_i_to_s_c(j_k);
}
u = *k.k;
u.needs_whiteout = i->old_k.needs_whiteout;
BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey)));
BUG_ON(i->old_v != k.v);
struct bkey_s_c old = { &i->old_k, i->old_v };
BUG_ON(!bkey_and_val_eq(k, old));
#endif
}
@ -692,14 +689,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
trans_for_each_update(trans, i)
if (btree_node_type_has_atomic_triggers(i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_atomic|i->flags);
if (ret)
goto fatal_err;
if (bch2_fs_fatal_err_on(ret, c, "fatal error in transaction commit: %s", bch2_err_str(ret)))
return ret;
}
if (unlikely(c->gc_pos.phase)) {
ret = bch2_trans_commit_run_gc_triggers(trans);
if (ret)
goto fatal_err;
if (bch2_fs_fatal_err_on(ret, c, "fatal error in transaction commit: %s", bch2_err_str(ret)))
return ret;
}
struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit };
@ -716,7 +713,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (unlikely(ret)) {
bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
trans->fn);
goto fatal_err;
bch2_sb_error_count(c, BCH_FSCK_ERR_validate_error_in_commit);
__WARN();
return ret;
}
}
@ -728,7 +727,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (unlikely(ret)){
bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
trans->fn, (void *) i->ip_allocated);
goto fatal_err;
bch2_sb_error_count(c, BCH_FSCK_ERR_validate_error_in_commit);
__WARN();
return ret;
}
btree_insert_entry_checks(trans, i);
}
@ -795,9 +796,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
}
return 0;
fatal_err:
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
return ret;
}
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
@ -1055,6 +1053,29 @@ int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags f
if (ret)
goto out_reset;
if (likely(!(flags & BCH_TRANS_COMMIT_no_skip_noops))) {
struct btree_insert_entry *dst = trans->updates;
trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };
/*
* We can't drop noop inode updates because fsync relies
* on grabbing the journal_seq of the latest update from
* the inode - and the journal_seq isn't updated until
* the atomic trigger:
*/
if (likely(i->bkey_type == BKEY_TYPE_inodes ||
!bkey_and_val_eq(old, bkey_i_to_s_c(i->k))))
*dst++ = *i;
else
bch2_path_put(trans, i->path, true);
}
trans->nr_updates = dst - trans->updates;
if (!bch2_trans_has_updates(trans))
goto out_reset;
}
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))

View File

@ -2390,28 +2390,17 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BTREE_TRIGGER_transactional));
}
CLASS(btree_iter_uninit, iter2)(trans);
struct btree *parent = btree_node_parent(btree_iter_path(trans, iter), b);
if (parent) {
bch2_trans_copy_iter(&iter2, iter);
if (!btree_node_is_root(c, b)) {
CLASS(btree_node_iter, parent_iter)(trans,
b->c.btree_id,
b->key.k.p,
0,
b->c.level + 1,
BTREE_ITER_intent);
iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
iter2.flags & BTREE_ITER_intent,
_THIS_IP_);
struct btree_path *path2 = btree_iter_path(trans, &iter2);
BUG_ON(path2->level != b->c.level);
BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
btree_path_set_level_up(trans, path2);
trans->paths_sorted = false;
try(bch2_btree_iter_traverse(&iter2));
try(bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun));
try(bch2_btree_iter_traverse(&parent_iter));
try(bch2_trans_update(trans, &parent_iter, new_key, BTREE_TRIGGER_norun));
} else {
BUG_ON(!btree_node_is_root(c, b));
struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(new_key->k.u64s)));
@ -2453,6 +2442,11 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
{
struct btree_path *path = btree_iter_path(trans, iter);
/*
* Awkward - we can't rely on caller specifying BTREE_ITER_intent, and
* the commit will downgrade locks
*/
try(bch2_btree_path_upgrade(trans, path, b->c.level + 1));
path->intent_ref++;

View File

@ -1514,11 +1514,13 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
i->cached,
(void *) i->ip_allocated);
prt_printf(buf, " old ");
guard(printbuf_indent)(buf);
prt_printf(buf, "old ");
bch2_bkey_val_to_text(buf, trans->c, old);
prt_newline(buf);
prt_printf(buf, " new ");
prt_printf(buf, "new ");
bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
prt_newline(buf);
}

View File

@ -410,7 +410,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct bkey_cached *ck = NULL;
int ret;
CLASS(btree_iter, b_iter)(trans, key.btree_id, key.pos,
BTREE_ITER_slots|
@ -427,69 +426,56 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
if (!ck)
return 0;
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
if (evict)
goto evict;
return 0;
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
if (journal_seq && ck->journal.seq != journal_seq)
return 0;
trans->journal_res.seq = ck->journal.seq;
/*
* If we're at the end of the journal, we really want to free up space
* in the journal right away - we don't want to pin that old journal
* sequence number with a new btree node write, we want to re-journal
* the update
*/
if (ck->journal.seq == journal_last_seq(j))
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
!journal_low_on_space(&c->journal))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
struct bkey_s_c btree_k = bkey_try(bch2_btree_iter_peek_slot(&b_iter));
/* Check that we're not violating cache coherency rules: */
BUG_ON(bkey_deleted(btree_k.k));
try(bch2_trans_update(trans, &b_iter, ck->k,
BTREE_UPDATE_internal_snapshot_node|
BTREE_UPDATE_key_cache_reclaim|
BTREE_TRIGGER_norun));
try(bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_skip_noops|
commit_flags));
bch2_journal_pin_drop(j, &ck->journal);
struct btree_path *path = btree_iter_path(trans, &c_iter);
BUG_ON(!btree_node_locked(path, 0));
}
if (journal_seq && ck->journal.seq != journal_seq)
return 0;
trans->journal_res.seq = ck->journal.seq;
/*
* If we're at the end of the journal, we really want to free up space
* in the journal right away - we don't want to pin that old journal
* sequence number with a new btree node write, we want to re-journal
* the update
*/
if (ck->journal.seq == journal_last_seq(j))
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
!journal_low_on_space(&c->journal))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter);
ret = bkey_err(btree_k);
if (ret)
goto err;
/* Check that we're not violating cache coherency rules: */
BUG_ON(bkey_deleted(btree_k.k));
ret = bch2_trans_update(trans, &b_iter, ck->k,
BTREE_UPDATE_key_cache_reclaim|
BTREE_UPDATE_internal_snapshot_node|
BTREE_TRIGGER_norun) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
commit_flags);
err:
bch2_fs_fatal_err_on(ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
!bch2_journal_error(j), c,
"flushing key cache: %s", bch2_err_str(ret));
if (ret)
goto out;
bch2_journal_pin_drop(j, &ck->journal);
struct btree_path *path = btree_iter_path(trans, &c_iter);
BUG_ON(!btree_node_locked(path, 0));
if (!evict) {
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
atomic_long_dec(&c->btree_key_cache.nr_dirty);
}
} else {
struct btree_path *path = btree_iter_path(trans, &c_iter);
struct btree_path *path2;
unsigned i;
evict:
trans_for_each_path(trans, path2, i)
if (path2 != path)
__bch2_btree_path_unlock(trans, path2);
@ -509,8 +495,8 @@ evict:
six_unlock_intent(&ck->c.lock);
}
}
out:
return ret;
return 0;
}
int bch2_btree_key_cache_journal_flush(struct journal *j,
@ -544,6 +530,10 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
ret = lockrestart_do(trans,
btree_key_cache_flush_pos(trans, key, seq,
BCH_TRANS_COMMIT_journal_reclaim, false));
bch2_fs_fatal_err_on(ret &&
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
!bch2_journal_error(j), c,
"flushing key cache: %s", bch2_err_str(ret));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
return ret;

View File

@ -28,6 +28,7 @@ void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
x(no_check_rw, "don't attempt to take a ref on c->writes") \
x(no_journal_res, "don't take a journal reservation, instead " \
"pin journal entry referred to by trans->journal_res.seq") \
x(no_skip_noops, "don't drop noop updates") \
x(journal_reclaim, "operation required for journal reclaim; may return error" \
"instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")

View File

@ -109,7 +109,7 @@ static int btree_node_write_update_key(struct btree_trans *trans,
bch2_bkey_drop_ptrs(bkey_i_to_s(n), p, entry,
bch2_dev_list_has_dev(wbio->wbio.failed, p.ptr.dev));
if (!bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&wbio->key)))
if (!bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(n)))
return bch_err_throw(c, btree_node_write_all_failed);
return bch2_btree_node_update_key(trans, &iter, b, n,
@ -127,7 +127,6 @@ static void btree_node_write_work(struct work_struct *work)
struct bch_fs *c = wbio->wbio.c;
struct btree *b = wbio->wbio.bio.bi_private;
u64 start_time = wbio->start_time;
int ret = 0;
bch2_btree_bounce_free(c,
wbio->data_bytes,
@ -135,7 +134,7 @@ static void btree_node_write_work(struct work_struct *work)
wbio->data);
if (!wbio->wbio.first_btree_write || wbio->wbio.failed.nr) {
ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b));
int ret = bch2_trans_do(c, btree_node_write_update_key(trans, wbio, b));
if (ret) {
set_btree_node_noevict(b);

View File

@ -135,6 +135,7 @@ static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_journal_res|
BCH_TRANS_COMMIT_no_skip_noops|
BCH_TRANS_COMMIT_journal_reclaim);
}
@ -142,7 +143,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
struct btree_write_buffered_key *wb,
bool *write_locked,
bool *accounting_accumulated,
size_t *fast)
size_t *fast, size_t *noop)
{
struct btree_path *path;
@ -171,6 +172,21 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
path = btree_iter_path(trans, iter);
struct btree_path_level *l = path_l(path);
struct bkey_packed *old_p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
if (old_p && bkey_cmp_left_packed(l->b, old_p, &wb->k.k.p))
old_p = NULL;
struct bkey old_u;
struct bkey_s_c old = old_p
? bkey_disassemble(l->b, old_p, &old_u)
: bkey_s_c_null;
if (old.k && bkey_and_val_eq(old, bkey_i_to_s_c(&wb->k))) {
(*noop)++;
return 0;
}
if (!*write_locked) {
try(bch2_btree_node_lock_write(trans, path, &path->l[0].b->c));
@ -282,7 +298,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_iter iter = { NULL };
size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
size_t overwritten = 0, fast = 0, noop = 0, slowpath = 0, could_not_insert = 0;
bool write_locked = false;
bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
int ret = 0;
@ -394,7 +410,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
}
ret = wb_flush_one(trans, &iter, k, &write_locked,
&accounting_accumulated, &fast);
&accounting_accumulated, &fast, &noop);
if (!write_locked)
bch2_trans_begin(trans);
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
@ -495,7 +511,7 @@ err:
bch2_time_stats_update(&c->times[BCH_TIME_btree_write_buffer_flush], start_time);
bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
trace_write_buffer_flush(trans, nr_flushing, overwritten, fast);
trace_write_buffer_flush(trans, nr_flushing, overwritten, fast, noop);
return ret;
}
@ -642,11 +658,16 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
*/
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
struct bkey_s_c referring_k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *f)
{
struct bch_fs *c = trans->c;
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
if (f->seen_error &&
f->nr_flushes > 32 &&
f->nr_flushes * 8 > f->nr_done)
return 0;
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(f->last_flushed.k))) {
if (trace_write_buffer_maybe_flush_enabled()) {
CLASS(printbuf, buf)();
@ -665,13 +686,15 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
try(bch2_btree_write_buffer_flush_sync(trans));
bch2_bkey_buf_copy(last_flushed, tmp.k);
bch2_bkey_buf_copy(&f->last_flushed, tmp.k);
f->nr_flushes++;
/* can we avoid the unconditional restart? */
trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_);
return bch_err_throw(c, transaction_restart_write_buffer_flush);
}
f->seen_error = true;
return 0;
}
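
The early return added above ratelimits synchronous write buffer flushes once they stop paying off. A restatement of the predicate as a standalone helper, with concrete numbers:

/*
 * Skip further flushes once an error has been seen, more than 32
 * flushes have been issued, and flushes exceed 1/8th of the keys
 * processed: e.g. nr_flushes = 40, nr_done = 100 -> 40 * 8 = 320 > 100,
 * so skip.
 */
static bool wb_flush_ratelimited(const struct wb_maybe_flush *f)
{
	return f->seen_error &&
	       f->nr_flushes > 32 &&
	       f->nr_flushes * 8 > f->nr_done;
}
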

View File

@ -3,6 +3,7 @@
#define _BCACHEFS_BTREE_WRITE_BUFFER_H
#include "btree/bkey.h"
#include "btree/bkey_buf.h"
#include "alloc/accounting.h"
static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
@ -25,8 +26,31 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
struct bkey_buf;
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
struct wb_maybe_flush {
struct bkey_buf last_flushed;
u64 nr_flushes;
u64 nr_done;
bool seen_error;
};
static inline void wb_maybe_flush_exit(struct wb_maybe_flush *f)
{
bch2_bkey_buf_exit(&f->last_flushed);
}
static inline void wb_maybe_flush_init(struct wb_maybe_flush *f)
{
memset(f, 0, sizeof(*f));
bch2_bkey_buf_init(&f->last_flushed);
}
static inline int wb_maybe_flush_inc(struct wb_maybe_flush *f)
{
f->nr_done++;
return 0;
}
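
Putting the pieces together, the fsck call sites converted in this commit follow one pattern: init a wb_maybe_flush with cleanup, bump nr_done per key visited, and pass it down to checks that may trigger a write buffer flush. A sketch assembled from those hunks, with check_one_key() as a hypothetical stand-in for the per-key check:

static int check_some_btree(struct btree_trans *trans)
{
	struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
	wb_maybe_flush_init(&last_flushed);

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
			POS_MIN, BTREE_ITER_prefetch, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
		wb_maybe_flush_inc(&last_flushed) ?:	/* count keys checked */
		check_one_key(trans, k, &last_flushed);
	}));
}
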
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct wb_maybe_flush *);
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;

View File

@ -1035,7 +1035,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
struct bkey_s_c_backpointer bp,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
struct bch_fs *c = trans->c;
@ -1123,8 +1123,8 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
return for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket_pos),
@ -1142,6 +1142,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
if (bp.v->btree_id == BTREE_ID_stripes)
continue;
wb_maybe_flush_inc(&last_flushed);
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, bp, &last_flushed);
}));
}
@ -2243,7 +2244,7 @@ int bch2_fs_ec_init(struct bch_fs *c)
static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
struct bkey_s_c k,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
if (k.k->type != KEY_TYPE_stripe)
return 0;
@ -2258,8 +2259,8 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
int bch2_check_stripe_to_lru_refs(struct btree_trans *trans)
{
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
return for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
POS_MIN, BTREE_ITER_prefetch, k,

View File

@ -98,7 +98,7 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
static int bch2_dev_btree_drop_key(struct btree_trans *trans,
struct bkey_s_c_backpointer bp,
unsigned dev_idx,
struct bkey_buf *last_flushed,
struct wb_maybe_flush *last_flushed,
unsigned flags, struct printbuf *err)
{
CLASS(btree_iter_uninit, iter)(trans);
@ -185,7 +185,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
}
static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
struct bkey_s_c_backpointer bp, struct wb_maybe_flush *last_flushed,
unsigned flags, struct printbuf *err)
{
CLASS(btree_iter_uninit, iter)(trans);
@ -218,8 +218,8 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig
{
CLASS(btree_trans, trans)(c);
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
return bch2_btree_write_buffer_flush_sync(trans) ?:
for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
@ -229,6 +229,7 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig
if (k.k->type != KEY_TYPE_backpointer)
continue;
wb_maybe_flush_inc(&last_flushed);
data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
&last_flushed, flags, err);

View File

@ -572,8 +572,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
/*
* We're not run in a context that handles transaction restarts:

View File

@ -907,7 +907,7 @@ int bch2_fs_rebalance_init(struct bch_fs *c)
static int check_rebalance_work_one(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct btree_iter *rebalance_iter,
struct bkey_buf *last_flushed)
struct wb_maybe_flush *last_flushed)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
@ -983,8 +983,8 @@ int bch2_check_rebalance_work(struct bch_fs *c)
CLASS(btree_iter, rebalance_iter)(trans, BTREE_ID_rebalance_work, POS_MIN,
BTREE_ITER_prefetch);
struct bkey_buf last_flushed __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&last_flushed);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_rebalance_work));
@ -992,6 +992,7 @@ int bch2_check_rebalance_work(struct bch_fs *c)
int ret = 0;
while (!(ret = lockrestart_do(trans,
progress_update_iter(trans, &progress, &rebalance_iter) ?:
wb_maybe_flush_inc(&last_flushed) ?:
check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed))))
;

View File

@ -764,11 +764,20 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
}
if (!nr_replicas) {
if (trace) {
prt_printf(&buf, "\nnr_replicas %u < %u", nr_replicas, m->op.nr_replicas);
trace_data_update_fail(c, buf.buf);
/*
* If it's a promote that's failing because the promote target
* is full - we expect that in normal operation; it'll still
* show up in io_read_nopromote and error_throw:
*/
if (m->opts.type != BCH_DATA_UPDATE_promote) {
if (trace) {
prt_printf(&buf, " - got replicas %u\n", nr_replicas);
bch2_data_update_to_text(&buf, m);
prt_printf(&buf, "\nret:\t%s\n", bch2_err_str(-BCH_ERR_data_update_fail_no_rw_devs));
trace_data_update_fail(c, buf.buf);
}
count_event(c, data_update_fail);
}
count_event(c, data_update_fail);
return bch_err_throw(c, data_update_fail_no_rw_devs);
}

View File

@ -1246,23 +1246,25 @@ TRACE_EVENT(key_cache_fill,
);
TRACE_EVENT(write_buffer_flush,
TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast),
TP_ARGS(trans, nr, skipped, fast),
TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast, size_t noop),
TP_ARGS(trans, nr, skipped, fast, noop),
TP_STRUCT__entry(
__field(size_t, nr )
__field(size_t, skipped )
__field(size_t, fast )
__field(size_t, noop )
),
TP_fast_assign(
__entry->nr = nr;
__entry->skipped = skipped;
__entry->fast = fast;
__entry->noop = noop;
),
TP_printk("flushed %zu skipped %zu fast %zu",
__entry->nr, __entry->skipped, __entry->fast)
TP_printk("flushed %zu skipped %zu fast %zu noop %zu",
__entry->nr, __entry->skipped, __entry->fast, __entry->noop)
);
TRACE_EVENT(write_buffer_flush_sync,

View File

@ -514,7 +514,8 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);
return bch2_have_enough_devs(c, new_online_devs, flags, err);
return bch2_have_enough_devs(c, new_online_devs, flags, err,
test_bit(BCH_FS_rw, &c->flags));
default:
BUG();
}

View File

@ -451,8 +451,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
int ret;
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (WARN_ON(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))
@ -473,17 +471,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch_info(c, "going read-write");
ret = bch2_fs_init_rw(c);
if (ret)
return ret;
ret = bch2_sb_members_v2_init(c);
if (ret)
return ret;
ret = bch2_fs_mark_dirty(c);
if (ret)
return ret;
try(bch2_fs_init_rw(c));
try(bch2_sb_members_v2_init(c));
try(bch2_fs_mark_dirty(c));
clear_bit(BCH_FS_clean_shutdown, &c->flags);
@ -518,22 +508,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
enumerated_ref_start(&c->writes);
ret = bch2_journal_reclaim_start(&c->journal);
int ret = bch2_journal_reclaim_start(&c->journal) ?:
bch2_copygc_start(c) ?:
bch2_rebalance_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting journal reclaim thread");
goto err;
}
ret = bch2_copygc_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting copygc thread");
goto err;
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting rebalance thread");
goto err;
bch2_fs_read_only(c);
return ret;
}
bch2_do_discards(c);
@ -541,12 +521,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
return 0;
err:
if (test_bit(BCH_FS_rw, &c->flags))
bch2_fs_read_only(c);
else
__bch2_fs_read_only(c);
return ret;
}
int bch2_fs_read_write(struct bch_fs *c)
@ -905,8 +879,6 @@ static bool check_version_upgrade(struct bch_fs *c)
noinline_for_stack
static int bch2_fs_opt_version_init(struct bch_fs *c)
{
int ret = 0;
if (c->opts.norecovery) {
c->opts.recovery_pass_last = c->opts.recovery_pass_last
? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
@ -974,9 +946,7 @@ static int bch2_fs_opt_version_init(struct bch_fs *c)
if (!ext)
return bch_err_throw(c, ENOSPC_sb);
ret = bch2_sb_members_v2_init(c);
if (ret)
return ret;
try(bch2_sb_members_v2_init(c));
__le64 now = cpu_to_le64(ktime_get_real_seconds());
scoped_guard(rcu)
@ -1370,7 +1340,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
}
CLASS(printbuf, err)();
bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err);
bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err, !c->opts.read_only);
if (!ret)
bch2_print_str(c, KERN_ERR, err.buf);
return ret;
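
The error paths above are converted to the try() early-return helper. Its definition is not part of this diff; a minimal statement-form version consistent with the conversions here (the real macro may differ):

/* assumed: return early from the enclosing function on nonzero error */
#define try(_expr)					\
do {							\
	int _ret = (_expr);				\
	if (_ret)					\
		return _ret;				\
} while (0)
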

View File

@ -368,6 +368,7 @@ int bch2_journal_replay(struct bch_fs *c)
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_skip_noops|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_skip_accounting_apply|
BCH_TRANS_COMMIT_no_journal_res|
@ -400,6 +401,7 @@ int bch2_journal_replay(struct bch_fs *c)
ret = c->journal.watermark ? -1 :
commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_skip_noops|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_skip_accounting_apply|
(!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
@ -429,6 +431,7 @@ int bch2_journal_replay(struct bch_fs *c)
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_skip_noops|
BCH_TRANS_COMMIT_skip_accounting_apply|
(!k->allocated
? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
@ -1082,6 +1085,11 @@ int bch2_fs_initialize(struct bch_fs *c)
scoped_guard(mutex, &c->sb_lock) {
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
bch2_write_super(c);
}

View File

@ -1147,14 +1147,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
}
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->mi.durability)
continue;
struct journal_device *ja = &ca->journal;
if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d))
continue;
if (!ja->nr)
continue;

View File

@ -390,15 +390,15 @@ static int journal_validate_key(struct bch_fs *c,
bch2_bkey_compat(from.level, from.btree, version, big_endian,
write, NULL, bkey_to_packed(k));
ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from);
if (ret == -BCH_ERR_fsck_delete_bkey) {
if (journal_entry_err_on(ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from),
c, version, jset, entry,
journal_entry_bkey_bad_format,
"bkey validate error %s", bch2_err_str(ret))) {
le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
journal_entry_null_range(vstruct_next(entry), next);
return FSCK_DELETED_KEY;
}
if (ret)
goto fsck_err;
if (write)
bch2_bkey_compat(from.level, from.btree, version, big_endian,

View File

@ -156,8 +156,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
(totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes);
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->journal.nr ||
!ca->mi.durability)
if (!ca->journal.nr)
continue;
min_bucket_size = min(min_bucket_size, ca->mi.bucket_size);

View File

@ -74,9 +74,7 @@ static void __journal_write_alloc(struct journal *j,
* Check that we can use this device, and aren't already using
* it:
*/
if (!ca->mi.durability ||
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
if (!ja->nr ||
bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
sectors > ja->sectors_free) {
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);

View File

@ -6,6 +6,7 @@
#include "bcachefs.h"
#include "opts.h"
#include "alloc/background.h"
#include "alloc/disk_groups.h"
#include "data/compress.h"
@ -601,6 +602,15 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
bch2_write_super(c);
}
break;
case Opt_durability:
if (test_bit(BCH_FS_rw, &c->flags) &&
ca &&
bch2_dev_is_online(ca) &&
ca->mi.state == BCH_MEMBER_STATE_rw) {
guard(rcu)();
bch2_dev_allocator_set_rw(c, ca, true);
}
break;
case Opt_version_upgrade:
/*
* XXX: in the future we'll likely want to do compatible

View File

@ -340,7 +340,8 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
x(MAX, 329, 0)
x(validate_error_in_commit, 329, 0) \
x(MAX, 330, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -1171,13 +1171,13 @@ int bch2_write_super(struct bch_fs *c)
nr_wrote = dev_mask_nr(&sb_written);
can_mount_with_written =
bch2_have_enough_devs(c, sb_written, degraded_flags, NULL);
bch2_can_read_fs_with_devs(c, sb_written, degraded_flags, NULL);
for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
sb_written.d[i] = ~sb_written.d[i];
can_mount_without_written =
bch2_have_enough_devs(c, sb_written, degraded_flags, NULL);
bch2_can_read_fs_with_devs(c, sb_written, degraded_flags, NULL);
/*
* If we would be able to mount _without_ the devices we successfully