From f13abfac80699276bced2f6c967bb79e50e15841 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Nov 2025 10:29:48 -0500 Subject: [PATCH] Update bcachefs sources to 4550c5c02d57 bcachefs: sysfs trigger_check_inconsistent_replicas --- .bcachefs_revision | 2 +- libbcachefs/data/extents.c | 54 ++++++++++++++++++++++++++++++------ libbcachefs/data/extents.h | 2 ++ libbcachefs/data/read.c | 8 ++++++ libbcachefs/debug/sysfs.c | 57 ++++++++++++++++++++++++++++++++++++++ libbcachefs/debug/trace.h | 5 ++++ 6 files changed, 119 insertions(+), 9 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 9fb7c6c8..3809984f 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -4749aaded066f8efed6819bf616eb4097e25dac2 +4550c5c02d578afb066ae3949bc6f130f7e3e958 diff --git a/libbcachefs/data/extents.c b/libbcachefs/data/extents.c index e20c2be1..95dcf7cf 100644 --- a/libbcachefs/data/extents.c +++ b/libbcachefs/data/extents.c @@ -27,6 +27,8 @@ #include "util/util.h" +#include + #ifdef CONFIG_BCACHEFS_DEBUG static int bch2_force_read_device = -1; @@ -34,6 +36,10 @@ module_param_named(force_read_device, bch2_force_read_device, int, 0644); MODULE_PARM_DESC(force_read_device, ""); #endif +static int bch2_force_read_idx = -1; +module_param_named(force_read_idx, bch2_force_read_idx, int, 0644); +MODULE_PARM_DESC(force_read_idx, ""); + static const char * const bch2_extent_flags_strs[] = { #define x(n, v) [BCH_EXTENT_FLAG_##n] = #n, BCH_EXTENT_FLAGS() @@ -199,6 +205,29 @@ static inline bool ptr_better(struct bch_fs *c, return bch2_get_random_u64_below(p1_latency + p2_latency) > p1_latency; } +static int pick_read_device_idx(struct bch_fs *c, struct bkey_s_c k, + struct extent_ptr_decoded *pick, unsigned idx_want) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.ptr.unwritten) + break; + + if (!idx_want--) { + *pick = p; + + struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); + return ca && + bch2_dev_is_online(ca) && + !dev_ptr_stale_rcu(ca, &p.ptr); + } + } + + return 0; +} + /* * This picks a non-stale pointer, preferably from a device other than @avoid. * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to @@ -215,12 +244,17 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (k.k->type == KEY_TYPE_error) return bch_err_throw(c, key_type_error); - rcu_read_lock(); + guard(rcu)(); + + if (unlikely(bch2_force_read_idx >= 0) && + !failed && + pick_read_device_idx(c, k, pick, bch2_force_read_idx)) + return 1; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; u64 pick_latency; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { have_dirty_ptrs |= !p.ptr.cached; @@ -228,10 +262,8 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, * Unwritten extent: no need to actually read, treat it as a * hole and return 0s: */ - if (p.ptr.unwritten) { - rcu_read_unlock(); + if (p.ptr.unwritten) return 0; - } /* Are we being asked to read from a specific device? */ if (dev >= 0 && p.ptr.dev != dev) @@ -297,7 +329,6 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, have_pick = true; } } - rcu_read_unlock(); if (have_pick) return 1; @@ -627,8 +658,7 @@ bool bch2_bkey_narrow_crc(const struct bch_fs *c, { BUG_ON(crc_is_compressed(new)); BUG_ON(new.offset); - BUG_ON(new.live_size != k->k.size); - + BUG_ON(new.uncompressed_size != new.live_size); union bch_extent_entry *old_e = bkey_crc_find(c, k, old); if (!old_e) @@ -1381,6 +1411,14 @@ void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_cr bch2_prt_compression_type(out, crc->compression_type); } +void bch2_extent_ptr_decoded_to_text(struct printbuf *out, struct bch_fs *c, + struct extent_ptr_decoded *p) +{ + bch2_extent_crc_unpacked_to_text(out, &p->crc); + prt_newline(out); + bch2_extent_ptr_to_text(out, c, &p->ptr); +} + const char * const bch2_extent_entry_types[] = { #define x(t, n, ...) [n] = #t, BCH_EXTENT_ENTRY_TYPES() diff --git a/libbcachefs/data/extents.h b/libbcachefs/data/extents.h index 871d68b2..ff725e5f 100644 --- a/libbcachefs/data/extents.h +++ b/libbcachefs/data/extents.h @@ -200,6 +200,8 @@ static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc) } void bch2_extent_crc_unpacked_to_text(struct printbuf *, struct bch_extent_crc_unpacked *); +void bch2_extent_ptr_decoded_to_text(struct printbuf *, struct bch_fs *, + struct extent_ptr_decoded *); /* bkey_ptrs: generically over any key type that has ptrs */ diff --git a/libbcachefs/data/read.c b/libbcachefs/data/read.c index a919ee7d..d4a82cd2 100644 --- a/libbcachefs/data/read.c +++ b/libbcachefs/data/read.c @@ -1309,6 +1309,14 @@ retry_pick: this_cpu_add(c->counters[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio)); bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); + if (trace_io_read_enabled()) { + CLASS(printbuf, buf)(); + bch2_bkey_val_to_text(&buf, c, k); + prt_printf(&buf, "\nreading from:\n"); + bch2_extent_ptr_decoded_to_text(&buf, c, &rbio->pick); + trace_io_read(c, buf.buf); + } + /* * If it's being moved internally, we don't want to flag it as a cache * hit: diff --git a/libbcachefs/debug/sysfs.c b/libbcachefs/debug/sysfs.c index ccb14fe3..d091b146 100644 --- a/libbcachefs/debug/sysfs.c +++ b/libbcachefs/debug/sysfs.c @@ -165,6 +165,7 @@ write_attribute(trigger_freelist_wakeup); write_attribute(trigger_recalc_capacity); write_attribute(trigger_delete_dead_snapshots); write_attribute(trigger_emergency_read_only); +write_attribute(trigger_check_inconsistent_replicas); read_attribute(gc_gens_pos); read_attribute(uuid); @@ -314,6 +315,58 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c) prt_printf(out, "reserved:\t\t%llu\n", b.reserved); } +static bool bkey_has_inconsistent_checksums(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p, prev; + bool have_prev = false; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.uncompressed_size != p.crc.live_size) + continue; + + if (!have_prev) { + prev = p; + have_prev = true; + continue; + } + + if (prev.crc.csum_type == p.crc.csum_type && + bch2_crc_cmp(prev.crc.csum, p.crc.csum)) + return true; + } + + return false; +} + +static int bkey_print_if_inconsistent_checksums(struct bch_fs *c, struct bkey_s_c k) +{ + if (bkey_has_inconsistent_checksums(c, k)) { + CLASS(printbuf, buf)(); + bch2_bkey_val_to_text(&buf, c, k); + pr_info("%s", buf.buf); + } + + return 0; +} + +static void bch2_check_inconsistent_replicas(struct bch_fs *c) +{ + CLASS(btree_trans, trans)(c); + for_each_btree_key(trans, iter, + BTREE_ID_extents, POS_MIN, + BTREE_ITER_all_snapshots, k, ({ + bkey_print_if_inconsistent_checksums(c, k); + })); + + for_each_btree_key(trans, iter, + BTREE_ID_reflink, POS_MIN, + BTREE_ITER_all_snapshots, k, ({ + bkey_print_if_inconsistent_checksums(c, k); + })); +} + SHOW(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); @@ -486,6 +539,9 @@ STORE(bch2_fs) printbuf_exit(&buf); } + if (attr == &sysfs_trigger_check_inconsistent_replicas) + bch2_check_inconsistent_replicas(c); + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp __free(kfree) = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -622,6 +678,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_recalc_capacity, &sysfs_trigger_delete_dead_snapshots, &sysfs_trigger_emergency_read_only, + &sysfs_trigger_check_inconsistent_replicas, &sysfs_gc_gens_pos, diff --git a/libbcachefs/debug/trace.h b/libbcachefs/debug/trace.h index c125bbd6..2adf712c 100644 --- a/libbcachefs/debug/trace.h +++ b/libbcachefs/debug/trace.h @@ -287,6 +287,11 @@ TRACE_EVENT(write_super, /* io.c: */ +DEFINE_EVENT(fs_str, io_read, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + DEFINE_EVENT(bio, io_read_promote, TP_PROTO(struct bio *bio), TP_ARGS(bio)